diff -Nurb linux-2.6.22-570/.config.orig linux-2.6.22-591/.config.orig --- linux-2.6.22-570/.config.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/.config.orig 2007-12-21 15:54:46.000000000 -0500 @@ -0,0 +1,1693 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.22-prep +# Fri Dec 21 15:54:46 2007 +# +CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_OOM_PANIC=y +# CONFIG_CONTAINER_DEBUG is not set +# CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +# CONFIG_CONTAINER_CPUACCT is not set +# CONFIG_CONTAINER_NS is not set +# CONFIG_RELAY is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y 
+CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_PROC_SMAPS=y +CONFIG_PROC_CLEAR_REFS=y +CONFIG_PROC_PAGEMAP=y +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_PAGE_GROUP_BY_MOBILITY=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set +CONFIG_STOP_MACHINE=y + +# +# Block layer +# +CONFIG_BLOCK=y +CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Processor type and features +# +# CONFIG_TICK_ONESHOT is not set +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_SMP=y +# CONFIG_X86_PC is not set +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +CONFIG_X86_GENERICARCH=y +# CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set +CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# 
CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_MODEL=4 +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_NR_CPUS=32 +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_MCE=y +CONFIG_X86_MCE_NONFATAL=y +CONFIG_X86_MCE_P4THERMAL=y +CONFIG_VM86=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_X86_REBOOTFIXUPS is not set +CONFIG_MICROCODE=y +CONFIG_MICROCODE_OLD_INTERFACE=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_HIGHMEM=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_RESOURCES_64BIT=y +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_NR_QUICK=1 +# CONFIG_HIGHPTE is not set +# CONFIG_MATH_EMULATION is not set +CONFIG_MTRR=y +# CONFIG_EFI is not set +# CONFIG_IRQBALANCE is not set +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_KEXEC=y +# 
CONFIG_CRASH_DUMP is not set +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 +# CONFIG_HOTPLUG_CPU is not set +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +CONFIG_PM_LEGACY=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SYSFS_DEPRECATED=y + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_FAN=y +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_THERMAL=y +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set +CONFIG_ACPI_BLACKLIST_YEAR=2001 +CONFIG_ACPI_DEBUG=y +# CONFIG_ACPI_DEBUG_FUNC_TRACE is not set +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y +# CONFIG_ACPI_CONTAINER is not set +# CONFIG_ACPI_SBS is not set +# CONFIG_APM is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_DEBUG=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K6 is not set +# CONFIG_X86_POWERNOW_K7 is not set +CONFIG_X86_POWERNOW_K8=y +CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_GX_SUSPMOD is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_SPEEDSTEP_ICH is not set +# CONFIG_X86_SPEEDSTEP_SMI is not set +# CONFIG_X86_P4_CLOCKMOD is not set +# CONFIG_X86_CPUFREQ_NFORCE2 is not set +# CONFIG_X86_LONGRUN is not set +# CONFIG_X86_LONGHAUL is not set +# CONFIG_X86_E_POWERSAVER is not set + +# +# shared options +# 
+CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_SPEEDSTEP_LIB is not set + +# +# CPU idle PM support +# +# CONFIG_CPU_IDLE is not set + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_HT_IRQ is not set +CONFIG_ISA_DMA_API=y +# CONFIG_ISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NET_NS is not set +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IP_VS is not set +# CONFIG_ICMP_IPOD is 
not set +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +# CONFIG_INET6_XFRM_MODE_BEET is not set +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_NETWORK_SECMARK is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# +# CONFIG_NETFILTER_NETLINK is not set +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +# CONFIG_NF_CT_ACCT is not set +# CONFIG_NF_CONNTRACK_MARK is not set +# CONFIG_NF_CONNTRACK_EVENTS is not set +# CONFIG_NF_CT_PROTO_SCTP is not set +# CONFIG_NF_CONNTRACK_AMANDA is not set +# CONFIG_NF_CONNTRACK_FTP is not set +# CONFIG_NF_CONNTRACK_H323 is not set +# CONFIG_NF_CONNTRACK_IRC is not set +# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set +# CONFIG_NF_CONNTRACK_PPTP is not set +# CONFIG_NF_CONNTRACK_SANE is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_NF_CONNTRACK_TFTP is not set +CONFIG_NETFILTER_XTABLES=m +# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_MARK is not set +# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set +# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set +# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set +# CONFIG_NETFILTER_XT_TARGET_SETXID is not set +# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set +# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set +# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set +# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set +# CONFIG_NETFILTER_XT_MATCH_DCCP is not set +# 
CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +# CONFIG_NETFILTER_XT_MATCH_HELPER is not set +# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set +# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set +# CONFIG_NETFILTER_XT_MATCH_MAC is not set +# CONFIG_NETFILTER_XT_MATCH_MARK is not set +# CONFIG_NETFILTER_XT_MATCH_POLICY is not set +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set +# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +# CONFIG_NETFILTER_XT_MATCH_REALM is not set +# CONFIG_NETFILTER_XT_MATCH_SCTP is not set +# CONFIG_NETFILTER_XT_MATCH_STATE is not set +# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set +# CONFIG_NETFILTER_XT_MATCH_STRING is not set +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +# CONFIG_IP_NF_MATCH_IPRANGE is not set +# CONFIG_IP_NF_MATCH_TOS is not set +# CONFIG_IP_NF_MATCH_RECENT is not set +# CONFIG_IP_NF_MATCH_ECN is not set +# CONFIG_IP_NF_MATCH_AH is not set +# CONFIG_IP_NF_MATCH_TTL is not set +# CONFIG_IP_NF_MATCH_OWNER is not set +# CONFIG_IP_NF_MATCH_ADDRTYPE is not set +CONFIG_IP_NF_FILTER=m +# CONFIG_IP_NF_TARGET_REJECT is not set +# CONFIG_IP_NF_TARGET_LOG is not set +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +# CONFIG_IP_NF_TARGET_MASQUERADE is not set +# CONFIG_IP_NF_TARGET_REDIRECT is not set +# CONFIG_IP_NF_TARGET_NETMAP is not set +# CONFIG_IP_NF_TARGET_SAME is not set +# CONFIG_NF_NAT_SNMP_BASIC is not set +# CONFIG_NF_NAT_FTP is not set +# CONFIG_NF_NAT_IRC is not set +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=m +# CONFIG_IP_NF_TARGET_TOS is not set +# 
CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +# CONFIG_IP_NF_RAW is not set +# CONFIG_IP_NF_ARPTABLES is not set +# CONFIG_IP_NF_SET is not set + +# +# IPv6: Netfilter Configuration (EXPERIMENTAL) +# +# CONFIG_NF_CONNTRACK_IPV6 is not set +# CONFIG_IP6_NF_QUEUE is not set +# CONFIG_IP6_NF_IPTABLES is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +CONFIG_NET_PKTGEN=m +# CONFIG_NET_TCPPROBE is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +CONFIG_CONNECTOR=m +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +CONFIG_PNP=y +# CONFIG_PNP_DEBUG is not set + +# +# Protocols +# +CONFIG_PNPACPI=y + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# 
CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# Misc devices +# +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_BLK_DEV_IDEACPI is not set +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_IDEPNP is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_BLK_DEV_ATIIXP is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_CS5535 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set +# CONFIG_BLK_DEV_SC1200 is not set 
+CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_IT8213 is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_BLK_DEV_TC86C001 is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +# CONFIG_SCSI_TGT is not set +CONFIG_SCSI_NETLINK=y +# CONFIG_SCSI_PROC_FS is not set + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set + +# +# SCSI low-level drivers +# +# CONFIG_ISCSI_TCP is not set +CONFIG_BLK_DEV_3W_XXXX_RAID=y +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +CONFIG_SCSI_AIC7XXX=y +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +CONFIG_AIC7XXX_DEBUG_ENABLE=y +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +# CONFIG_SCSI_AIC7XXX_OLD is not set +CONFIG_SCSI_AIC79XX=y +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=4000 +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +CONFIG_ATA=y +# 
CONFIG_ATA_NONSTANDARD is not set +CONFIG_ATA_ACPI=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_SVW=y +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +CONFIG_SATA_NV=y +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +CONFIG_SATA_SIL=y +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +CONFIG_SATA_VIA=y +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +# CONFIG_DM_CRYPT is not set +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH 
is not set +# CONFIG_DM_DELAY is not set +# CONFIG_DM_NETLINK is not set + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set +CONFIG_FUSION_MAX_SGE=128 +# CONFIG_FUSION_CTL is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +CONFIG_IEEE1394=y + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set + +# +# Controllers +# + +# +# Texas Instruments PCILynx requires I2C +# +CONFIG_IEEE1394_OHCI1394=y + +# +# Protocols +# +# CONFIG_IEEE1394_VIDEO1394 is not set +# CONFIG_IEEE1394_SBP2 is not set +# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set +# CONFIG_IEEE1394_ETH1394 is not set +# CONFIG_IEEE1394_DV1394 is not set +CONFIG_IEEE1394_RAWIO=y + +# +# I2O device support +# +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m +# CONFIG_ETUN is not set +# CONFIG_NET_SB1000 is not set +# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=y +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +CONFIG_B44=y +CONFIG_FORCEDETH=y +# CONFIG_FORCEDETH_NAPI is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is 
not set +# CONFIG_NE2K_PCI is not set +CONFIG_8139CP=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_E1000E is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +CONFIG_R8169=y +# CONFIG_R8169_NAPI is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +CONFIG_SKY2=y +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y +CONFIG_BNX2=y +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +CONFIG_NETDEV_10000=y +# CONFIG_CHELSIO_T1 is not set +# CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_MLX4_CORE is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set +# CONFIG_USB_USBNET is not set +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +# CONFIG_PPP_ASYNC is not set +# CONFIG_PPP_SYNC_TTY is not set +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set +# CONFIG_PPPOE is not set +# CONFIG_SLIP is not set +CONFIG_SLHC=m +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y 
+CONFIG_NETPOLL=y +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_PNP=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + 
+# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set +# CONFIG_WATCHDOG is not set +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_AMD=y +CONFIG_HW_RANDOM_GEODE=y +CONFIG_HW_RANDOM_VIA=y +# CONFIG_NVRAM is not set +CONFIG_RTC=y +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set +# CONFIG_CS5535_GPIO is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +CONFIG_HANGCHECK_TIMER=y + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set +# CONFIG_HWMON is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +CONFIG_DAB=y +# CONFIG_USB_DABUSB is not set + +# +# Graphics support +# +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=m +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VIDEO_SELECT=y +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=y + +# +# Advanced Linux Sound Architecture +# +# CONFIG_SND is not set + +# +# Open Sound System +# +CONFIG_SOUND_PRIME=y +# CONFIG_OSS_OBSOLETE is not set +# CONFIG_SOUND_TRIDENT is not set +# CONFIG_SOUND_MSNDCLAS is not 
set +# CONFIG_SOUND_MSNDPIN is not set +# CONFIG_SOUND_OSS is not set + +# +# HID Devices +# +CONFIG_HID=y +# CONFIG_HID_DEBUG is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set +# CONFIG_USB_ISP116X_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set +# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +CONFIG_USB_PRINTER=y + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter 
support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_BERRY_CHARGE is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGET is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set + +# +# USB DSL modem support +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# +# CONFIG_INFINIBAND is not set + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# +# CONFIG_EDAC is not set + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + +# +# Virtualization +# +# CONFIG_KVM is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# 
CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=m +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +CONFIG_NTFS_RW=y + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set + +# +# Layered filesystems +# +# CONFIG_UNION_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# 
CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_CODEPAGE_852=y +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=y +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# +# Instrumentation Support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_UNUSED_SYMBOLS=y +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set 
+CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_UNWIND_INFO is not set +# CONFIG_FORCED_INLINING is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_LKDTM is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_WANT_EXTRA_DEBUG_INFORMATION is not set +# CONFIG_KGDB is not set +CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_RODATA is not set +# CONFIG_4KSTACKS is not set +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y + +# +# Linux VServer +# +CONFIG_VSERVER_FILESHARING=y +CONFIG_VSERVER_AUTO_LBACK=y +CONFIG_VSERVER_AUTO_SINGLE=y +CONFIG_VSERVER_COWBL=y +# CONFIG_VSERVER_VTIME is not set +# CONFIG_VSERVER_DEVICE is not set +CONFIG_VSERVER_PROC_SECURE=y +CONFIG_VSERVER_HARDCPU=y +CONFIG_VSERVER_IDLETIME=y +# CONFIG_VSERVER_IDLELIMIT is not set +# CONFIG_TAGGING_NONE is not set +# CONFIG_TAGGING_UID16 is not set +# CONFIG_TAGGING_GID16 is not set +CONFIG_TAGGING_ID24=y +# CONFIG_TAGGING_INTERN is not set +# CONFIG_TAG_NFSD is not set +# CONFIG_VSERVER_PRIVACY is not set +CONFIG_VSERVER_CONTEXTS=256 +CONFIG_VSERVER_WARN=y +# CONFIG_VSERVER_DEBUG is not set +CONFIG_VSERVER=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y 
+CONFIG_CRC_CCITT=y +CONFIG_CRC16=y +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +CONFIG_LIBCRC32C=y +CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y diff -Nurb linux-2.6.22-570/Documentation/DocBook/Makefile linux-2.6.22-591/Documentation/DocBook/Makefile --- linux-2.6.22-570/Documentation/DocBook/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/DocBook/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -11,7 +11,7 @@ procfs-guide.xml writing_usb_driver.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ - genericirq.xml + genericirq.xml kgdb.xml ### # The build process is as follows (targets): diff -Nurb linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl linux-2.6.22-591/Documentation/DocBook/kgdb.tmpl --- linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/DocBook/kgdb.tmpl 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,250 @@ + + + + + + KGDB Internals + + + + Tom + Rini + +
+ trini@kernel.crashing.org +
+
+
+
+ + + + Amit S. + Kale + +
+ amitkale@linsyssoft.com +
+
+
+
+ + + 2004-2005 + MontaVista Software, Inc. + + + 2004 + Amit S. Kale + + + + + This file is licensed under the terms of the GNU General Public License + version 2. This program is licensed "as is" without any warranty of any + kind, whether express or implied. + + + +
+ + + + Introduction + + kgdb is a source level debugger for linux kernel. It is used along + with gdb to debug a linux kernel. Kernel developers can debug a kernel + similar to application programs with the use of kgdb. It makes it + possible to place breakpoints in kernel code, step through the code + and observe variables. + + + Two machines are required for using kgdb. One of these machines is a + development machine and the other is a test machine. The machines are + typically connected through a serial line, a null-modem cable which + connects their serial ports. It is also possible however, to use an + ethernet connection between the machines. The kernel to be debugged + runs on the test machine. gdb runs on the development machine. The + serial line or ethernet connection is used by gdb to communicate to + the kernel being debugged. + + + + Compiling a kernel + + To enable CONFIG_KGDB, look under the "Kernel debugging" + and then select "KGDB: kernel debugging with remote gdb". + + + The first choice for I/O is CONFIG_KGDB_ONLY_MODULES. + This means that you will only be able to use KGDB after loading a + kernel module that defines how you want to be able to talk with + KGDB. There are two other choices (more on some architectures) that + can be enabled as modules later, if not picked here. + + The first of these is CONFIG_KGDB_8250_NOMODULE. + This has sub-options such as CONFIG_KGDB_SIMPLE_SERIAL + which toggles choosing the serial port by ttyS number or by specifying + a port and IRQ number. + + + The second of these choices on most systems for I/O is + CONFIG_KGDBOE. This requires that the machine to be + debugged has an ethernet card which supports the netpoll API, such as + the cards supported by CONFIG_E100. There are no + sub-options for this, but a kernel command line option is required. + + + + Booting the kernel + + The Kernel command line option kgdbwait makes kgdb + wait for gdb connection during booting of a kernel. 
If the + CONFIG_KGDB_8250 driver is used (or if applicable, + another serial driver) this breakpoint will happen very early on, before + console output. If you wish to change serial port information and you + have enabled both CONFIG_KGDB_8250 and + CONFIG_KGDB_SIMPLE_SERIAL then you must pass the option + kgdb8250=<io or mmio>,<address>,<baud + rate>,<irq> before kgdbwait. + The values io or mmio refer to + if the address being passed next needs to be memory mapped + (mmio) or not. The address must + be passed in hex and is the hardware address and will be remapped if + passed as mmio. The values + baud rate and irq are base-10. + The supported values for baud rate are + 9600, 19200, + 38400, 57600, and + 115200. + + + To have KGDB stop the kernel and wait, with the compiled values for the + serial driver, pass in: kgdbwait. + + + To specify the values of the SH SCI(F) serial port at boot: + kgdbsci=0,115200. + + + To specify the values of the serial port at boot: + kgdb8250=io,3f8,115200,3. + On IA64 this could also be: + kgdb8250=mmio,0xff5e0000,115200,74 + And to have KGDB also stop the kernel and wait for GDB to connect, pass in + kgdbwait after this argument. + + + To configure the CONFIG_KGDBOE driver, pass in + kgdboe=[src-port]@<src-ip>/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr] + where: + + src-port (optional): source for UDP packets (defaults to 6443) + src-ip: source IP to use (interface address) + dev (optional): network interface (eth0) + tgt-port (optional): port GDB will use (defaults to 6442) + tgt-ip: IP address GDB will be connecting from + tgt-macaddr (optional): ethernet MAC address for logging agent (default is broadcast) + + + + The CONFIG_KGDBOE driver can be reconfigured at run + time, if CONFIG_SYSFS and + CONFIG_MODULES are enabled, by echo'ing a new config string to + /sys/module/kgdboe/parameters/kgdboe. The + driver can be unconfigured with the special string + not_configured.
+ + + + Connecting gdb + + If you have used any of the methods to have KGDB stop and create + an initial breakpoint described in the previous chapter, kgdb prints + the message "Waiting for connection from remote gdb..." on the console + and waits for connection from gdb. At this point you connect gdb to kgdb. + + + Example (serial): + + + % gdb ./vmlinux + (gdb) set remotebaud 115200 + (gdb) target remote /dev/ttyS0 + + + Example (ethernet): + + + % gdb ./vmlinux + (gdb) target remote udp:192.168.2.2:6443 + + + Once connected, you can debug a kernel the way you would debug an + application program. + + + + Architecture specific notes + + SuperH: The NMI switch found on some boards can be used to trigger an + initial breakpoint. Subsequent triggers do nothing. If console + is enabled on the SCI(F) serial port, and that is the port being used + for KGDB, then you must trigger a breakpoint via sysrq, NMI, or + some other method prior to connecting, or echo a control-c to the + serial port. Also, to use the SCI(F) port for KGDB, the + CONFIG_SERIAL_SH_SCI driver must be enabled. + + + + The common backend (required) + + There are a few flags which must be set on every architecture in + their <asm/kgdb.h> file. These are: + + + + NUMREGBYTES: The size in bytes of all of the registers, so + that we can ensure they will all fit into a packet. + + + BUFMAX: The size in bytes of the buffer GDB will read into. + This must be larger than NUMREGBYTES. + + + CACHE_FLUSH_IS_SAFE: Set to one if it always safe to call + flush_cache_range or flush_icache_range. On some architectures, + these functions may not be safe to call on SMP since we keep other + CPUs in a holding pattern. + + + + + + There are also the following functions for the common backend, + found in kernel/kgdb.c that must be supplied by the + architecture-specific backend. No weak version of these is provided. 
+ +!Iinclude/linux/kgdb.h + + + The common backend (optional) + + These functions are part of the common backend, found in kernel/kgdb.c + and are optionally implemented. Some functions (with _hw_ in the name) + end up being required on arches which use hardware breakpoints. + +!Ikernel/kgdb.c + + + Driver-Specific Functions + + Some of the I/O drivers have additional functions that can be + called, that are specific to the driver. Calls from other places + to these functions must be wrapped in #ifdefs for the driver in + question. + +!Idrivers/serial/8250_kgdb.c + +
diff -Nurb linux-2.6.22-570/Documentation/accounting/getdelays.c linux-2.6.22-591/Documentation/accounting/getdelays.c --- linux-2.6.22-570/Documentation/accounting/getdelays.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/accounting/getdelays.c 2007-12-21 15:36:11.000000000 -0500 @@ -49,6 +49,7 @@ int dbg; int print_delays; int print_io_accounting; +int print_task_context_switch_counts; __u64 stime, utime; #define PRINTF(fmt, arg...) { \ @@ -195,7 +196,7 @@ "IO %15s%15s\n" " %15llu%15llu\n" "MEM %15s%15s\n" - " %15llu%15llu\n\n", + " %15llu%15llu\n", "count", "real total", "virtual total", "delay total", t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, t->cpu_delay_total, @@ -204,6 +205,15 @@ "count", "delay total", t->swapin_count, t->swapin_delay_total); } +/* Print the task's voluntary and involuntary context-switch counts. */ +void task_context_switch_counts(struct taskstats *t) +{ + printf("\n\nTask %15s%15s\n" + " %15llu%15llu\n", + "voluntary", "nonvoluntary", + (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw); +} + void print_ioacct(struct taskstats *t) { printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n", @@ -235,7 +245,7 @@ struct msgtemplate msg; while (1) { - c = getopt(argc, argv, "diw:r:m:t:p:vl"); + c = getopt(argc, argv, "qdiw:r:m:t:p:vl"); if (c < 0) break; @@ -248,6 +258,10 @@ printf("printing IO accounting\n"); print_io_accounting = 1; break; + case 'q': + printf("printing task/process context switch rates\n"); + print_task_context_switch_counts = 1; + break; case 'w': logfile = strdup(optarg); printf("write to file %s\n", logfile); @@ -389,6 +403,8 @@ print_delayacct((struct taskstats *) NLA_DATA(na)); if (print_io_accounting) print_ioacct((struct taskstats *) NLA_DATA(na)); + if (print_task_context_switch_counts) + task_context_switch_counts((struct taskstats *) NLA_DATA(na)); if (fd) { if (write(fd, NLA_DATA(na), na->nla_len) < 0) { err(1,"write error\n"); diff -Nurb linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt linux-2.6.22-591/Documentation/accounting/taskstats-struct.txt ---
linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/accounting/taskstats-struct.txt 2007-12-21 15:36:11.000000000 -0500 @@ -22,6 +22,8 @@ /* Extended accounting fields end */ Their values are collected if CONFIG_TASK_XACCT is set. +4) Per-task and per-thread context switch count statistics + Future extension should add fields to the end of the taskstats struct, and should not change the relative position of each field within the struct. @@ -158,4 +160,8 @@ /* Extended accounting fields end */ +4) Per-task and per-thread statistics + __u64 nvcsw; /* Context voluntary switch counter */ + __u64 nivcsw; /* Context involuntary switch counter */ + } diff -Nurb linux-2.6.22-570/Documentation/cachetlb.txt linux-2.6.22-591/Documentation/cachetlb.txt --- linux-2.6.22-570/Documentation/cachetlb.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/cachetlb.txt 2007-12-21 15:36:11.000000000 -0500 @@ -253,7 +253,7 @@ The first of these two routines is invoked after map_vm_area() has installed the page table entries. The second is invoked - before unmap_vm_area() deletes the page table entries. + before unmap_kernel_range() deletes the page table entries. There exists another whole class of cpu cache issues which currently require a whole different set of interfaces to handle properly. diff -Nurb linux-2.6.22-570/Documentation/containers.txt linux-2.6.22-591/Documentation/containers.txt --- linux-2.6.22-570/Documentation/containers.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/containers.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,543 @@ + CONTAINERS + ------- + +Written by Paul Menage based on Documentation/cpusets.txt + +Original copyright statements from cpusets.txt: +Portions Copyright (C) 2004 BULL SA. +Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. 
+Modified by Paul Jackson +Modified by Christoph Lameter + +CONTENTS: +========= + +1. Containers + 1.1 What are containers ? + 1.2 Why are containers needed ? + 1.3 How are containers implemented ? + 1.4 What does notify_on_release do ? + 1.5 How do I use containers ? +2. Usage Examples and Syntax + 2.1 Basic Usage + 2.2 Attaching processes +3. Kernel API + 3.1 Overview + 3.2 Synchronization + 3.3 Subsystem API +4. Questions + +1. Containers +========== + +1.1 What are containers ? +---------------------- + +Containers provide a mechanism for aggregating/partitioning sets of +tasks, and all their future children, into hierarchical groups with +specialized behaviour. + +Definitions: + +A *container* associates a set of tasks with a set of parameters for one +or more subsystems. + +A *subsystem* is a module that makes use of the task grouping +facilities provided by containers to treat groups of tasks in +particular ways. A subsystem is typically a "resource controller" that +schedules a resource or applies per-container limits, but it may be +anything that wants to act on a group of processes, e.g. a +virtualization subsystem. + +A *hierarchy* is a set of containers arranged in a tree, such that +every task in the system is in exactly one of the containers in the +hierarchy, and a set of subsystems; each subsystem has system-specific +state attached to each container in the hierarchy. Each hierarchy has +an instance of the container virtual filesystem associated with it. + +At any one time there may be multiple active hierarchies of task +containers. Each hierarchy is a partition of all tasks in the system. + +User level code may create and destroy containers by name in an +instance of the container virtual file system, specify and query to +which container a task is assigned, and list the task pids assigned to +a container. Those creations and assignments only affect the hierarchy +associated with that instance of the container file system.
+ +On their own, the only use for containers is for simple job +tracking. The intention is that other subsystems hook into the generic +container support to provide new attributes for containers, such as +accounting/limiting the resources which processes in a container can +access. For example, cpusets (see Documentation/cpusets.txt) allows +you to associate a set of CPUs and a set of memory nodes with the +tasks in each container. + +1.2 Why are containers needed ? +---------------------------- + +There are multiple efforts to provide process aggregations in the +Linux kernel, mainly for resource tracking purposes. Such efforts +include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server +namespaces. These all require the basic notion of a +grouping/partitioning of processes, with newly forked processes ending +in the same group (container) as their parent process. + +The kernel container patch provides the minimum essential kernel +mechanisms required to efficiently implement such groups. It has +minimal impact on the system fast paths, and provides hooks for +specific subsystems such as cpusets to provide additional behaviour as +desired. + +Multiple hierarchy support is provided to allow for situations where +the division of tasks into containers is distinctly different for +different subsystems - having parallel hierarchies allows each +hierarchy to be a natural division of tasks, without having to handle +complex combinations of tasks that would be present if several +unrelated subsystems needed to be forced into the same tree of +containers. + +At one extreme, each resource controller or subsystem could be in a +separate hierarchy; at the other extreme, all subsystems +would be attached to the same hierarchy. + +As an example of a scenario (originally proposed by vatsa@in.ibm.com) +that can benefit from multiple hierarchies, consider a large +university server with various users - students, professors, system +tasks etc. 
The resource planning for this server could be along the +following lines: + + CPU : Top cpuset + / \ + CPUSet1 CPUSet2 + | | + (Profs) (Students) + + In addition (system tasks) are attached to topcpuset (so + that they can run anywhere) with a limit of 20% + + Memory : Professors (50%), students (30%), system (20%) + + Disk : Prof (50%), students (30%), system (20%) + + Network : WWW browsing (20%), Network File System (60%), others (20%) + / \ + Prof (15%) students (5%) + +Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go +into NFS network class. + +At the same time firefox/lynx will share an appropriate CPU/Memory class +depending on who launched it (prof/student). + +With the ability to classify tasks differently for different resources +(by putting those resource subsystems in different hierarchies) then +the admin can easily set up a script which receives exec notifications +and depending on who is launching the browser he can + + # echo browser_pid > /mnt/<restype>/<userclass>/tasks + +With only a single hierarchy, he now would potentially have to create +a separate container for every browser launched and associate it with +appropriate network and other resource class. This may lead to +proliferation of such containers. + +Also let's say that the administrator would like to give enhanced network +access temporarily to a student's browser (since it is night and the user +wants to do online gaming :) OR give one of the students simulation +apps enhanced CPU power. + +With ability to write pids directly to resource classes, it's just a +matter of : + + # echo pid > /mnt/network/<new_class>/tasks + (after some time) + # echo pid > /mnt/network/<orig_class>/tasks + +Without this ability, he would have to split the container into +multiple separate ones and then associate the new containers with the +new resource classes. + + + +1.3 How are containers implemented ?
+--------------------------------- + +Containers extends the kernel as follows: + + - Each task in the system has a reference-counted pointer to a + css_group. + + - A css_group contains a set of reference-counted pointers to + container_subsys_state objects, one for each container subsystem + registered in the system. There is no direct link from a task to + the container of which it's a member in each hierarchy, but this + can be determined by following pointers through the + container_subsys_state objects. This is because accessing the + subsystem state is something that's expected to happen frequently + and in performance-critical code, whereas operations that require a + task's actual container assignments (in particular, moving between + containers) are less common. A linked list runs through the cg_list + field of each task_struct using the css_group, anchored at + css_group->tasks. + + - A container hierarchy filesystem can be mounted for browsing and + manipulation from user space. + + - You can list all the tasks (by pid) attached to any container. + +The implementation of containers requires a few, simple hooks +into the rest of the kernel, none in performance critical paths: + + - in init/main.c, to initialize the root containers and initial + css_group at system boot. + + - in fork and exit, to attach and detach a task from its css_group. + +In addition a new file system, of type "container" may be mounted, to +enable browsing and modifying the containers presently known to the +kernel. When mounting a container hierarchy, you may specify a +comma-separated list of subsystems to mount as the filesystem mount +options. By default, mounting the container filesystem attempts to +mount a hierarchy containing all registered subsystems. + +If an active hierarchy with exactly the same set of subsystems already +exists, it will be reused for the new mount. 
If no existing hierarchy +matches, and any of the requested subsystems are in use in an existing +hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy +is activated, associated with the requested subsystems. + +It's not currently possible to bind a new subsystem to an active +container hierarchy, or to unbind a subsystem from an active container +hierarchy. This may be possible in future, but is fraught with nasty +error-recovery issues. + +When a container filesystem is unmounted, if there are any +subcontainers created below the top-level container, that hierarchy +will remain active even though unmounted; if there are no +subcontainers then the hierarchy will be deactivated. + +No new system calls are added for containers - all support for +querying and modifying containers is via this container file system. + +Each task under /proc has an added file named 'container' displaying, +for each active hierarchy, the subsystem names and the container name +as the path relative to the root of the container file system. + +Each container is represented by a directory in the container file system +containing the following files describing that container: + + - tasks: list of tasks (by pid) attached to that container + - notify_on_release flag: run /sbin/container_release_agent on exit? + +Other subsystems such as cpusets may add additional files in each +container directory. + +New containers are created using the mkdir system call or shell +command. The properties of a container, such as its flags, are +modified by writing to the appropriate file in that container's +directory, as listed above. + +The named hierarchical structure of nested containers allows partitioning +a large system into nested, dynamically changeable, "soft-partitions". + +The attachment of each task, automatically inherited at fork by any +children of that task, to a container allows organizing the work load +on a system into related sets of tasks.
A task may be re-attached to +any other container, if allowed by the permissions on the necessary +container file system directories. + +When a task is moved from one container to another, it gets a new +css_group pointer - if there's an already existing css_group with the +desired collection of containers then that group is reused, else a new +css_group is allocated. Note that the current implementation uses a +linear search to locate an appropriate existing css_group, so isn't +very efficient. A future version will use a hash table for better +performance. + +To allow access from a container to the css_groups (and hence tasks) +that comprise it, a set of cg_container_link objects form a lattice; +each cg_container_link is linked into a list of cg_container_links for +a single container on its cont_link_list field, and a list of +cg_container_links for a single css_group on its cg_link_list. + +Thus the set of tasks in a container can be listed by iterating over +each css_group that references the container, and sub-iterating over +each css_group's task set. + +The use of a Linux virtual file system (vfs) to represent the +container hierarchy provides for a familiar permission and name space +for containers, with a minimum of additional kernel code. + +1.4 What does notify_on_release do ? +------------------------------------ + +*** notify_on_release is disabled in the current patch set. It will be +*** reactivated in a future patch in a less-intrusive manner + +If the notify_on_release flag is enabled (1) in a container, then +whenever the last task in the container leaves (exits or attaches to +some other container) and the last child container of that container +is removed, then the kernel runs the command specified by the contents +of the "release_agent" file in that hierarchy's root directory, +supplying the pathname (relative to the mount point of the container +file system) of the abandoned container. This enables automatic +removal of abandoned containers. 
The default value of +notify_on_release in the root container at system boot is disabled +(0). The default value of other containers at creation is the current +value of their parent's notify_on_release setting. The default value of +a container hierarchy's release_agent path is empty. + +1.5 How do I use containers ? +-------------------------- + +To start a new job that is to be contained within a container, using +the "cpuset" container subsystem, the steps are something like: + + 1) mkdir /dev/container + 2) mount -t container -ocpuset cpuset /dev/container + 3) Create the new container by doing mkdir's and write's (or echo's) in + the /dev/container virtual file system. + 4) Start a task that will be the "founding father" of the new job. + 5) Attach that task to the new container by writing its pid to the + /dev/container tasks file for that container. + 6) fork, exec or clone the job tasks from this founding father task. + +For example, the following sequence of commands will set up a container +named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, +and then start a subshell 'sh' in that container: + + mount -t container cpuset -ocpuset /dev/container + cd /dev/container + mkdir Charlie + cd Charlie + /bin/echo $$ > tasks + sh + # The subshell 'sh' is now running in container Charlie + # The next line should display '/Charlie' + cat /proc/self/container + +2. Usage Examples and Syntax +============================ + +2.1 Basic Usage +--------------- + +Creating, modifying, using the containers can be done through the container +virtual filesystem. + +To mount a container hierarchy with all available subsystems, type: +# mount -t container xxx /dev/container + +The "xxx" is not interpreted by the container code, but will appear in +/proc/mounts so may be any useful identifying string that you like.
+ +To mount a container hierarchy with just the cpuset and numtasks +subsystems, type: +# mount -t container -o cpuset,numtasks hier1 /dev/container + +To change the set of subsystems bound to a mounted hierarchy, just +remount with different options: + +# mount -o remount,cpuset,ns /dev/container + +Note that changing the set of subsystems is currently only supported +when the hierarchy consists of a single (root) container. Supporting +the ability to arbitrarily bind/unbind subsystems from an existing +container hierarchy is intended to be implemented in the future. + +Then under /dev/container you can find a tree that corresponds to the +tree of the containers in the system. For instance, /dev/container +is the container that holds the whole system. + +If you want to create a new container under /dev/container: +# cd /dev/container +# mkdir my_container + +Now you want to do something with this container. +# cd my_container + +In this directory you can find several files: +# ls +notify_on_release release_agent tasks +(plus whatever files are added by the attached subsystems) + +Now attach your shell to this container: +# /bin/echo $$ > tasks + +You can also create containers inside your container by using mkdir in this +directory. +# mkdir my_sub_cs + +To remove a container, just use rmdir: +# rmdir my_sub_cs + +This will fail if the container is in use (has containers inside, or +has processes attached, or is held alive by other subsystem-specific +reference). + +2.2 Attaching processes +----------------------- + +# /bin/echo PID > tasks + +Note that it is PID, not PIDs. You can only attach ONE task at a time. +If you have several tasks to attach, you have to do it one after another: + +# /bin/echo PID1 > tasks +# /bin/echo PID2 > tasks + ... +# /bin/echo PIDn > tasks + +3. Kernel API +============= + +3.1 Overview +------------ + +Each kernel subsystem that wants to hook into the generic container +system needs to create a container_subsys object. 
This contains +various methods, which are callbacks from the container system, along +with a subsystem id which will be assigned by the container system. + +Other fields in the container_subsys object include: + +- subsys_id: a unique array index for the subsystem, indicating which + entry in container->subsys[] this subsystem should be + managing. Initialized by container_register_subsys(); prior to this + it should be initialized to -1 + +- hierarchy: an index indicating which hierarchy, if any, this + subsystem is currently attached to. If this is -1, then the + subsystem is not attached to any hierarchy, and all tasks should be + considered to be members of the subsystem's top_container. It should + be initialized to -1. + +- name: should be initialized to a unique subsystem name prior to + calling container_register_subsystem. Should be no longer than + MAX_CONTAINER_TYPE_NAMELEN + +Each container object created by the system has an array of pointers, +indexed by subsystem id; this pointer is entirely managed by the +subsystem; the generic container code will never touch this pointer. + +3.2 Synchronization +------------------- + +There is a global mutex, container_mutex, used by the container +system. This should be taken by anything that wants to modify a +container. It may also be taken to prevent containers from being +modified, but more specific locks may be more appropriate in that +situation. + +See kernel/container.c for more details. + +Subsystems can take/release the container_mutex via the functions +container_lock()/container_unlock(), and can +take/release the callback_mutex via the functions +container_lock()/container_unlock(). 
+ +Accessing a task's container pointer may be done in the following ways: +- while holding container_mutex +- while holding the task's alloc_lock (via task_lock()) +- inside an rcu_read_lock() section via rcu_dereference() + +3.3 Subsystem API +-------------------------- + +Each subsystem should: + +- add an entry in linux/container_subsys.h +- define a container_subsys object called _subsys + +Each subsystem may export the following methods. The only mandatory +methods are create/destroy. Any others that are null are presumed to +be successful no-ops. + +int create(struct container *cont) +LL=container_mutex + +Called to create a subsystem state object for a container. The +subsystem should set its subsystem pointer for the passed container, +returning 0 on success or a negative error code. On success, the +subsystem pointer should point to a structure of type +container_subsys_state (typically embedded in a larger +subsystem-specific object), which will be initialized by the container +system. Note that this will be called at initialization to create the +root subsystem state for this subsystem; this case can be identified +by the passed container object having a NULL parent (since it's the +root of the hierarchy) and may be an appropriate place for +initialization code. + +void destroy(struct container *cont) +LL=container_mutex + +The container system is about to destroy the passed container; the +subsystem should do any necessary cleanup + +int can_attach(struct container_subsys *ss, struct container *cont, + struct task_struct *task) +LL=container_mutex + +Called prior to moving a task into a container; if the subsystem +returns an error, this will abort the attach operation. If a NULL +task is passed, then a successful result indicates that *any* +unspecified task can be moved into the container. Note that this isn't +called on a fork. If this method returns 0 (success) then this should +remain valid while the caller holds container_mutex. 
+ +void attach(struct container_subsys *ss, struct container *cont, + struct container *old_cont, struct task_struct *task) +LL=container_mutex + + +Called after the task has been attached to the container, to allow any +post-attachment activity that requires memory allocations or blocking. + +void fork(struct container_subsys *ss, struct task_struct *task) +LL=callback_mutex, maybe read_lock(tasklist_lock) + +Called when a task is forked into a container. Also called during +registration for all existing tasks. + +void exit(struct container_subsys *ss, struct task_struct *task) +LL=callback_mutex + +Called during task exit. + +int populate(struct container_subsys *ss, struct container *cont) +LL=none + +Called after creation of a container to allow a subsystem to populate +the container directory with file entries. The subsystem should make +calls to container_add_file() with objects of type cftype (see +include/linux/container.h for details). Note that although this +method can return an error code, the error code is currently not +always handled well. + +void post_clone(struct container_subsys *ss, struct container *cont) + +Called at the end of container_clone() to do any parameter +initialization which might be required before a task could attach. For +example in cpusets, no task may attach before 'cpus' and 'mems' are set +up. + +void bind(struct container_subsys *ss, struct container *root) +LL=callback_mutex + +Called when a container subsystem is rebound to a different hierarchy +and root container. Currently this will only involve movement between +the default hierarchy (which never has sub-containers) and a hierarchy +that is being created/destroyed (and hence has no sub-containers). + +4. Questions +============ + +Q: what's up with this '/bin/echo' ? +A: bash's builtin 'echo' command does not check calls to write() against + errors. If you use it in the container file system, you won't be + able to tell whether a command succeeded or failed. 
+ +Q: When I attach processes, only the first of the line gets really attached ! +A: We can only return one error code per call to write(). So you should also + put only ONE pid. + diff -Nurb linux-2.6.22-570/Documentation/cpuidle/core.txt linux-2.6.22-591/Documentation/cpuidle/core.txt --- linux-2.6.22-570/Documentation/cpuidle/core.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/cpuidle/core.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,17 @@ + + Supporting multiple CPU idle levels in kernel + + cpuidle + +General Information: + +Various CPUs today support multiple idle levels that are differentiated +by varying exit latencies and power consumption during idle. +cpuidle is a generic in-kernel infrastructure that separates +idle policy (governor) from idle mechanism (driver) and provides a +standardized infrastructure to support independent development of +governors and drivers. + +cpuidle resides under /drivers/cpuidle. + + diff -Nurb linux-2.6.22-570/Documentation/cpuidle/driver.txt linux-2.6.22-591/Documentation/cpuidle/driver.txt --- linux-2.6.22-570/Documentation/cpuidle/driver.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/cpuidle/driver.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,24 @@ + + + Supporting multiple CPU idle levels in kernel + + cpuidle drivers + + + + +cpuidle driver supports capability detection for a particular system. The +init and exit routines will be called for each online CPU, with a percpu +cpuidle_driver object and driver should fill in cpuidle_states inside +cpuidle_driver depending on the CPU capability. + +Driver can handle dynamic state changes (like battery<->AC), by calling +force_redetect interface. + +It is possible to have more than one driver registered at the same time and +user can switch between drivers using /sysfs interface. 
+ +Interfaces: +int cpuidle_register_driver(struct cpuidle_driver *drv); +void cpuidle_unregister_driver(struct cpuidle_driver *drv); +int cpuidle_force_redetect(struct cpuidle_device *dev); diff -Nurb linux-2.6.22-570/Documentation/cpuidle/governor.txt linux-2.6.22-591/Documentation/cpuidle/governor.txt --- linux-2.6.22-570/Documentation/cpuidle/governor.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/cpuidle/governor.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,24 @@ + + + + Supporting multiple CPU idle levels in kernel + + cpuidle governors + + + + +cpuidle governor is a policy routine that decides what idle state to enter at +any given time. cpuidle core uses different callbacks to governor while +handling idle entry. +* select_state callback where governor can determine next idle state to enter +* prepare_idle callback is called before entering an idle state +* scan callback is called after a driver forces redetection of the states + +More than one governor can be registered at the same time and +user can switch between governors using /sysfs interface. 
+ +Interfaces: +int cpuidle_register_governor(struct cpuidle_governor *gov); +void cpuidle_unregister_governor(struct cpuidle_governor *gov); + diff -Nurb linux-2.6.22-570/Documentation/cpuidle/sysfs.txt linux-2.6.22-591/Documentation/cpuidle/sysfs.txt --- linux-2.6.22-570/Documentation/cpuidle/sysfs.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/cpuidle/sysfs.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,27 @@ + + + Supporting multiple CPU idle levels in kernel + + cpuidle sysfs + +System global cpuidle information is under +/sys/devices/system/cpu/cpuidle + +The current interfaces in this directory have self-explanatory names: +* available_drivers +* available_governors +* current_driver +* current_governor + +Per logical CPU specific cpuidle information is under +/sys/devices/system/cpu/cpuX/cpuidle +for each online cpu X + +Under this percpu directory, there is a directory for each idle state supported +by the driver, which in turn has +* latency +* power +* time +* usage + + diff -Nurb linux-2.6.22-570/Documentation/cpusets.txt linux-2.6.22-591/Documentation/cpusets.txt --- linux-2.6.22-570/Documentation/cpusets.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/cpusets.txt 2007-12-21 15:36:11.000000000 -0500 @@ -7,6 +7,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. Modified by Paul Jackson Modified by Christoph Lameter +Modified by Paul Menage CONTENTS: ========= @@ -16,10 +17,9 @@ 1.2 Why are cpusets needed ? 1.3 How are cpusets implemented ? 1.4 What are exclusive cpusets ? - 1.5 What does notify_on_release do ? - 1.6 What is memory_pressure ? - 1.7 What is memory spread ? - 1.8 How do I use cpusets ? + 1.5 What is memory_pressure ? + 1.6 What is memory spread ? + 1.7 How do I use cpusets ? 2. Usage Examples and Syntax 2.1 Basic Usage 2.2 Adding/removing cpus @@ -43,18 +43,19 @@ hooks, beyond what is already present, required to manage dynamic job placement on large systems. 
-Each task has a pointer to a cpuset. Multiple tasks may reference -the same cpuset. Requests by a task, using the sched_setaffinity(2) -system call to include CPUs in its CPU affinity mask, and using the -mbind(2) and set_mempolicy(2) system calls to include Memory Nodes -in its memory policy, are both filtered through that tasks cpuset, -filtering out any CPUs or Memory Nodes not in that cpuset. The -scheduler will not schedule a task on a CPU that is not allowed in -its cpus_allowed vector, and the kernel page allocator will not -allocate a page on a node that is not allowed in the requesting tasks -mems_allowed vector. +Cpusets use the generic container subsystem described in +Documentation/container.txt. -User level code may create and destroy cpusets by name in the cpuset +Requests by a task, using the sched_setaffinity(2) system call to +include CPUs in its CPU affinity mask, and using the mbind(2) and +set_mempolicy(2) system calls to include Memory Nodes in its memory +policy, are both filtered through that tasks cpuset, filtering out any +CPUs or Memory Nodes not in that cpuset. The scheduler will not +schedule a task on a CPU that is not allowed in its cpus_allowed +vector, and the kernel page allocator will not allocate a page on a +node that is not allowed in the requesting tasks mems_allowed vector. + +User level code may create and destroy cpusets by name in the container virtual file system, manage the attributes and permissions of these cpusets and which CPUs and Memory Nodes are assigned to each cpuset, specify and query to which cpuset a task is assigned, and list the @@ -86,9 +87,6 @@ and a database), or * NUMA systems running large HPC applications with demanding performance characteristics. 
- * Also cpu_exclusive cpusets are useful for servers running orthogonal - workloads such as RT applications requiring low latency and HPC - applications that are throughput sensitive These subsets, or "soft partitions" must be able to be dynamically adjusted, as the job mix changes, without impacting other concurrently @@ -117,7 +115,7 @@ - Cpusets are sets of allowed CPUs and Memory Nodes, known to the kernel. - Each task in the system is attached to a cpuset, via a pointer - in the task structure to a reference counted cpuset structure. + in the task structure to a reference counted container structure. - Calls to sched_setaffinity are filtered to just those CPUs allowed in that tasks cpuset. - Calls to mbind and set_mempolicy are filtered to just @@ -131,8 +129,6 @@ - A cpuset may be marked exclusive, which ensures that no other cpuset (except direct ancestors and descendents) may contain any overlapping CPUs or Memory Nodes. - Also a cpu_exclusive cpuset would be associated with a sched - domain. - You can list all the tasks (by pid) attached to any cpuset. The implementation of cpusets requires a few, simple hooks @@ -144,23 +140,15 @@ allowed in that tasks cpuset. - in sched.c migrate_all_tasks(), to keep migrating tasks within the CPUs allowed by their cpuset, if possible. - - in sched.c, a new API partition_sched_domains for handling - sched domain changes associated with cpu_exclusive cpusets - and related changes in both sched.c and arch/ia64/kernel/domain.c - in the mbind and set_mempolicy system calls, to mask the requested Memory Nodes by what's allowed in that tasks cpuset. - in page_alloc.c, to restrict memory to allowed nodes. - in vmscan.c, to restrict page recovery to the current cpuset. -In addition a new file system, of type "cpuset" may be mounted, -typically at /dev/cpuset, to enable browsing and modifying the cpusets -presently known to the kernel. 
No new system calls are added for -cpusets - all support for querying and modifying cpusets is via -this cpuset file system. - -Each task under /proc has an added file named 'cpuset', displaying -the cpuset name, as the path relative to the root of the cpuset file -system. +You should mount the "container" filesystem type in order to enable +browsing and modifying the cpusets presently known to the kernel. No +new system calls are added for cpusets - all support for querying and +modifying cpusets is via this cpuset file system. The /proc//status file for each task has two added lines, displaying the tasks cpus_allowed (on which CPUs it may be scheduled) @@ -170,16 +158,15 @@ Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff Mems_allowed: ffffffff,ffffffff -Each cpuset is represented by a directory in the cpuset file system -containing the following files describing that cpuset: +Each cpuset is represented by a directory in the container file system +containing (on top of the standard container files) the following +files describing that cpuset: - cpus: list of CPUs in that cpuset - mems: list of Memory Nodes in that cpuset - memory_migrate flag: if set, move pages to cpusets nodes - cpu_exclusive flag: is cpu placement exclusive? - mem_exclusive flag: is memory placement exclusive? - - tasks: list of tasks (by pid) attached to that cpuset - - notify_on_release flag: run /sbin/cpuset_release_agent on exit? - memory_pressure: measure of how much paging pressure in cpuset In addition, the root cpuset only has the following file: @@ -231,15 +218,6 @@ a direct ancestor or descendent, may share any of the same CPUs or Memory Nodes. -A cpuset that is cpu_exclusive has a scheduler (sched) domain -associated with it. The sched domain consists of all CPUs in the -current cpuset that are not part of any exclusive child cpusets. 
-This ensures that the scheduler load balancing code only balances -against the CPUs that are in the sched domain as defined above and -not all of the CPUs in the system. This removes any overhead due to -load balancing code trying to pull tasks outside of the cpu_exclusive -cpuset only to be prevented by the tasks' cpus_allowed mask. - A cpuset that is mem_exclusive restricts kernel allocations for page, buffer and other data commonly shared by the kernel across multiple users. All cpusets, whether mem_exclusive or not, restrict @@ -253,21 +231,7 @@ outside even a mem_exclusive cpuset. -1.5 What does notify_on_release do ? ------------------------------------- - -If the notify_on_release flag is enabled (1) in a cpuset, then whenever -the last task in the cpuset leaves (exits or attaches to some other -cpuset) and the last child cpuset of that cpuset is removed, then -the kernel runs the command /sbin/cpuset_release_agent, supplying the -pathname (relative to the mount point of the cpuset file system) of the -abandoned cpuset. This enables automatic removal of abandoned cpusets. -The default value of notify_on_release in the root cpuset at system -boot is disabled (0). The default value of other cpusets at creation -is the current value of their parents notify_on_release setting. - - -1.6 What is memory_pressure ? +1.5 What is memory_pressure ? ----------------------------- The memory_pressure of a cpuset provides a simple per-cpuset metric of the rate that the tasks in a cpuset are attempting to free up in @@ -324,7 +288,7 @@ times 1000. -1.7 What is memory spread ? +1.6 What is memory spread ? --------------------------- There are two boolean flag files per cpuset that control where the kernel allocates pages for the file system buffers and related in @@ -395,7 +359,7 @@ can become very uneven. -1.8 How do I use cpusets ? +1.7 How do I use cpusets ? 
-------------------------- In order to minimize the impact of cpusets on critical kernel @@ -485,7 +449,7 @@ To start a new job that is to be contained within a cpuset, the steps are: 1) mkdir /dev/cpuset - 2) mount -t cpuset none /dev/cpuset + 2) mount -t container -ocpuset cpuset /dev/cpuset 3) Create the new cpuset by doing mkdir's and write's (or echo's) in the /dev/cpuset virtual file system. 4) Start a task that will be the "founding father" of the new job. @@ -497,7 +461,7 @@ named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, and then start a subshell 'sh' in that cpuset: - mount -t cpuset none /dev/cpuset + mount -t container -ocpuset cpuset /dev/cpuset cd /dev/cpuset mkdir Charlie cd Charlie @@ -529,7 +493,7 @@ virtual filesystem. To mount it, type: -# mount -t cpuset none /dev/cpuset +# mount -t container -o cpuset cpuset /dev/cpuset Then under /dev/cpuset you can find a tree that corresponds to the tree of the cpusets in the system. For instance, /dev/cpuset @@ -572,6 +536,18 @@ This will fail if the cpuset is in use (has cpusets inside, or has processes attached). +Note that for legacy reasons, the "cpuset" filesystem exists as a +wrapper around the container filesystem. 
+ +The command + +mount -t cpuset X /dev/cpuset + +is equivalent to + +mount -t container -ocpuset X /dev/cpuset +echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent + 2.2 Adding/removing cpus ------------------------ diff -Nurb linux-2.6.22-570/Documentation/feature-removal-schedule.txt linux-2.6.22-591/Documentation/feature-removal-schedule.txt --- linux-2.6.22-570/Documentation/feature-removal-schedule.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/feature-removal-schedule.txt 2007-12-21 15:36:11.000000000 -0500 @@ -162,6 +162,33 @@ --------------------------- +What: filemap_nopage, filemap_populate +When: April 2007 +Why: These legacy interfaces no longer have any callers in the kernel and + any functionality provided can be provided with filemap_fault. The + removal schedule is short because they are a big maintenance burden + and have some bugs. +Who: Nick Piggin + +--------------------------- + +What: vm_ops.populate, install_page +When: April 2007 +Why: These legacy interfaces no longer have any callers in the kernel and + any functionality provided can be provided with vm_ops.fault. +Who: Nick Piggin + +--------------------------- + +What: vm_ops.nopage +When: February 2008, provided in-kernel callers have been converted +Why: This interface is replaced by vm_ops.fault, but it has been around + forever, is used by a lot of drivers, and doesn't cost much to + maintain. +Who: Nick Piggin + +--------------------------- + What: Interrupt only SA_* flags When: September 2007 Why: The interrupt related SA_* flags are replaced by IRQF_* to move them @@ -280,25 +307,6 @@ --------------------------- -What: Multipath cached routing support in ipv4 -When: in 2.6.23 -Why: Code was merged, then submitter immediately disappeared leaving - us with no maintainer and lots of bugs. 
The code should not have - been merged in the first place, and many aspects of it's - implementation are blocking more critical core networking - development. It's marked EXPERIMENTAL and no distribution - enables it because it cause obscure crashes due to unfixable bugs - (interfaces don't return errors so memory allocation can't be - handled, calling contexts of these interfaces make handling - errors impossible too because they get called after we've - totally commited to creating a route object, for example). - This problem has existed for years and no forward progress - has ever been made, and nobody steps up to try and salvage - this code, so we're going to finally just get rid of it. -Who: David S. Miller - ---------------------------- - What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) When: December 2007 Why: These functions are a leftover from 2.4 times. They have several diff -Nurb linux-2.6.22-570/Documentation/filesystems/00-INDEX linux-2.6.22-591/Documentation/filesystems/00-INDEX --- linux-2.6.22-570/Documentation/filesystems/00-INDEX 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/filesystems/00-INDEX 2007-12-21 15:36:11.000000000 -0500 @@ -84,6 +84,8 @@ - info and mount options for the UDF filesystem. ufs.txt - info on the ufs filesystem. 
+unionfs/ + - info on the unionfs filesystem vfat.txt - info on using the VFAT filesystem used in Windows NT and Windows 95 vfs.txt diff -Nurb linux-2.6.22-570/Documentation/filesystems/Locking linux-2.6.22-591/Documentation/filesystems/Locking --- linux-2.6.22-570/Documentation/filesystems/Locking 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/filesystems/Locking 2007-12-21 15:36:11.000000000 -0500 @@ -510,12 +510,14 @@ prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); + struct page *(*fault)(struct vm_area_struct*, struct fault_data *); struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); locking rules: BKL mmap_sem open: no yes close: no yes +fault: no yes nopage: no yes ================================================================================ diff -Nurb linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt linux-2.6.22-591/Documentation/filesystems/configfs/configfs.txt --- linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/filesystems/configfs/configfs.txt 2007-12-21 15:36:11.000000000 -0500 @@ -238,6 +238,8 @@ struct config_group *(*make_group)(struct config_group *group, const char *name); int (*commit_item)(struct config_item *item); + void (*disconnect_notify)(struct config_group *group, + struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); }; @@ -268,6 +270,16 @@ for the item to actually disappear from the subsystem's usage. But it is gone from configfs. +When drop_item() is called, the item's linkage has already been torn +down. It no longer has a reference on its parent and has no place in +the item hierarchy. If a client needs to do some cleanup before this +teardown happens, the subsystem can implement the +ct_group_ops->disconnect_notify() method. 
The method is called after +configfs has removed the item from the filesystem view but before the +item is removed from its parent group. Like drop_item(), +disconnect_notify() is void and cannot fail. Client subsystems should +not drop any references here, as they still must do it in drop_item(). + A config_group cannot be removed while it still has child items. This is implemented in the configfs rmdir(2) code. ->drop_item() will not be called, as the item has not been dropped. rmdir(2) will fail, as the @@ -386,6 +398,33 @@ rmdir(2). They also are not considered when rmdir(2) on the parent group is checking for children. +[Dependent Subsystems] + +Sometimes other drivers depend on particular configfs items. For +example, ocfs2 mounts depend on a heartbeat region item. If that +region item is removed with rmdir(2), the ocfs2 mount must BUG or go +readonly. Not happy. + +configfs provides two additional API calls: configfs_depend_item() and +configfs_undepend_item(). A client driver can call +configfs_depend_item() on an existing item to tell configfs that it is +depended on. configfs will then return -EBUSY from rmdir(2) for that +item. When the item is no longer depended on, the client driver calls +configfs_undepend_item() on it. + +These APIs cannot be called underneath any configfs callbacks, as +they will conflict. They can block and allocate. A client driver +probably shouldn't be calling them of its own gumption. Rather it should +be providing an API that external subsystems call. + +How does this work? Imagine the ocfs2 mount process. When it mounts, +it asks for a heartbeat region item. This is done via a call into the +heartbeat code. Inside the heartbeat code, the region item is looked +up. Here, the heartbeat code calls configfs_depend_item(). If it +succeeds, then heartbeat knows the region is safe to give to ocfs2. +If it fails, it was being torn down anyway, and heartbeat can gracefully +pass up an error. 
+ [Committable Items] NOTE: Committable items are currently unimplemented. diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX linux-2.6.22-591/Documentation/filesystems/unionfs/00-INDEX --- linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/filesystems/unionfs/00-INDEX 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,10 @@ +00-INDEX + - this file. +concepts.txt + - A brief introduction of concepts. +issues.txt + - A summary of known issues with unionfs. +rename.txt + - Information regarding rename operations. +usage.txt + - Usage information and examples. diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt linux-2.6.22-591/Documentation/filesystems/unionfs/concepts.txt --- linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/filesystems/unionfs/concepts.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,75 @@ +Unionfs 2.0 CONCEPTS: +===================== + +This file describes the concepts needed by a namespace unification file +system. + +Branch Priority: +================ + +Each branch is assigned a unique priority - starting from 0 (highest +priority). No two branches can have the same priority. + + +Branch Mode: +============ + +Each branch is assigned a mode - read-write or read-only. This allows +directories on media mounted read-write to be used in a read-only manner. + + +Whiteouts: +========== + +A whiteout removes a file name from the namespace. Whiteouts are needed when +one attempts to remove a file on a read-only branch. + +Suppose we have a two-branch union, where branch 0 is read-write and branch +1 is read-only. And a file 'foo' on branch 1: + +./b0/ +./b1/ +./b1/foo + +The unified view would simply be: + +./union/ +./union/foo + +Since 'foo' is stored on a read-only branch, it cannot be removed. 
A +whiteout is used to remove the name 'foo' from the unified namespace. Again, +since branch 1 is read-only, the whiteout cannot be created there. So, we +try on a higher priority (lower numerically) branch and create the whiteout +there. + +./b0/ +./b0/.wh.foo +./b1/ +./b1/foo + +Later, when Unionfs traverses branches (due to lookup or readdir), it +eliminates 'foo' from the namespace (as well as the whiteout itself.) + + +Duplicate Elimination: +====================== + +It is possible for files on different branches to have the same name. +Unionfs then has to select which instance of the file to show to the user. +Given the fact that each branch has a priority associated with it, the +simplest solution is to take the instance from the highest priority branch +(numerically lowest value) and "hide" the others. + + +Copyup: +======= + +When a change is made to the contents of a file's data or meta-data, they +have to be stored somewhere. The best way is to create a copy of the +original file on a branch that is writable, and then redirect the write +through to this copy. The copy must be made on a higher priority branch so +that lookup and readdir return this newer "version" of the file rather than +the original (see duplicate elimination). + + +For more information, see . diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt linux-2.6.22-591/Documentation/filesystems/unionfs/issues.txt --- linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/filesystems/unionfs/issues.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,39 @@ +KNOWN Unionfs 2.0 ISSUES: +========================= + +1. The NFS server returns -EACCES for read-only exports, instead of -EROFS. + This means we can't reliably detect a read-only NFS export. + +2. 
Modifying a Unionfs branch directly, while the union is mounted, is + currently unsupported, because it could cause a cache incoherency between + the union layer and the lower file systems (for that reason, Unionfs + currently prohibits using branches which overlap with each other, even + partially). We have tested Unionfs under such conditions, and fixed any + bugs we found (Unionfs comes with an extensive regression test suite). + However, it may still be possible that changes made to lower branches + directly could cause cache incoherency which, in the worst case, may cause + an oops. + + Unionfs 2.0 has a temporary workaround for this. You can force Unionfs + to increase the superblock generation number, and hence purge all cached + Unionfs objects, which would then be re-gotten from the lower branches. + This should ensure cache consistency. To increase the generation number, + execute the command: + + mount -t unionfs -o remount,incgen none MOUNTPOINT + + Note that the older way of incrementing the generation number using an + ioctl, is no longer supported in Unionfs 2.0. Ioctls in general are not + encouraged. Plus, an ioctl is a per-file concept, whereas the generation + number is a per-file-system concept. Worse, such an ioctl requires an + open file, which then has to be invalidated by the very nature of the + generation number increase (read: the old generation increase ioctl was + pretty racy). + +3. Unionfs should not use lookup_one_len() on the underlying f/s as it + confuses NFS. Currently, unionfs_lookup() passes lookup intents to the + lower file-system, this eliminates part of the problem. The remaining + calls to lookup_one_len may need to be changed to pass an intent. + + +For more information, see .
diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt linux-2.6.22-591/Documentation/filesystems/unionfs/rename.txt --- linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/filesystems/unionfs/rename.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,31 @@ +Rename is a complex beast. The following table shows which rename(2) operations +should succeed and which should fail. + +o: success +E: error (either unionfs or vfs) +X: EXDEV + +none = file does not exist +file = file is a file +dir = file is an empty directory +child= file is a non-empty directory +wh = file is a directory containing only whiteouts; this makes it logically + empty + + none file dir child wh +file o o E E E +dir o E o E o +child X E X E X +wh o E o E o + + +Renaming directories: +===================== + +Whenever an empty (either physically or logically) directory is being renamed, +the following sequence of events should take place: + +1) Remove whiteouts from both source and destination directory +2) Rename source to destination +3) Make destination opaque to prevent anything under it from showing up + diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt linux-2.6.22-591/Documentation/filesystems/unionfs/usage.txt --- linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/filesystems/unionfs/usage.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,90 @@ +Unionfs is a stackable unification file system, which can appear to merge +the contents of several directories (branches), while keeping their physical +content separate. Unionfs is useful for unified source tree management, +merged contents of split CD-ROM, merged separate software package +directories, data grids, and more.
Unionfs allows any mix of read-only and +read-write branches, as well as insertion and deletion of branches anywhere +in the fan-out. To maintain Unix semantics, Unionfs handles elimination of +duplicates, partial-error conditions, and more. + +# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT + +The available branch-option for the mount command is: + + dirs=branch[=ro|=rw][:...] + +specifies a separated list of which directories compose the union. +Directories that come earlier in the list have a higher precedence than +those which come later. Additionally, read-only or read-write permissions of +the branch can be specified by appending =ro or =rw (default) to each +directory. + +Syntax: + + dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw] + +Example: + + dirs=/writable_branch=rw:/read-only_branch=ro + + +DYNAMIC BRANCH MANAGEMENT AND REMOUNTS +====================================== + +You can remount a union and change its overall mode, or reconfigure the +branches, as follows. 
+ +To downgrade a union from read-write to read-only: + +# mount -t unionfs -o remount,ro none MOUNTPOINT + +To upgrade a union from read-only to read-write: + +# mount -t unionfs -o remount,rw none MOUNTPOINT + +To delete a branch /foo, regardless of where it is in the current union: + +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT + +To insert (add) a branch /foo before /bar: + +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT + +To insert (add) a branch /foo (with the "rw" mode flag) before /bar: + +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT + +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a +new highest-priority branch), you can use the above syntax, or use a short +hand version as follows: + +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT + +To append a branch to the very end (new lowest-priority branch): + +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT + +To append a branch to the very end (new lowest-priority branch), in +read-only mode: + +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT + +Finally, to change the mode of one existing branch, say /foo, from read-only +to read-write, and change /bar from read-write to read-only: + +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT + + +CACHE CONSISTENCY +================= + +If you modify any file on any of the lower branches directly, while there is +a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs +to purge its caches and re-get the objects. To do that, you have to +increment the generation number of the superblock using the following +command: + +# mount -t unionfs -o remount,incgen none MOUNTPOINT + + +For more information, see .
diff -Nurb linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c linux-2.6.22-591/Documentation/firmware_class/firmware_sample_firmware_class.c --- linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/firmware_class/firmware_sample_firmware_class.c 2007-12-21 15:36:11.000000000 -0500 @@ -78,6 +78,7 @@ firmware_loading_show, firmware_loading_store); static ssize_t firmware_data_read(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct class_device *class_dev = to_class_dev(kobj); @@ -88,6 +89,7 @@ return count; } static ssize_t firmware_data_write(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct class_device *class_dev = to_class_dev(kobj); diff -Nurb linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt linux-2.6.22-591/Documentation/power/freezing-of-tasks.txt --- linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/power/freezing-of-tasks.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,160 @@ +Freezing of tasks + (C) 2007 Rafael J. Wysocki , GPL + +I. What is the freezing of tasks? + +The freezing of tasks is a mechanism by which user space processes and some +kernel threads are controlled during hibernation or system-wide suspend (on some +architectures). + +II. How does it work? + +There are four per-task flags used for that, PF_NOFREEZE, PF_FROZEN, TIF_FREEZE +and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have +PF_NOFREEZE unset (all user space processes and some kernel threads) are +regarded as 'freezable' and treated in a special way before the system enters a +suspend state as well as before a hibernation image is created (in what follows +we only consider hibernation, but the description also applies to suspend). 
+ +Namely, as the first step of the hibernation procedure the function +freeze_processes() (defined in kernel/power/process.c) is called. It executes +try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and +sends a fake signal to each of them. A task that receives such a signal and has +TIF_FREEZE set, should react to it by calling the refrigerator() function +(defined in kernel/power/process.c), which sets the task's PF_FROZEN flag, +changes its state to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is +cleared for it. Then, we say that the task is 'frozen' and therefore the set of +functions handling this mechanism is called 'the freezer' (these functions are +defined in kernel/power/process.c and include/linux/freezer.h). User space +processes are generally frozen before kernel threads. + +It is not recommended to call refrigerator() directly. Instead, it is +recommended to use the try_to_freeze() function (defined in +include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the +task enter refrigerator() if the flag is set. + +For user space processes try_to_freeze() is called automatically from the +signal-handling code, but the freezable kernel threads need to call it +explicitly in suitable places. The code to do this may look like the following: + + do { + hub_events(); + wait_event_interruptible(khubd_wait, + !list_empty(&hub_event_list)); + try_to_freeze(); + } while (!signal_pending(current)); + +(from drivers/usb/core/hub.c::hub_thread()). + +If a freezable kernel thread fails to call try_to_freeze() after the freezer has +set TIF_FREEZE for it, the freezing of tasks will fail and the entire +hibernation operation will be cancelled. For this reason, freezable kernel +threads must call try_to_freeze() somewhere. 
+ +After the system memory state has been restored from a hibernation image and +devices have been reinitialized, the function thaw_processes() is called in +order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that +have been frozen leave refrigerator() and continue running. + +III. Which kernel threads are freezable? + +Kernel threads are not freezable by default. However, a kernel thread may clear +PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE +directly is strongly discouraged). From this point it is regarded as freezable +and must call try_to_freeze() in a suitable place. + +IV. Why do we do that? + +Generally speaking, there is a couple of reasons to use the freezing of tasks: + +1. The principal reason is to prevent filesystems from being damaged after +hibernation. At the moment we have no simple means of checkpointing +filesystems, so if there are any modifications made to filesystem data and/or +metadata on disks, we cannot bring them back to the state from before the +modifications. At the same time each hibernation image contains some +filesystem-related information that must be consistent with the state of the +on-disk data and metadata after the system memory state has been restored from +the image (otherwise the filesystems will be damaged in a nasty way, usually +making them almost impossible to repair). We therefore freeze tasks that might +cause the on-disk filesystems' data and metadata to be modified after the +hibernation image has been created and before the system is finally powered off. +The majority of these are user space processes, but if any of the kernel threads +may cause something like this to happen, they have to be freezable. + +2. The second reason is to prevent user space processes and some kernel threads +from interfering with the suspending and resuming of devices. 
A user space +process running on a second CPU while we are suspending devices may, for +example, be troublesome and without the freezing of tasks we would need some +safeguards against race conditions that might occur in such a case. + +Although Linus Torvalds doesn't like the freezing of tasks, he said this in one +of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608): + +"RJW:> Why we freeze tasks at all or why we freeze kernel threads? + +Linus: In many ways, 'at all'. + +I _do_ realize the IO request queue issues, and that we cannot actually do +s2ram with some devices in the middle of a DMA. So we want to be able to +avoid *that*, there's no question about that. And I suspect that stopping +user threads and then waiting for a sync is practically one of the easier +ways to do so. + +So in practice, the 'at all' may become a 'why freeze kernel threads?' and +freezing user threads I don't find really objectionable." + +Still, there are kernel threads that may want to be freezable. For example, if +a kernel thread that belongs to a device driver accesses the device directly, it in +principle needs to know when the device is suspended, so that it doesn't try to +access it at that time. However, if the kernel thread is freezable, it will be +frozen before the driver's .suspend() callback is executed and it will be +thawed after the driver's .resume() callback has run, so it won't be accessing +the device while it's suspended. + +3. Another reason for freezing tasks is to prevent user space processes from +realizing that hibernation (or suspend) operation takes place. Ideally, user +space processes should not notice that such a system-wide operation has occurred +and should continue running without any problems after the restore (or resume +from suspend). Unfortunately, in the most general case this is quite difficult +to achieve without the freezing of tasks. Consider, for example, a process +that depends on all CPUs being online while it's running.
Since we need to +disable nonboot CPUs during the hibernation, if this process is not frozen, it +may notice that the number of CPUs has changed and may start to work incorrectly +because of that. + +V. Are there any problems related to the freezing of tasks? + +Yes, there are. + +First of all, the freezing of kernel threads may be tricky if they depend one +on another. For example, if kernel thread A waits for a completion (in the +TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B +and B is frozen in the meantime, then A will be blocked until B is thawed, which +may be undesirable. That's why kernel threads are not freezable by default. + +Second, there are the following two problems related to the freezing of user +space processes: +1. Putting processes into an uninterruptible sleep distorts the load average. +2. Now that we have FUSE, plus the framework for doing device drivers in +userspace, it gets even more complicated because some userspace processes are +now doing the sorts of things that kernel threads do +(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html). + +The problem 1. seems to be fixable, although it hasn't been fixed so far. The +other one is more serious, but it seems that we can work around it by using +hibernation (and suspend) notifiers (in that case, though, we won't be able to +avoid the realization by the user space processes that the hibernation is taking +place). + +There are also problems that the freezing of tasks tends to expose, although +they are not directly related to it. For example, if request_firmware() is +called from a device driver's .resume() routine, it will timeout and eventually +fail, because the user land process that should respond to the request is frozen +at this point. So, seemingly, the failure is due to the freezing of tasks. 
+Suppose, however, that the firmware file is located on a filesystem accessible +only through another device that hasn't been resumed yet. In that case, +request_firmware() will fail regardless of whether or not the freezing of tasks +is used. Consequently, the problem is not really related to the freezing of +tasks, since it generally exists anyway. [The solution to this particular +problem is to keep the firmware in memory after it's loaded for the first time +and upload it from memory to the device whenever necessary.] diff -Nurb linux-2.6.22-570/Documentation/power/kernel_threads.txt linux-2.6.22-591/Documentation/power/kernel_threads.txt --- linux-2.6.22-570/Documentation/power/kernel_threads.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/power/kernel_threads.txt 1969-12-31 19:00:00.000000000 -0500 @@ -1,40 +0,0 @@ -KERNEL THREADS - - -Freezer - -Upon entering a suspended state the system will freeze all -tasks. This is done by delivering pseudosignals. This affects -kernel threads, too. To successfully freeze a kernel thread -the thread has to check for the pseudosignal and enter the -refrigerator. Code to do this looks like this: - - do { - hub_events(); - wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list)); - try_to_freeze(); - } while (!signal_pending(current)); - -from drivers/usb/core/hub.c::hub_thread() - - -The Unfreezable - -Some kernel threads however, must not be frozen. The kernel must -be able to finish pending IO operations and later on be able to -write the memory image to disk. Kernel threads needed to do IO -must stay awake. Such threads must mark themselves unfreezable -like this: - - /* - * This thread doesn't need any user-level access, - * so get rid of all our resources. - */ - daemonize("usb-storage"); - - current->flags |= PF_NOFREEZE; - -from drivers/usb/storage/usb.c::usb_stor_control_thread() - -Such drivers are themselves responsible for staying quiet during -the actual snapshotting.
diff -Nurb linux-2.6.22-570/Documentation/power/swsusp.txt linux-2.6.22-591/Documentation/power/swsusp.txt --- linux-2.6.22-570/Documentation/power/swsusp.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/power/swsusp.txt 2007-12-21 15:36:11.000000000 -0500 @@ -140,21 +140,11 @@ website, and not to the Linux Kernel Mailing List. We are working toward merging suspend2 into the mainline kernel. -Q: A kernel thread must voluntarily freeze itself (call 'refrigerator'). -I found some kernel threads that don't do it, and they don't freeze -so the system can't sleep. Is this a known behavior? - -A: All such kernel threads need to be fixed, one by one. Select the -place where the thread is safe to be frozen (no kernel semaphores -should be held at that point and it must be safe to sleep there), and -add: - - try_to_freeze(); - -If the thread is needed for writing the image to storage, you should -instead set the PF_NOFREEZE process flag when creating the thread (and -be very careful). +Q: What is the freezing of tasks and why are we using it? +A: The freezing of tasks is a mechanism by which user space processes and some +kernel threads are controlled during hibernation or system-wide suspend (on some +architectures). See freezing-of-tasks.txt for details. Q: What is the difference between "platform" and "shutdown"? diff -Nurb linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt linux-2.6.22-591/Documentation/scsi/scsi_fc_transport.txt --- linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/scsi/scsi_fc_transport.txt 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,450 @@ + SCSI FC Tansport + ============================================= + +Date: 4/12/2007 +Kernel Revisions for features: + rports : <> + vports : 2.6.22 (? TBD) + + +Introduction +============ +This file documents the features and components of the SCSI FC Transport. 
+It also documents the API between the transport and FC LLDDs. +The FC transport can be found at: + drivers/scsi/scsi_transport_fc.c + include/scsi/scsi_transport_fc.h + include/scsi/scsi_netlink_fc.h + +This file is found at Documentation/scsi/scsi_fc_transport.txt + + +FC Remote Ports (rports) +======================================================================== +<< To Be Supplied >> + + +FC Virtual Ports (vports) +======================================================================== + +Overview: +------------------------------- + + New FC standards have defined mechanisms which allow for a single physical + port to appear as multiple communication ports. Using the N_Port Id + Virtualization (NPIV) mechanism, a point-to-point connection to a Fabric + can be assigned more than 1 N_Port_ID. Each N_Port_ID appears as a + separate port to other endpoints on the fabric, even though it shares one + physical link to the switch for communication. Each N_Port_ID can have a + unique view of the fabric based on fabric zoning and array lun-masking + (just like a normal non-NPIV adapter). Using the Virtual Fabric (VF) + mechanism, adding a fabric header to each frame allows the port to + interact with the Fabric Port to join multiple fabrics. The port will + obtain an N_Port_ID on each fabric it joins. Each fabric will have its + own unique view of endpoints and configuration parameters. NPIV may be + used together with VF so that the port can obtain multiple N_Port_IDs + on each virtual fabric. + + The FC transport is now recognizing a new object - a vport. A vport is + an entity that has a world-wide unique World Wide Port Name (wwpn) and + World Wide Node Name (wwnn). The transport also allows for the FC4's to + be specified for the vport, with FCP_Initiator being the primary role + expected. Once instantiated by one of the above methods, it will have a + distinct N_Port_ID and view of fabric endpoints and storage entities.
+ The fc_host associated with the physical adapter will export the ability + to create vports. The transport will create the vport object within the + Linux device tree, and instruct the fc_host's driver to instantiate the + virtual port. Typically, the driver will create a new scsi_host instance + on the vport, resulting in a unique namespace for the vport. + Thus, whether a FC port is based on a physical port or on a virtual port, + each will appear as a unique scsi_host with its own target and lun space. + + Note: At this time, the transport is written to create only NPIV-based + vports. However, consideration was given to VF-based vports and it + should be a minor change to add support if needed. The remaining + discussion will concentrate on NPIV. + + Note: World Wide Name assignment (and uniqueness guarantees) are left + up to an administrative entity controlling the vport. For example, + if vports are to be associated with virtual machines, a XEN mgmt + utility would be responsible for creating wwpn/wwnn's for the vport, + using its own naming authority and OUI. (Note: it already does this + for virtual MAC addresses). + + +Device Trees and Vport Objects: +------------------------------- + + Today, the device tree typically contains the scsi_host object, + with rports and scsi target objects underneath it. Currently the FC + transport creates the vport object and places it under the scsi_host + object corresponding to the physical adapter. The LLDD will allocate + a new scsi_host for the vport and link its object under the vport. + The remainder of the tree under the vport's scsi_host is the same + as the non-NPIV case. The transport is written currently to easily + allow the parent of the vport to be something other than the scsi_host. + This could be used in the future to link the object onto a vm-specific + device tree.
If the vport's parent is not the physical port's scsi_host, + a symbolic link to the vport object will be placed in the physical + port's scsi_host. + + Here's what to expect in the device tree : + The typical Physical Port's Scsi_Host: + /sys/devices/.../host17/ + and it has the typical decendent tree: + /sys/devices/.../host17/rport-17:0-0/target17:0:0/17:0:0:0: + and then the vport is created on the Physical Port: + /sys/devices/.../host17/vport-17:0-0 + and the vport's Scsi_Host is then created: + /sys/devices/.../host17/vport-17:0-0/host18 + and then the rest of the tree progresses, such as: + /sys/devices/.../host17/vport-17:0-0/host18/rport-18:0-0/target18:0:0/18:0:0:0: + + Here's what to expect in the sysfs tree : + scsi_hosts: + /sys/class/scsi_host/host17 physical port's scsi_host + /sys/class/scsi_host/host18 vport's scsi_host + fc_hosts: + /sys/class/fc_host/host17 physical port's fc_host + /sys/class/fc_host/host18 vport's fc_host + fc_vports: + /sys/class/fc_vports/vport-17:0-0 the vport's fc_vport + fc_rports: + /sys/class/fc_remote_ports/rport-17:0-0 rport on the physical port + /sys/class/fc_remote_ports/rport-18:0-0 rport on the vport + + +Vport Attributes: +------------------------------- + + The new fc_vport class object has the following attributes + + node_name: Read_Only + The WWNN of the vport + + port_name: Read_Only + The WWPN of the vport + + roles: Read_Only + Indicates the FC4 roles enabled on the vport. + + symbolic_name: Read_Write + A string, appended to the driver's symbolic port name string, which + is registered with the switch to identify the vport. For example, + a hypervisor could set this string to "Xen Domain 2 VM 5 Vport 2", + and this set of identifiers can be seen on switch management screens + to identify the port. + + vport_delete: Write_Only + When written with a "1", will tear down the vport. + + vport_disable: Write_Only + When written with a "1", will transition the vport to a disabled. + state. 
The vport will still be instantiated with the Linux kernel, + but it will not be active on the FC link. + When written with a "0", will enable the vport. + + vport_last_state: Read_Only + Indicates the previous state of the vport. See the section below on + "Vport States". + + vport_state: Read_Only + Indicates the state of the vport. See the section below on + "Vport States". + + vport_type: Read_Only + Reflects the FC mechanism used to create the virtual port. + Only NPIV is supported currently. + + + For the fc_host class object, the following attributes are added for vports: + + max_npiv_vports: Read_Only + Indicates the maximum number of NPIV-based vports that the + driver/adapter can support on the fc_host. + + npiv_vports_inuse: Read_Only + Indicates how many NPIV-based vports have been instantiated on the + fc_host. + + vport_create: Write_Only + A "simple" create interface to instantiate a vport on an fc_host. + A "<WWPN>:<WWNN>" string is written to the attribute. The transport + then instantiates the vport object and calls the LLDD to create the + vport with the role of FCP_Initiator. Each WWN is specified as 16 + hex characters and may *not* contain any prefixes (e.g. 0x, x, etc). + + vport_delete: Write_Only + A "simple" delete interface to tear down a vport. A "<WWPN>:<WWNN>" + string is written to the attribute. The transport will locate the + vport on the fc_host with the same WWNs and tear it down. Each WWN + is specified as 16 hex characters and may *not* contain any prefixes + (e.g. 0x, x, etc). + + +Vport States: +------------------------------- + + Vport instantiation consists of two parts: + - Creation with the kernel and LLDD. This means all transport and + driver data structures are built up, and device objects created. + This is equivalent to a driver "attach" on an adapter, which is + independent of the adapter's link state. + - Instantiation of the vport on the FC link via ELS traffic, etc. + This is equivalent to a "link up" and successful link initialization.
+ Further information can be found in the interfaces section below for + Vport Creation. + + Once a vport has been instantiated with the kernel/LLDD, a vport state + can be reported via the sysfs attribute. The following states exist: + + FC_VPORT_UNKNOWN - Unknown + A temporary state, typically set only while the vport is being + instantiated with the kernel and LLDD. + + FC_VPORT_ACTIVE - Active + The vport has been successfully created on the FC link. + It is fully functional. + + FC_VPORT_DISABLED - Disabled + The vport is instantiated, but "disabled". The vport is not instantiated + on the FC link. This is equivalent to a physical port with the + link "down". + + FC_VPORT_LINKDOWN - Linkdown + The vport is not operational as the physical link is not operational. + + FC_VPORT_INITIALIZING - Initializing + The vport is in the process of instantiating on the FC link. + The LLDD will set this state just prior to starting the ELS traffic + to create the vport. This state will persist until the vport is + successfully created (state becomes FC_VPORT_ACTIVE) or it fails + (state is one of the values below). As this state is transitory, + it will not be preserved in the "vport_last_state". + + FC_VPORT_NO_FABRIC_SUPP - No Fabric Support + The vport is not operational. One of the following conditions was + encountered: + - The FC topology is not Point-to-Point + - The FC port is not connected to an F_Port + - The F_Port has indicated that NPIV is not supported. + + FC_VPORT_NO_FABRIC_RSCS - No Fabric Resources + The vport is not operational. The Fabric failed FDISC with a status + indicating that it does not have sufficient resources to complete + the operation. + + FC_VPORT_FABRIC_LOGOUT - Fabric Logout + The vport is not operational. The Fabric has LOGO'd the N_Port_ID + associated with the vport. + + FC_VPORT_FABRIC_REJ_WWN - Fabric Rejected WWN + The vport is not operational. The Fabric failed FDISC with a status + indicating that the WWN's are not valid.
+ + FC_VPORT_FAILED - VPort Failed + The vport is not operational. This is a catchall for all other + error conditions. + + + The following state table indicates the different state transitions: + + State Event New State + -------------------------------------------------------------------- + n/a Initialization Unknown + Unknown: Link Down Linkdown + Link Up & Loop No Fabric Support + Link Up & no Fabric No Fabric Support + Link Up & FLOGI response No Fabric Support + indicates no NPIV support + Link Up & FDISC being sent Initializing + Disable request Disable + Linkdown: Link Up Unknown + Initializing: FDISC ACC Active + FDISC LS_RJT w/ no resources No Fabric Resources + FDISC LS_RJT w/ invalid Fabric Rejected WWN + pname or invalid nport_id + FDISC LS_RJT failed for Vport Failed + other reasons + Link Down Linkdown + Disable request Disable + Disable: Enable request Unknown + Active: LOGO received from fabric Fabric Logout + Link Down Linkdown + Disable request Disable + Fabric Logout: Link still up Unknown + + The following 4 error states all have the same transitions: + No Fabric Support: + No Fabric Resources: + Fabric Rejected WWN: + Vport Failed: + Disable request Disable + Link goes down Linkdown + + +Transport <-> LLDD Interfaces : +------------------------------- + +Vport support by LLDD: + + The LLDD indicates support for vports by supplying a vport_create() + function in the transport template. The presense of this function will + cause the creation of the new attributes on the fc_host. As part of + the physical port completing its initialization relative to the + transport, it should set the max_npiv_vports attribute to indicate the + maximum number of vports the driver and/or adapter supports. + + +Vport Creation: + + The LLDD vport_create() syntax is: + + int vport_create(struct fc_vport *vport, bool disable) + + where: + vport: Is the newly allocated vport object + disable: If "true", the vport is to be created in a disabled stated. 
+ If "false", the vport is to be enabled upon creation. + + When a request is made to create a new vport (via sgio/netlink, or the + vport_create fc_host attribute), the transport will validate that the LLDD + can support another vport (e.g. max_npiv_vports > npiv_vports_inuse). + If not, the create request will be failed. If space remains, the transport + will increment the vport count, create the vport object, and then call the + LLDD's vport_create() function with the newly allocated vport object. + + As mentioned above, vport creation is divided into two parts: + - Creation with the kernel and LLDD. This means all transport and + driver data structures are built up, and device objects created. + This is equivalent to a driver "attach" on an adapter, which is + independent of the adapter's link state. + - Instantiation of the vport on the FC link via ELS traffic, etc. + This is equivalent to a "link up" and successfull link initialization. + + The LLDD's vport_create() function will not synchronously wait for both + parts to be fully completed before returning. It must validate that the + infrastructure exists to support NPIV, and complete the first part of + vport creation (data structure build up) before returning. We do not + hinge vport_create() on the link-side operation mainly because: + - The link may be down. It is not a failure if it is. It simply + means the vport is in an inoperable state until the link comes up. + This is consistent with the link bouncing post vport creation. + - The vport may be created in a disabled state. + - This is consistent with a model where: the vport equates to a + FC adapter. The vport_create is synonymous with driver attachment + to the adapter, which is independent of link state. + + Note: special error codes have been defined to delineate infrastructure + failure cases for quicker resolution. 
+ + The expected behavior for the LLDD's vport_create() function is: + - Validate Infrastructure: + - If the driver or adapter cannot support another vport, whether + due to improper firmware, (a lie about) max_npiv, or a lack of + some other resource - return VPCERR_UNSUPPORTED. + - If the driver validates the WWN's against those already active on + the adapter and detects an overlap - return VPCERR_BAD_WWN. + - If the driver detects the topology is loop, non-fabric, or the + FLOGI did not support NPIV - return VPCERR_NO_FABRIC_SUPP. + - Allocate data structures. If errors are encountered, such as out + of memory conditions, return the respective negative Exxx error code. + - If the role is FCP Initiator, the LLDD is to : + - Call scsi_host_alloc() to allocate a scsi_host for the vport. + - Call scsi_add_host(new_shost, &vport->dev) to start the scsi_host + and bind it as a child of the vport device. + - Initializes the fc_host attribute values. + - Kick off further vport state transitions based on the disable flag and + link state - and return success (zero). + + LLDD Implementers Notes: + - It is suggested that there be different fc_function_templates for + the physical port and the virtual port. The physical port's template + would have the vport_create, vport_delete, and vport_disable functions, + while the vports would not. + - It is suggested that there be different scsi_host_templates + for the physical port and virtual port. Likely, there are driver + attributes, embedded into the scsi_host_template, that are applicable + for the physical port only (link speed, topology setting, etc). This + ensures that the attributes are applicable to the respective scsi_host. + + +Vport Disable/Enable: + + The LLDD vport_disable() syntax is: + + int vport_disable(struct fc_vport *vport, bool disable) + + where: + vport: Is vport to be enabled or disabled + disable: If "true", the vport is to be disabled. + If "false", the vport is to be enabled. 
+ + When a request is made to change the disabled state on a vport, the + transport will validate the request against the existing vport state. + If the request is to disable and the vport is already disabled, the + request will fail. Similarly, if the request is to enable, and the + vport is not in a disabled state, the request will fail. If the request + is valid for the vport state, the transport will call the LLDD to + change the vport's state. + + Within the LLDD, if a vport is disabled, it remains instantiated with + the kernel and LLDD, but it is not active or visible on the FC link in + any way. (see Vport Creation and the 2 part instantiation discussion). + The vport will remain in this state until it is deleted or re-enabled. + When enabling a vport, the LLDD reinstantiates the vport on the FC + link - essentially restarting the LLDD statemachine (see Vport States + above). + + +Vport Deletion: + + The LLDD vport_delete() syntax is: + + int vport_delete(struct fc_vport *vport) + + where: + vport: Is vport to delete + + When a request is made to delete a vport (via sgio/netlink, or via the + fc_host or fc_vport vport_delete attributes), the transport will call + the LLDD to terminate the vport on the FC link, and teardown all other + datastructures and references. If the LLDD completes successfully, + the transport will teardown the vport objects and complete the vport + removal. If the LLDD delete request fails, the vport object will remain, + but will be in an indeterminate state. + + Within the LLDD, the normal code paths for a scsi_host teardown should + be followed. E.g. If the vport has a FCP Initiator role, the LLDD + will call fc_remove_host() for the vports scsi_host, followed by + scsi_remove_host() and scsi_host_put() for the vports scsi_host. + + +Other: + fc_host port_type attribute: + There is a new fc_host port_type value - FC_PORTTYPE_NPIV. This value + must be set on all vport-based fc_hosts. 
Normally, on a physical port, + the port_type attribute would be set to NPORT, NLPORT, etc based on the + topology type and existence of the fabric. As this is not applicable to + a vport, it makes more sense to report the FC mechanism used to create + the vport. + + Driver unload: + FC drivers are required to call fc_remove_host() prior to calling + scsi_remove_host(). This allows the fc_host to tear down all remote + ports prior to the scsi_host being torn down. The fc_remove_host() call + was updated to remove all vports for the fc_host as well. + + +Credits +======= +The following people have contributed to this document: + + + + + + +James Smart +james.smart@emulex.com + diff -Nurb linux-2.6.22-570/Documentation/sysctl/kernel.txt linux-2.6.22-591/Documentation/sysctl/kernel.txt --- linux-2.6.22-570/Documentation/sysctl/kernel.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/Documentation/sysctl/kernel.txt 2007-12-21 15:36:11.000000000 -0500 @@ -29,6 +29,7 @@ - java-interpreter [ binfmt_java, obsolete ] - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] +- mmap_min_addr - modprobe ==> Documentation/kmod.txt - msgmax - msgmnb @@ -178,6 +179,19 @@ ============================================================== +mmap_min_addr + +This file indicates the amount of address space which a user process will be +restricted from mmapping. Since kernel null dereference bugs could +accidentally operate based on the information in the first couple of pages of +memory userspace processes should not be allowed to write to them. By default +this value is set to 0 and no protections will be enforced by the security +module. Setting this value to something like 64k will allow the vast majority +of applications to work correctly and provide defense in depth against future +potential kernel bugs. 
+ +============================================================== + osrelease, ostype & version: # cat osrelease diff -Nurb linux-2.6.22-570/Documentation/sysfs-rules.txt linux-2.6.22-591/Documentation/sysfs-rules.txt --- linux-2.6.22-570/Documentation/sysfs-rules.txt 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/Documentation/sysfs-rules.txt 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,166 @@ +Rules on how to access information in the Linux kernel sysfs + +The kernel exported sysfs exports internal kernel implementation-details +and depends on internal kernel structures and layout. It is agreed upon +by the kernel developers that the Linux kernel does not provide a stable +internal API. As sysfs is a direct export of kernel internal +structures, the sysfs interface can not provide a stable interface either, +it may always change along with internal kernel changes. + +To minimize the risk of breaking users of sysfs, which are in most cases +low-level userspace applications, with a new kernel release, the users +of sysfs must follow some rules to use an as abstract-as-possible way to +access this filesystem. The current udev and HAL programs already +implement this and users are encouraged to plug, if possible, into the +abstractions these programs provide instead of accessing sysfs +directly. + +But if you really do want or need to access sysfs directly, please follow +the following rules and then your programs should work with future +versions of the sysfs interface. + +- Do not use libsysfs + It makes assumptions about sysfs which are not true. Its API does not + offer any abstraction, it exposes all the kernel driver-core + implementation details in its own API. Therefore it is not better than + reading directories and opening the files yourself. + Also, it is not actively maintained, in the sense of reflecting the + current kernel-development. The goal of providing a stable interface + to sysfs has failed, it causes more problems, than it solves. 
It + violates many of the rules in this document. + +- sysfs is always at /sys + Parsing /proc/mounts is a waste of time. Other mount points are a + system configuration bug you should not try to solve. For test cases, + possibly support a SYSFS_PATH environment variable to overwrite the + application's behavior, but never try to search for sysfs. Never try + to mount it, if you are not an early boot script. + +- devices are only "devices" + There is no such thing like class-, bus-, physical devices, + interfaces, and such that you can rely on in userspace. Everything is + just simply a "device". Class-, bus-, physical, ... types are just + kernel implementation details, which should not be expected by + applications that look for devices in sysfs. + + The properties of a device are: + o devpath (/devices/pci0000:00/0000:00:1d.1/usb2/2-2/2-2:1.0) + - identical to the DEVPATH value in the event sent from the kernel + at device creation and removal + - the unique key to the device at that point in time + - the kernel's path to the device-directory without the leading + /sys, and always starting with a slash + - all elements of a devpath must be real directories. Symlinks + pointing to /sys/devices must always be resolved to their real + target, and the target path must be used to access the device. + That way the devpath to the device matches the devpath of the + kernel used at event time. + - using or exposing symlink values as elements in a devpath string + is a bug in the application + + o kernel name (sda, tty, 0000:00:1f.2, ...) + - a directory name, identical to the last element of the devpath + - applications need to handle spaces and characters like '!' in + the name + + o subsystem (block, tty, pci, ...) 
+ - simple string, never a path or a link + - retrieved by reading the "subsystem"-link and using only the + last element of the target path + + o driver (tg3, ata_piix, uhci_hcd) + - a simple string, which may contain spaces, never a path or a + link + - it is retrieved by reading the "driver"-link and using only the + last element of the target path + - devices which do not have "driver"-link, just do not have a + driver; copying the driver value in a child device context, is a + bug in the application + + o attributes + - the files in the device directory or files below a subdirectories + of the same device directory + - accessing attributes reached by a symlink pointing to another device, + like the "device"-link, is a bug in the application + + Everything else is just a kernel driver-core implementation detail, + that should not be assumed to be stable across kernel releases. + +- Properties of parent devices never belong into a child device. + Always look at the parent devices themselves for determining device + context properties. If the device 'eth0' or 'sda' does not have a + "driver"-link, then this device does not have a driver. Its value is empty. + Never copy any property of the parent-device into a child-device. Parent + device-properties may change dynamically without any notice to the + child device. + +- Hierarchy in a single device-tree + There is only one valid place in sysfs where hierarchy can be examined + and this is below: /sys/devices. + It is planned, that all device directories will end up in the tree + below this directory. + +- Classification by subsystem + There are currently three places for classification of devices: + /sys/block, /sys/class and /sys/bus. It is planned that these will + not contain any device-directories themselves, but only flat lists of + symlinks pointing to the unified /sys/devices tree. + All three places have completely different rules on how to access + device information. 
It is planned to merge all three + classification-directories into one place at /sys/subsystem, + following the layout of the bus-directories. All buses and + classes, including the converted block-subsystem, will show up + there. + The devices belonging to a subsystem will create a symlink in the + "devices" directory at /sys/subsystem/<name>/devices. + + If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be + ignored. If it does not exist, you have always to scan all three + places, as the kernel is free to move a subsystem from one place to + the other, as long as the devices are still reachable by the same + subsystem name. + + Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or + /sys/block and /sys/class/block are not interchangeable, is a bug in + the application. + +- Block + The converted block-subsystem at /sys/class/block, or + /sys/subsystem/block will contain the links for disks and partitions + at the same level, never in a hierarchy. Assuming the block-subsystem to + contain only disks and not partition-devices in the same flat list is + a bug in the application. + +- "device"-link and <subsystem>:<kernel name>-links + Never depend on the "device"-link. The "device"-link is a workaround + for the old layout, where class-devices are not created in + /sys/devices/ like the bus-devices. If the link-resolving of a + device-directory does not end in /sys/devices/, you can use the + "device"-link to find the parent devices in /sys/devices/. That is the + single valid use of the "device"-link, it must never appear in any + path as an element. Assuming the existence of the "device"-link for + a device in /sys/devices/ is a bug in the application. + Accessing /sys/class/net/eth0/device is a bug in the application. + + Never depend on the class-specific links back to the /sys/class + directory. These links are also a workaround for the design mistake + that class-devices are not created in /sys/devices. 
If a device + directory does not contain directories for child devices, these links + may be used to find the child devices in /sys/class. That is the single + valid use of these links, they must never appear in any path as an + element. Assuming the existence of these links for devices which are + real child device directories in the /sys/devices tree, is a bug in + the application. + + It is planned to remove all these links when all class-device + directories live in /sys/devices. + +- Position of devices along device chain can change. + Never depend on a specific parent device position in the devpath, + or the chain of parent devices. The kernel is free to insert devices into + the chain. You must always request the parent device you are looking for + by its subsystem value. You need to walk up the chain until you find + the device that matches the expected subsystem. Depending on a specific + position of a parent device, or exposing relative paths, using "../" to + access the chain of parents, is a bug in the application. + diff -Nurb linux-2.6.22-570/MAINTAINERS linux-2.6.22-591/MAINTAINERS --- linux-2.6.22-570/MAINTAINERS 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/MAINTAINERS 2007-12-21 15:36:11.000000000 -0500 @@ -232,15 +232,15 @@ S: Supported ACPI BATTERY DRIVERS -P: Vladimir P. Lebedev -M: vladimir.p.lebedev@intel.com +P: Alexey Starikovskiy +M: astarikovskiy@suse.de L: linux-acpi@vger.kernel.org W: http://acpi.sourceforge.net/ S: Supported ACPI EC DRIVER P: Alexey Starikovskiy -M: alexey.y.starikovskiy@linux.intel.com +M: astarikovskiy@suse.de L: linux-acpi@vger.kernel.org W: http://acpi.sourceforge.net/ S: Supported @@ -2127,6 +2127,15 @@ L: kexec@lists.infradead.org S: Maintained +KGDB +P: Jason Wessel +M: jason.wessel@windriver.com +P: Amit S. 
Kale +M: amitkale@linsyssoft.com +W: http://sourceforge.net/projects/kgdb +L: kgdb-bugreport@lists.sourceforge.net +S: Maintained + KPROBES P: Prasanna S Panchamukhi M: prasanna@in.ibm.com @@ -3593,6 +3602,15 @@ W: http://www.kernel.dk S: Maintained +UNIONFS +P: Erez Zadok +M: ezk@cs.sunysb.edu +P: Josef "Jeff" Sipek +M: jsipek@cs.sunysb.edu +L: unionfs@filesystems.org +W: http://unionfs.filesystems.org +S: Maintained + USB ACM DRIVER P: Oliver Neukum M: oliver@neukum.name diff -Nurb linux-2.6.22-570/Makefile linux-2.6.22-591/Makefile --- linux-2.6.22-570/Makefile 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/Makefile 2007-12-21 15:36:16.000000000 -0500 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 22 -EXTRAVERSION = .14-vs2.3.0.29 +EXTRAVERSION = -prep NAME = Holy Dancing Manatees, Batman! # *DOCUMENTATION* @@ -496,6 +496,11 @@ CFLAGS += -fomit-frame-pointer endif +ifdef CONFIG_UNWIND_INFO +CFLAGS += -fasynchronous-unwind-tables +LDFLAGS_vmlinux += --eh-frame-hdr +endif + ifdef CONFIG_DEBUG_INFO CFLAGS += -g endif diff -Nurb linux-2.6.22-570/Makefile.orig linux-2.6.22-591/Makefile.orig --- linux-2.6.22-570/Makefile.orig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/Makefile.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1493 +0,0 @@ -VERSION = 2 -PATCHLEVEL = 6 -SUBLEVEL = 22 -EXTRAVERSION = .14 -NAME = Holy Dancing Manatees, Batman! - -# *DOCUMENTATION* -# To see a list of typical targets execute "make help" -# More info can be located in ./README -# Comments in this file are targeted only to the developer, do not -# expect to learn how to build the kernel reading this file. - -# Do not: -# o use make's built-in rules and variables -# (this increases performance and avoid hard-to-debug behavour); -# o print "Entering directory ..."; -MAKEFLAGS += -rR --no-print-directory - -# We are using a recursive build, so we need to do a little thinking -# to get the ordering right. 
-# -# Most importantly: sub-Makefiles should only ever modify files in -# their own directory. If in some directory we have a dependency on -# a file in another dir (which doesn't happen often, but it's often -# unavoidable when linking the built-in.o targets which finally -# turn into vmlinux), we will call a sub make in that other dir, and -# after that we are sure that everything which is in that other dir -# is now up to date. -# -# The only cases where we need to modify files which have global -# effects are thus separated out and done before the recursive -# descending is started. They are now explicitly listed as the -# prepare rule. - -# To put more focus on warnings, be less verbose as default -# Use 'make V=1' to see the full commands - -ifdef V - ifeq ("$(origin V)", "command line") - KBUILD_VERBOSE = $(V) - endif -endif -ifndef KBUILD_VERBOSE - KBUILD_VERBOSE = 0 -endif - -# Call a source code checker (by default, "sparse") as part of the -# C compilation. -# -# Use 'make C=1' to enable checking of only re-compiled files. -# Use 'make C=2' to enable checking of *all* source files, regardless -# of whether they are re-compiled or not. -# -# See the file "Documentation/sparse.txt" for more details, including -# where to get the "sparse" utility. - -ifdef C - ifeq ("$(origin C)", "command line") - KBUILD_CHECKSRC = $(C) - endif -endif -ifndef KBUILD_CHECKSRC - KBUILD_CHECKSRC = 0 -endif - -# Use make M=dir to specify directory of external module to build -# Old syntax make ... SUBDIRS=$PWD is still supported -# Setting the environment variable KBUILD_EXTMOD take precedence -ifdef SUBDIRS - KBUILD_EXTMOD ?= $(SUBDIRS) -endif -ifdef M - ifeq ("$(origin M)", "command line") - KBUILD_EXTMOD := $(M) - endif -endif - - -# kbuild supports saving output files in a separate directory. -# To locate output files in a separate directory two syntaxes are supported. -# In both cases the working directory must be the root of the kernel src. 
-# 1) O= -# Use "make O=dir/to/store/output/files/" -# -# 2) Set KBUILD_OUTPUT -# Set the environment variable KBUILD_OUTPUT to point to the directory -# where the output files shall be placed. -# export KBUILD_OUTPUT=dir/to/store/output/files/ -# make -# -# The O= assignment takes precedence over the KBUILD_OUTPUT environment -# variable. - - -# KBUILD_SRC is set on invocation of make in OBJ directory -# KBUILD_SRC is not intended to be used by the regular user (for now) -ifeq ($(KBUILD_SRC),) - -# OK, Make called in directory where kernel src resides -# Do we want to locate output files in a separate directory? -ifdef O - ifeq ("$(origin O)", "command line") - KBUILD_OUTPUT := $(O) - endif -endif - -# That's our default target when none is given on the command line -PHONY := _all -_all: - -ifneq ($(KBUILD_OUTPUT),) -# Invoke a second make in the output directory, passing relevant variables -# check that the output directory actually exists -saved-output := $(KBUILD_OUTPUT) -KBUILD_OUTPUT := $(shell cd $(KBUILD_OUTPUT) && /bin/pwd) -$(if $(KBUILD_OUTPUT),, \ - $(error output directory "$(saved-output)" does not exist)) - -PHONY += $(MAKECMDGOALS) - -$(filter-out _all,$(MAKECMDGOALS)) _all: - $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \ - KBUILD_SRC=$(CURDIR) \ - KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $@ - -# Leave processing to above invocation of make -skip-makefile := 1 -endif # ifneq ($(KBUILD_OUTPUT),) -endif # ifeq ($(KBUILD_SRC),) - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -# If building an external module we do not care about the all: rule -# but instead _all depend on modules -PHONY += all -ifeq ($(KBUILD_EXTMOD),) -_all: all -else -_all: modules -endif - -srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR)) -TOPDIR := $(srctree) -# FIXME - TOPDIR is obsolete, use srctree/objtree -objtree := $(CURDIR) -src := $(srctree) -obj := $(objtree) - -VPATH := 
$(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) - -export srctree objtree VPATH TOPDIR - - -# SUBARCH tells the usermode build what the underlying arch is. That is set -# first, and if a usermode build is happening, the "ARCH=um" on the command -# line overrides the setting of ARCH below. If a native build is happening, -# then ARCH is assigned, getting whatever value it gets normally, and -# SUBARCH is subsequently ignored. - -SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ - -e s/s390x/s390/ -e s/parisc64/parisc/ \ - -e s/ppc.*/powerpc/ -e s/mips.*/mips/ ) - -# Cross compiling and selecting different set of gcc/bin-utils -# --------------------------------------------------------------------------- -# -# When performing cross compilation for other architectures ARCH shall be set -# to the target architecture. (See arch/* for the possibilities). -# ARCH can be set during invocation of make: -# make ARCH=ia64 -# Another way is to have ARCH set in the environment. -# The default ARCH is the host where make is executed. - -# CROSS_COMPILE specify the prefix used for all executables used -# during compilation. Only gcc and related bin-utils executables -# are prefixed with $(CROSS_COMPILE). -# CROSS_COMPILE can be set on the command line -# make CROSS_COMPILE=ia64-linux- -# Alternatively CROSS_COMPILE can be set in the environment. 
-# Default value for CROSS_COMPILE is not to prefix executables -# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile - -ARCH ?= $(SUBARCH) -CROSS_COMPILE ?= - -# Architecture as present in compile.h -UTS_MACHINE := $(ARCH) - -KCONFIG_CONFIG ?= .config - -# SHELL used by kbuild -CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ - else if [ -x /bin/bash ]; then echo /bin/bash; \ - else echo sh; fi ; fi) - -HOSTCC = gcc -HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -HOSTCXXFLAGS = -O2 - -# Decide whether to build built-in, modular, or both. -# Normally, just do built-in. - -KBUILD_MODULES := -KBUILD_BUILTIN := 1 - -# If we have only "make modules", don't compile built-in objects. -# When we're building modules with modversions, we need to consider -# the built-in objects during the descend as well, in order to -# make sure the checksums are up to date before we record them. - -ifeq ($(MAKECMDGOALS),modules) - KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1) -endif - -# If we have "make modules", compile modules -# in addition to whatever we do anyway. -# Just "make" or "make all" shall build modules as well - -ifneq ($(filter all _all modules,$(MAKECMDGOALS)),) - KBUILD_MODULES := 1 -endif - -ifeq ($(MAKECMDGOALS),) - KBUILD_MODULES := 1 -endif - -export KBUILD_MODULES KBUILD_BUILTIN -export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD - -# Beautify output -# --------------------------------------------------------------------------- -# -# Normally, we echo the whole command before executing it. By making -# that echo $($(quiet)$(cmd)), we now have the possibility to set -# $(quiet) to choose other forms of output instead, e.g. -# -# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@ -# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -# -# If $(quiet) is empty, the whole command will be printed. -# If it is set to "quiet_", only the short version will be printed. 
-# If it is set to "silent_", nothing will be printed at all, since -# the variable $(silent_cmd_cc_o_c) doesn't exist. -# -# A simple variant is to prefix commands with $(Q) - that's useful -# for commands that shall be hidden in non-verbose mode. -# -# $(Q)ln $@ :< -# -# If KBUILD_VERBOSE equals 0 then the above command will be hidden. -# If KBUILD_VERBOSE equals 1 then the above command is displayed. - -ifeq ($(KBUILD_VERBOSE),1) - quiet = - Q = -else - quiet=quiet_ - Q = @ -endif - -# If the user is running make -s (silent mode), suppress echoing of -# commands - -ifneq ($(findstring s,$(MAKEFLAGS)),) - quiet=silent_ -endif - -export quiet Q KBUILD_VERBOSE - - -# Look for make include files relative to root of kernel src -MAKEFLAGS += --include-dir=$(srctree) - -# We need some generic definitions. -include $(srctree)/scripts/Kbuild.include - -# Make variables (CC, etc...) - -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc -CPP = $(CC) -E -AR = $(CROSS_COMPILE)ar -NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip -OBJCOPY = $(CROSS_COMPILE)objcopy -OBJDUMP = $(CROSS_COMPILE)objdump -AWK = awk -GENKSYMS = scripts/genksyms/genksyms -DEPMOD = /sbin/depmod -KALLSYMS = scripts/kallsyms -PERL = perl -CHECK = sparse - -CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF) -MODFLAGS = -DMODULE -CFLAGS_MODULE = $(MODFLAGS) -AFLAGS_MODULE = $(MODFLAGS) -LDFLAGS_MODULE = -r -CFLAGS_KERNEL = -AFLAGS_KERNEL = - - -# Use LINUXINCLUDE when you must reference the include/ directory. 
-# Needed to be compatible with the O= option -LINUXINCLUDE := -Iinclude \ - $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ - -include include/linux/autoconf.h - -CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) - -CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common -AFLAGS := -D__ASSEMBLY__ - -# Read KERNELRELEASE from include/config/kernel.release (if it exists) -KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) -KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - -export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION -export ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE -export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS - -export CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE -export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE - -# When compiling out-of-tree modules, put MODVERDIR in the module -# tree rather than in the kernel tree. The kernel tree might -# even be read-only. -export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions - -# Files to ignore in find ... statements - -RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o -export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exclude CVS --exclude .pc --exclude .hg --exclude .git - -# =========================================================================== -# Rules shared between *config targets and build targets - -# Basic helpers built in scripts/ -PHONY += scripts_basic -scripts_basic: - $(Q)$(MAKE) $(build)=scripts/basic - -# To avoid any implicit rule to kick in, define an empty command. 
-scripts/basic/%: scripts_basic ; - -PHONY += outputmakefile -# outputmakefile generates a Makefile in the output directory, if using a -# separate output directory. This allows convenient use of make in the -# output directory. -outputmakefile: -ifneq ($(KBUILD_SRC),) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \ - $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL) -endif - -# To make sure we do not include .config for any of the *config targets -# catch them early, and hand them over to scripts/kconfig/Makefile -# It is allowed to specify more targets when calling make, including -# mixing *config targets and build targets. -# For example 'make oldconfig all'. -# Detect when mixed targets is specified, and make a second invocation -# of make so .config is not included in this case either (for *config). - -no-dot-config-targets := clean mrproper distclean \ - cscope TAGS tags help %docs check% \ - include/linux/version.h headers_% \ - kernelrelease kernelversion - -config-targets := 0 -mixed-targets := 0 -dot-config := 1 - -ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) - dot-config := 0 - endif -endif - -ifeq ($(KBUILD_EXTMOD),) - ifneq ($(filter config %config,$(MAKECMDGOALS)),) - config-targets := 1 - ifneq ($(filter-out config %config,$(MAKECMDGOALS)),) - mixed-targets := 1 - endif - endif -endif - -ifeq ($(mixed-targets),1) -# =========================================================================== -# We're called with mixed targets (*config and build targets). -# Handle them one by one. - -%:: FORCE - $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@ - -else -ifeq ($(config-targets),1) -# =========================================================================== -# *config targets only - make sure prerequisites are updated, and descend -# in scripts/kconfig to make the *config target - -# Read arch specific Makefile to set KBUILD_DEFCONFIG as needed. 
-# KBUILD_DEFCONFIG may point out an alternative default configuration -# used for 'make defconfig' -include $(srctree)/arch/$(ARCH)/Makefile -export KBUILD_DEFCONFIG - -config %config: scripts_basic outputmakefile FORCE - $(Q)mkdir -p include/linux include/config - $(Q)$(MAKE) $(build)=scripts/kconfig $@ - -else -# =========================================================================== -# Build targets only - this includes vmlinux, arch specific targets, clean -# targets and others. In general all targets except *config targets. - -ifeq ($(KBUILD_EXTMOD),) -# Additional helpers built in scripts/ -# Carefully list dependencies so we do not try to build scripts twice -# in parallel -PHONY += scripts -scripts: scripts_basic include/config/auto.conf - $(Q)$(MAKE) $(build)=$(@) - -# Objects we will link into vmlinux / subdirs we need to visit -init-y := init/ -drivers-y := drivers/ sound/ -net-y := net/ -libs-y := lib/ -core-y := usr/ -endif # KBUILD_EXTMOD - -ifeq ($(dot-config),1) -# Read in config --include include/config/auto.conf - -ifeq ($(KBUILD_EXTMOD),) -# Read in dependencies to all Kconfig* files, make sure to run -# oldconfig if changes are detected. --include include/config/auto.conf.cmd - -# To avoid any implicit rule to kick in, define an empty command -$(KCONFIG_CONFIG) include/config/auto.conf.cmd: ; - -# If .config is newer than include/config/auto.conf, someone tinkered -# with it and forgot to run make oldconfig. -# if auto.conf.cmd is missing then we are probably in a cleaned tree so -# we execute the config step to be sure to catch updated Kconfig files -include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd - $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig -else -# external modules needs include/linux/autoconf.h and include/config/auto.conf -# but do not care if they are up-to-date. 
Use auto.conf to trigger the test -PHONY += include/config/auto.conf - -include/config/auto.conf: - $(Q)test -e include/linux/autoconf.h -a -e $@ || ( \ - echo; \ - echo " ERROR: Kernel configuration is invalid."; \ - echo " include/linux/autoconf.h or $@ are missing."; \ - echo " Run 'make oldconfig && make prepare' on kernel src to fix it."; \ - echo; \ - /bin/false) - -endif # KBUILD_EXTMOD - -else -# Dummy target needed, because used as prerequisite -include/config/auto.conf: ; -endif # $(dot-config) - -# The all: target is the default when no target is given on the -# command line. -# This allow a user to issue only 'make' to build a kernel including modules -# Defaults vmlinux but it is usually overridden in the arch makefile -all: vmlinux - -ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -CFLAGS += -Os -else -CFLAGS += -O2 -endif - -include $(srctree)/arch/$(ARCH)/Makefile - -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) -else -CFLAGS += -fomit-frame-pointer -endif - -ifdef CONFIG_DEBUG_INFO -CFLAGS += -g -endif - -# Force gcc to behave correct even for buggy distributions -CFLAGS += $(call cc-option, -fno-stack-protector) - -# arch Makefile may override CC so keep this after arch Makefile is included -NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) -CHECKFLAGS += $(NOSTDINC_FLAGS) - -# warn about C99 declaration after statement -CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) - -# disable pointer signed / unsigned warnings in gcc 4.0 -CFLAGS += $(call cc-option,-Wno-pointer-sign,) - -# Default kernel image to build when no specific target is given. -# KBUILD_IMAGE may be overruled on the command line or -# set in the environment -# Also any assignments in arch/$(ARCH)/Makefile take precedence over -# this default value -export KBUILD_IMAGE ?= vmlinux - -# -# INSTALL_PATH specifies where to place the updated kernel and system map -# images. 
Default is /boot, but you can set it to other values -export INSTALL_PATH ?= /boot - -# -# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory -# relocations required by build roots. This is not defined in the -# makefile but the argument can be passed to make if needed. -# - -MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) -export MODLIB - -# -# INSTALL_MOD_STRIP, if defined, will cause modules to be -# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then -# the default option --strip-debug will be used. Otherwise, -# INSTALL_MOD_STRIP will used as the options to the strip command. - -ifdef INSTALL_MOD_STRIP -ifeq ($(INSTALL_MOD_STRIP),1) -mod_strip_cmd = $(STRIP) --strip-debug -else -mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP) -endif # INSTALL_MOD_STRIP=1 -else -mod_strip_cmd = true -endif # INSTALL_MOD_STRIP -export mod_strip_cmd - - -ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ - -vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ - $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ - $(net-y) $(net-m) $(libs-y) $(libs-m))) - -vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \ - $(init-n) $(init-) \ - $(core-n) $(core-) $(drivers-n) $(drivers-) \ - $(net-n) $(net-) $(libs-n) $(libs-)))) - -init-y := $(patsubst %/, %/built-in.o, $(init-y)) -core-y := $(patsubst %/, %/built-in.o, $(core-y)) -drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y)) -net-y := $(patsubst %/, %/built-in.o, $(net-y)) -libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) -libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) -libs-y := $(libs-y1) $(libs-y2) - -# Build vmlinux -# --------------------------------------------------------------------------- -# vmlinux is built from the objects selected by $(vmlinux-init) and -# $(vmlinux-main). Most are built-in.o files from top-level directories -# in the kernel tree, others are specified in arch/$(ARCH)/Makefile. 
-# Ordering when linking is important, and $(vmlinux-init) must be first. -# -# vmlinux -# ^ -# | -# +-< $(vmlinux-init) -# | +--< init/version.o + more -# | -# +--< $(vmlinux-main) -# | +--< driver/built-in.o mm/built-in.o + more -# | -# +-< kallsyms.o (see description in CONFIG_KALLSYMS section) -# -# vmlinux version (uname -v) cannot be updated during normal -# descending-into-subdirs phase since we do not yet know if we need to -# update vmlinux. -# Therefore this step is delayed until just before final link of vmlinux - -# except in the kallsyms case where it is done just before adding the -# symbols to the kernel. -# -# System.map is generated to document addresses of all kernel symbols - -vmlinux-init := $(head-y) $(init-y) -vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y) -vmlinux-all := $(vmlinux-init) $(vmlinux-main) -vmlinux-lds := arch/$(ARCH)/kernel/vmlinux.lds -export KBUILD_VMLINUX_OBJS := $(vmlinux-all) - -# Rule to link vmlinux - also used during CONFIG_KALLSYMS -# May be overridden by arch/$(ARCH)/Makefile -quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ - $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) - -# Generate new vmlinux version -quiet_cmd_vmlinux_version = GEN .version - cmd_vmlinux_version = set -e; \ - if [ ! 
-r .version ]; then \ - rm -f .version; \ - echo 1 >.version; \ - else \ - mv .version .old_version; \ - expr 0$$(cat .old_version) + 1 >.version; \ - fi; \ - $(MAKE) $(build)=init - -# Generate System.map -quiet_cmd_sysmap = SYSMAP - cmd_sysmap = $(CONFIG_SHELL) $(srctree)/scripts/mksysmap - -# Link of vmlinux -# If CONFIG_KALLSYMS is set .version is already updated -# Generate System.map and verify that the content is consistent -# Use + in front of the vmlinux_version rule to silent warning with make -j2 -# First command is ':' to allow us to use + in front of the rule -define rule_vmlinux__ - : - $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version)) - - $(call cmd,vmlinux__) - $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd - - $(Q)$(if $($(quiet)cmd_sysmap), \ - echo ' $($(quiet)cmd_sysmap) System.map' &&) \ - $(cmd_sysmap) $@ System.map; \ - if [ $$? -ne 0 ]; then \ - rm -f $@; \ - /bin/false; \ - fi; - $(verify_kallsyms) -endef - - -ifdef CONFIG_KALLSYMS -# Generate section listing all symbols and add it into vmlinux $(kallsyms.o) -# It's a three stage process: -# o .tmp_vmlinux1 has all symbols and sections, but __kallsyms is -# empty -# Running kallsyms on that gives us .tmp_kallsyms1.o with -# the right size - vmlinux version (uname -v) is updated during this step -# o .tmp_vmlinux2 now has a __kallsyms section of the right size, -# but due to the added section, some addresses have shifted. -# From here, we generate a correct .tmp_kallsyms2.o -# o The correct .tmp_kallsyms2.o is linked into the final vmlinux. -# o Verify that the System.map from vmlinux matches the map from -# .tmp_vmlinux2, just in case we did not generate kallsyms correctly. -# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using -# .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a -# temporary bypass to allow the kernel to be built while the -# maintainers work out what went wrong with kallsyms. 
- -ifdef CONFIG_KALLSYMS_EXTRA_PASS -last_kallsyms := 3 -else -last_kallsyms := 2 -endif - -kallsyms.o := .tmp_kallsyms$(last_kallsyms).o - -define verify_kallsyms - $(Q)$(if $($(quiet)cmd_sysmap), \ - echo ' $($(quiet)cmd_sysmap) .tmp_System.map' &&) \ - $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map - $(Q)cmp -s System.map .tmp_System.map || \ - (echo Inconsistent kallsyms data; \ - echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \ - rm .tmp_kallsyms* ; /bin/false ) -endef - -# Update vmlinux version before link -# Use + in front of this rule to silent warning about make -j1 -# First command is ':' to allow us to use + in front of this rule -cmd_ksym_ld = $(cmd_vmlinux__) -define rule_ksym_ld - : - +$(call cmd,vmlinux_version) - $(call cmd,vmlinux__) - $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd -endef - -# Generate .S file with all kernel symbols -quiet_cmd_kallsyms = KSYM $@ - cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \ - $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@ - -.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE - $(call if_changed_dep,as_o_S) - -.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS) - $(call cmd,kallsyms) - -# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version -.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE - $(call if_changed_rule,ksym_ld) - -.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE - $(call if_changed,vmlinux__) - -.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE - $(call if_changed,vmlinux__) - -# Needs to visit scripts/ before $(KALLSYMS) can be used. 
-$(KALLSYMS): scripts ; - -# Generate some data for debugging strange kallsyms problems -debug_kallsyms: .tmp_map$(last_kallsyms) - -.tmp_map%: .tmp_vmlinux% FORCE - ($(OBJDUMP) -h $< | $(AWK) '/^ +[0-9]/{print $$4 " 0 " $$2}'; $(NM) $<) | sort > $@ - -.tmp_map3: .tmp_map2 - -.tmp_map2: .tmp_map1 - -endif # ifdef CONFIG_KALLSYMS - -# vmlinux image - including updated kernel symbols -vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE -ifdef CONFIG_HEADERS_CHECK - $(Q)$(MAKE) -f $(srctree)/Makefile headers_check -endif - $(call if_changed_rule,vmlinux__) - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ - $(Q)rm -f .old_version - -# The actual objects are generated when descending, -# make sure no implicit rule kicks in -$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; - -# Handle descending into subdirectories listed in $(vmlinux-dirs) -# Preset locale variables to speed up the build process. Limit locale -# tweaks to this spot to avoid wrong language settings when running -# make menuconfig etc. -# Error messages still appears in the original language - -PHONY += $(vmlinux-dirs) -$(vmlinux-dirs): prepare scripts - $(Q)$(MAKE) $(build)=$@ - -# Build the kernel release string -# -# The KERNELRELEASE value built here is stored in the file -# include/config/kernel.release, and is used when executing several -# make targets, such as "make install" or "make modules_install." -# -# The eventual kernel release string consists of the following fields, -# shown in a hierarchical format to show how smaller parts are concatenated -# to form the larger and final value, with values coming from places like -# the Makefile, kernel config options, make command line options and/or -# SCM tag information. 
-# -# $(KERNELVERSION) -# $(VERSION) eg, 2 -# $(PATCHLEVEL) eg, 6 -# $(SUBLEVEL) eg, 18 -# $(EXTRAVERSION) eg, -rc6 -# $(localver-full) -# $(localver) -# localversion* (files without backups, containing '~') -# $(CONFIG_LOCALVERSION) (from kernel config setting) -# $(localver-auto) (only if CONFIG_LOCALVERSION_AUTO is set) -# ./scripts/setlocalversion (SCM tag, if one exists) -# $(LOCALVERSION) (from make command line if provided) -# -# Note how the final $(localver-auto) string is included *only* if the -# kernel config option CONFIG_LOCALVERSION_AUTO is selected. Also, at the -# moment, only git is supported but other SCMs can edit the script -# scripts/setlocalversion and add the appropriate checks as needed. - -pattern = ".*/localversion[^~]*" -string = $(shell cat /dev/null \ - `find $(objtree) $(srctree) -maxdepth 1 -regex $(pattern) | sort -u`) - -localver = $(subst $(space),, $(string) \ - $(patsubst "%",%,$(CONFIG_LOCALVERSION))) - -# If CONFIG_LOCALVERSION_AUTO is set scripts/setlocalversion is called -# and if the SCM is know a tag from the SCM is appended. -# The appended tag is determined by the SCM used. -# -# Currently, only git is supported. -# Other SCMs can edit scripts/setlocalversion and add the appropriate -# checks as needed. -ifdef CONFIG_LOCALVERSION_AUTO - _localver-auto = $(shell $(CONFIG_SHELL) \ - $(srctree)/scripts/setlocalversion $(srctree)) - localver-auto = $(LOCALVERSION)$(_localver-auto) -endif - -localver-full = $(localver)$(localver-auto) - -# Store (new) KERNELRELASE string in include/config/kernel.release -kernelrelease = $(KERNELVERSION)$(localver-full) -include/config/kernel.release: include/config/auto.conf FORCE - $(Q)rm -f $@ - $(Q)echo $(kernelrelease) > $@ - - -# Things we need to do before we recursively start building the kernel -# or the modules are listed in "prepare". -# A multi level approach is used. prepareN is processed before prepareN-1. 
-# archprepare is used in arch Makefiles and when processed asm symlink, -# version.h and scripts_basic is processed / created. - -# Listed in dependency order -PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3 - -# prepare3 is used to check if we are building in a separate output directory, -# and if so do: -# 1) Check that make has not been executed in the kernel src $(srctree) -# 2) Create the include2 directory, used for the second asm symlink -prepare3: include/config/kernel.release -ifneq ($(KBUILD_SRC),) - @echo ' Using $(srctree) as source for kernel' - $(Q)if [ -f $(srctree)/.config -o -d $(srctree)/include/config ]; then \ - echo " $(srctree) is not clean, please run 'make mrproper'";\ - echo " in the '$(srctree)' directory.";\ - /bin/false; \ - fi; - $(Q)if [ ! -d include2 ]; then mkdir -p include2; fi; - $(Q)ln -fsn $(srctree)/include/asm-$(ARCH) include2/asm -endif - -# prepare2 creates a makefile if using a separate output directory -prepare2: prepare3 outputmakefile - -prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ - include/asm include/config/auto.conf -ifneq ($(KBUILD_MODULES),) - $(Q)mkdir -p $(MODVERDIR) - $(Q)rm -f $(MODVERDIR)/* -endif - -archprepare: prepare1 scripts_basic - -prepare0: archprepare FORCE - $(Q)$(MAKE) $(build)=. - $(Q)$(MAKE) $(build)=. missing-syscalls - -# All the preparing.. -prepare: prepare0 - -# Leave this as default for preprocessing vmlinux.lds.S, which is now -# done in arch/$(ARCH)/kernel/Makefile - -export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) - -# FIXME: The asm symlink changes when $(ARCH) changes. That's -# hard to detect, but I suppose "make mrproper" is a good idea -# before switching between archs anyway. - -include/asm: - @echo ' SYMLINK $@ -> include/asm-$(ARCH)' - $(Q)if [ ! 
-d include ]; then mkdir -p include; fi; - @ln -fsn asm-$(ARCH) $@ - -# Generate some files -# --------------------------------------------------------------------------- - -# KERNELRELEASE can change from a few different places, meaning version.h -# needs to be updated, so this check is forced on all builds - -uts_len := 64 -define filechk_utsrelease.h - if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \ - echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \ - exit 1; \ - fi; \ - (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";) -endef - -define filechk_version.h - (echo \#define LINUX_VERSION_CODE $(shell \ - expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \ - echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';) -endef - -include/linux/version.h: $(srctree)/Makefile FORCE - $(call filechk,version.h) - -include/linux/utsrelease.h: include/config/kernel.release FORCE - $(call filechk,utsrelease.h) - -# --------------------------------------------------------------------------- - -PHONY += depend dep -depend dep: - @echo '*** Warning: make $@ is unnecessary now.' - -# --------------------------------------------------------------------------- -# Kernel headers -INSTALL_HDR_PATH=$(objtree)/usr -export INSTALL_HDR_PATH - -HDRARCHES=$(filter-out generic,$(patsubst $(srctree)/include/asm-%/Kbuild,%,$(wildcard $(srctree)/include/asm-*/Kbuild))) - -PHONY += headers_install_all -headers_install_all: include/linux/version.h scripts_basic FORCE - $(Q)$(MAKE) $(build)=scripts scripts/unifdef - $(Q)for arch in $(HDRARCHES); do \ - $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch ;\ - done - -PHONY += headers_install -headers_install: include/linux/version.h scripts_basic FORCE - @if [ ! 
-r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ - echo '*** Error: Headers not exportable for this architecture ($(ARCH))'; \ - exit 1 ; fi - $(Q)$(MAKE) $(build)=scripts scripts/unifdef - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include - -PHONY += headers_check_all -headers_check_all: headers_install_all - $(Q)for arch in $(HDRARCHES); do \ - $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch HDRCHECK=1 ;\ - done - -PHONY += headers_check -headers_check: headers_install - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include HDRCHECK=1 - -# --------------------------------------------------------------------------- -# Modules - -ifdef CONFIG_MODULES - -# By default, build modules as well - -all: modules - -# Build modules - -PHONY += modules -modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) - @echo ' Building modules, stage 2.'; - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost - - -# Target to prepare building external modules -PHONY += modules_prepare -modules_prepare: prepare scripts - -# Target to install modules -PHONY += modules_install -modules_install: _modinst_ _modinst_post - -PHONY += _modinst_ -_modinst_: - @if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \ - echo "Warning: you may need to install module-init-tools"; \ - echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\ - sleep 1; \ - fi - @rm -rf $(MODLIB)/kernel - @rm -f $(MODLIB)/source - @mkdir -p $(MODLIB)/kernel - @ln -s $(srctree) $(MODLIB)/source - @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \ - rm -f $(MODLIB)/build ; \ - ln -s $(objtree) $(MODLIB)/build ; \ - fi - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst - -# If System.map exists, run depmod. This deliberately does not have a -# dependency on System.map since that would run the dependency tree on -# vmlinux. 
This depmod is only for convenience to give the initial -# boot a modules.dep even before / is mounted read-write. However the -# boot script depmod is the master version. -ifeq "$(strip $(INSTALL_MOD_PATH))" "" -depmod_opts := -else -depmod_opts := -b $(INSTALL_MOD_PATH) -r -endif -PHONY += _modinst_post -_modinst_post: _modinst_ - if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi - -else # CONFIG_MODULES - -# Modules not configured -# --------------------------------------------------------------------------- - -modules modules_install: FORCE - @echo - @echo "The present kernel configuration has modules disabled." - @echo "Type 'make config' and enable loadable module support." - @echo "Then build a kernel with module support enabled." - @echo - @exit 1 - -endif # CONFIG_MODULES - -### -# Cleaning is done on three levels. -# make clean Delete most generated files -# Leave enough to build external modules -# make mrproper Delete the current configuration, and all generated files -# make distclean Remove editor backup files, patch leftover files and the like - -# Directories & files removed with 'make clean' -CLEAN_DIRS += $(MODVERDIR) -CLEAN_FILES += vmlinux System.map \ - .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map - -# Directories & files removed with 'make mrproper' -MRPROPER_DIRS += include/config include2 usr/include -MRPROPER_FILES += .config .config.old include/asm .version .old_version \ - include/linux/autoconf.h include/linux/version.h \ - include/linux/utsrelease.h \ - Module.symvers tags TAGS cscope* - -# clean - Delete most, but leave enough to build external modules -# -clean: rm-dirs := $(CLEAN_DIRS) -clean: rm-files := $(CLEAN_FILES) -clean-dirs := $(addprefix _clean_,$(srctree) $(vmlinux-alldirs)) - -PHONY += $(clean-dirs) clean archclean -$(clean-dirs): - $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) - -clean: archclean $(clean-dirs) - $(call cmd,rmdirs) - $(call cmd,rmfiles) 
- @find . $(RCS_FIND_IGNORE) \ - \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ - -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ - -o -name '*.symtypes' \) \ - -type f -print | xargs rm -f - -# mrproper - Delete all generated files, including .config -# -mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS)) -mrproper: rm-files := $(wildcard $(MRPROPER_FILES)) -mrproper-dirs := $(addprefix _mrproper_,Documentation/DocBook scripts) - -PHONY += $(mrproper-dirs) mrproper archmrproper -$(mrproper-dirs): - $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@) - -mrproper: clean archmrproper $(mrproper-dirs) - $(call cmd,rmdirs) - $(call cmd,rmfiles) - -# distclean -# -PHONY += distclean - -distclean: mrproper - @find $(srctree) $(RCS_FIND_IGNORE) \ - \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ - -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ - -o -name '.*.rej' -o -size 0 \ - -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \ - -type f -print | xargs rm -f - - -# Packaging of the kernel to various formats -# --------------------------------------------------------------------------- -# rpm target kept for backward compatibility -package-dir := $(srctree)/scripts/package - -%pkg: include/config/kernel.release FORCE - $(Q)$(MAKE) $(build)=$(package-dir) $@ -rpm: include/config/kernel.release FORCE - $(Q)$(MAKE) $(build)=$(package-dir) $@ - - -# Brief documentation of the typical targets used -# --------------------------------------------------------------------------- - -boards := $(wildcard $(srctree)/arch/$(ARCH)/configs/*_defconfig) -boards := $(notdir $(boards)) - -help: - @echo 'Cleaning targets:' - @echo ' clean - Remove most generated files but keep the config and' - @echo ' enough build support to build external modules' - @echo ' mrproper - Remove all generated files + config + various backup files' - @echo ' distclean - mrproper + remove editor backup and patch files' - @echo '' - @echo 'Configuration targets:' - @$(MAKE) -f 
$(srctree)/scripts/kconfig/Makefile help - @echo '' - @echo 'Other generic targets:' - @echo ' all - Build all targets marked with [*]' - @echo '* vmlinux - Build the bare kernel' - @echo '* modules - Build all modules' - @echo ' modules_install - Install all modules to INSTALL_MOD_PATH (default: /)' - @echo ' dir/ - Build all files in dir and below' - @echo ' dir/file.[ois] - Build specified target only' - @echo ' dir/file.ko - Build module including final link' - @echo ' rpm - Build a kernel as an RPM package' - @echo ' tags/TAGS - Generate tags file for editors' - @echo ' cscope - Generate cscope index' - @echo ' kernelrelease - Output the release version string' - @echo ' kernelversion - Output the version stored in Makefile' - @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ - echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ - echo ' (default: $(INSTALL_HDR_PATH))'; \ - fi - @echo '' - @echo 'Static analysers' - @echo ' checkstack - Generate a list of stack hogs' - @echo ' namespacecheck - Name space analysis on compiled kernel' - @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ - echo ' headers_check - Sanity check on exported headers'; \ - fi - @echo '' - @echo 'Kernel packaging:' - @$(MAKE) $(build)=$(package-dir) help - @echo '' - @echo 'Documentation targets:' - @$(MAKE) -f $(srctree)/Documentation/DocBook/Makefile dochelp - @echo '' - @echo 'Architecture specific targets ($(ARCH)):' - @$(if $(archhelp),$(archhelp),\ - echo ' No architecture specific help defined for $(ARCH)') - @echo '' - @$(if $(boards), \ - $(foreach b, $(boards), \ - printf " %-24s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \ - echo '') - - @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build' - @echo ' make V=2 [targets] 2 => give reason for rebuild of target' - @echo ' make O=dir [targets] Locate all output files in "dir", including .config' - @echo ' make C=1 [targets] Check all c source with $$CHECK 
(sparse by default)' - @echo ' make C=2 [targets] Force check of all c source with $$CHECK' - @echo '' - @echo 'Execute "make" or "make all" to build all targets marked with [*] ' - @echo 'For further info see the ./README file' - - -# Documentation targets -# --------------------------------------------------------------------------- -%docs: scripts_basic FORCE - $(Q)$(MAKE) $(build)=Documentation/DocBook $@ - -else # KBUILD_EXTMOD - -### -# External module support. -# When building external modules the kernel used as basis is considered -# read-only, and no consistency checks are made and the make -# system is not used on the basis kernel. If updates are required -# in the basis kernel ordinary make commands (without M=...) must -# be used. -# -# The following are the only valid targets when building external -# modules. -# make M=dir clean Delete all automatically generated files -# make M=dir modules Make all modules in specified dir -# make M=dir Same as 'make M=dir modules' -# make M=dir modules_install -# Install the modules built in the module directory -# Assumes install directory is already created - -# We are always building modules -KBUILD_MODULES := 1 -PHONY += crmodverdir -crmodverdir: - $(Q)mkdir -p $(MODVERDIR) - $(Q)rm -f $(MODVERDIR)/* - -PHONY += $(objtree)/Module.symvers -$(objtree)/Module.symvers: - @test -e $(objtree)/Module.symvers || ( \ - echo; \ - echo " WARNING: Symbol version dump $(objtree)/Module.symvers"; \ - echo " is missing; modules will have no dependencies and modversions."; \ - echo ) - -module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD)) -PHONY += $(module-dirs) modules -$(module-dirs): crmodverdir $(objtree)/Module.symvers - $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) - -modules: $(module-dirs) - @echo ' Building modules, stage 2.'; - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost - -PHONY += modules_install -modules_install: _emodinst_ _emodinst_post - -install-dir := $(if 
$(INSTALL_MOD_DIR),$(INSTALL_MOD_DIR),extra) -PHONY += _emodinst_ -_emodinst_: - $(Q)mkdir -p $(MODLIB)/$(install-dir) - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst - -# Run depmod only is we have System.map and depmod is executable -quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) - cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \ - $(DEPMOD) -ae -F System.map \ - $(if $(strip $(INSTALL_MOD_PATH)), \ - -b $(INSTALL_MOD_PATH) -r) \ - $(KERNELRELEASE); \ - fi - -PHONY += _emodinst_post -_emodinst_post: _emodinst_ - $(call cmd,depmod) - -clean-dirs := $(addprefix _clean_,$(KBUILD_EXTMOD)) - -PHONY += $(clean-dirs) clean -$(clean-dirs): - $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) - -clean: rm-dirs := $(MODVERDIR) -clean: $(clean-dirs) - $(call cmd,rmdirs) - @find $(KBUILD_EXTMOD) $(RCS_FIND_IGNORE) \ - \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ - -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \ - -type f -print | xargs rm -f - -help: - @echo ' Building external modules.' - @echo ' Syntax: make -C path/to/kernel/src M=$$PWD target' - @echo '' - @echo ' modules - default target, build the module(s)' - @echo ' modules_install - install the module' - @echo ' clean - remove generated files in module directory only' - @echo '' - -# Dummies... -PHONY += prepare scripts -prepare: ; -scripts: ; -endif # KBUILD_EXTMOD - -# Generate tags for editors -# --------------------------------------------------------------------------- - -#We want __srctree to totally vanish out when KBUILD_OUTPUT is not set -#(which is the most common case IMHO) to avoid unneeded clutter in the big tags file. -#Adding $(srctree) adds about 20M on i386 to the size of the output file! 
- -ifeq ($(src),$(obj)) -__srctree = -else -__srctree = $(srctree)/ -endif - -ifeq ($(ALLSOURCE_ARCHS),) -ifeq ($(ARCH),um) -ALLINCLUDE_ARCHS := $(ARCH) $(SUBARCH) -else -ALLINCLUDE_ARCHS := $(ARCH) -endif -else -#Allow user to specify only ALLSOURCE_PATHS on the command line, keeping existing behavour. -ALLINCLUDE_ARCHS := $(ALLSOURCE_ARCHS) -endif - -ALLSOURCE_ARCHS := $(ARCH) - -define find-sources - ( for ARCH in $(ALLSOURCE_ARCHS) ; do \ - find $(__srctree)arch/$${ARCH} $(RCS_FIND_IGNORE) \ - -name $1 -print; \ - done ; \ - find $(__srctree)security/selinux/include $(RCS_FIND_IGNORE) \ - -name $1 -print; \ - find $(__srctree)include $(RCS_FIND_IGNORE) \ - \( -name config -o -name 'asm-*' \) -prune \ - -o -name $1 -print; \ - for ARCH in $(ALLINCLUDE_ARCHS) ; do \ - find $(__srctree)include/asm-$${ARCH} $(RCS_FIND_IGNORE) \ - -name $1 -print; \ - done ; \ - find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \ - -name $1 -print; \ - find $(__srctree) $(RCS_FIND_IGNORE) \ - \( -name include -o -name arch \) -prune -o \ - -name $1 -print; \ - ) -endef - -define all-sources - $(call find-sources,'*.[chS]') -endef -define all-kconfigs - $(call find-sources,'Kconfig*') -endef -define all-defconfigs - $(call find-sources,'defconfig') -endef - -define xtags - if $1 --version 2>&1 | grep -iq exuberant; then \ - $(all-sources) | xargs $1 -a \ - -I __initdata,__exitdata,__acquires,__releases \ - -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \ - --extra=+f --c-kinds=+px \ - --regex-asm='/ENTRY\(([^)]*)\).*/\1/'; \ - $(all-kconfigs) | xargs $1 -a \ - --langdef=kconfig \ - --language-force=kconfig \ - --regex-kconfig='/^[[:blank:]]*config[[:blank:]]+([[:alnum:]_]+)/\1/'; \ - $(all-defconfigs) | xargs -r $1 -a \ - --langdef=dotconfig \ - --language-force=dotconfig \ - --regex-dotconfig='/^#?[[:blank:]]*(CONFIG_[[:alnum:]_]+)/\1/'; \ - elif $1 --version 2>&1 | grep -iq emacs; then \ - $(all-sources) | xargs $1 -a; \ - $(all-kconfigs) | xargs $1 -a \ - --regex='/^[ \t]*config[ 
\t]+\([a-zA-Z0-9_]+\)/\1/'; \ - $(all-defconfigs) | xargs -r $1 -a \ - --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'; \ - else \ - $(all-sources) | xargs $1 -a; \ - fi -endef - -quiet_cmd_cscope-file = FILELST cscope.files - cmd_cscope-file = (echo \-k; echo \-q; $(all-sources)) > cscope.files - -quiet_cmd_cscope = MAKE cscope.out - cmd_cscope = cscope -b - -cscope: FORCE - $(call cmd,cscope-file) - $(call cmd,cscope) - -quiet_cmd_TAGS = MAKE $@ -define cmd_TAGS - rm -f $@; \ - $(call xtags,etags) -endef - -TAGS: FORCE - $(call cmd,TAGS) - -quiet_cmd_tags = MAKE $@ -define cmd_tags - rm -f $@; \ - $(call xtags,ctags) -endef - -tags: FORCE - $(call cmd,tags) - - -# Scripts to check various things for consistency -# --------------------------------------------------------------------------- - -includecheck: - find * $(RCS_FIND_IGNORE) \ - -name '*.[hcS]' -type f -print | sort \ - | xargs $(PERL) -w scripts/checkincludes.pl - -versioncheck: - find * $(RCS_FIND_IGNORE) \ - -name '*.[hcS]' -type f -print | sort \ - | xargs $(PERL) -w scripts/checkversion.pl - -namespacecheck: - $(PERL) $(srctree)/scripts/namespace.pl - -endif #ifeq ($(config-targets),1) -endif #ifeq ($(mixed-targets),1) - -PHONY += checkstack kernelrelease kernelversion - -# UML needs a little special treatment here. It wants to use the host -# toolchain, so needs $(SUBARCH) passed to checkstack.pl. Everyone -# else wants $(ARCH), including people doing cross-builds, which means -# that $(SUBARCH) doesn't work here. -ifeq ($(ARCH), um) -CHECKSTACK_ARCH := $(SUBARCH) -else -CHECKSTACK_ARCH := $(ARCH) -endif -checkstack: - $(OBJDUMP) -d vmlinux $$(find . 
-name '*.ko') | \ - $(PERL) $(src)/scripts/checkstack.pl $(CHECKSTACK_ARCH) - -kernelrelease: - $(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \ - $(error kernelrelease not valid - run 'make prepare' to update it)) -kernelversion: - @echo $(KERNELVERSION) - -# Single targets -# --------------------------------------------------------------------------- -# Single targets are compatible with: -# - build whith mixed source and output -# - build with separate output dir 'make O=...' -# - external modules -# -# target-dir => where to store outputfile -# build-dir => directory in kernel source tree to use - -ifeq ($(KBUILD_EXTMOD),) - build-dir = $(patsubst %/,%,$(dir $@)) - target-dir = $(dir $@) -else - zap-slash=$(filter-out .,$(patsubst %/,%,$(dir $@))) - build-dir = $(KBUILD_EXTMOD)$(if $(zap-slash),/$(zap-slash)) - target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@)) -endif - -%.s: %.c prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.i: %.c prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.o: %.c prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.lst: %.c prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.s: %.S prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.o: %.S prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.symtypes: %.c prepare scripts FORCE - $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) - -# Modules -/ %/: prepare scripts FORCE - $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ - $(build)=$(build-dir) -%.ko: prepare scripts FORCE - $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ - $(build)=$(build-dir) $(@:.ko=.o) - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost - -# FIXME Should go into a make.lib or something -# 
=========================================================================== - -quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs))) - cmd_rmdirs = rm -rf $(rm-dirs) - -quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files))) - cmd_rmfiles = rm -f $(rm-files) - - -a_flags = -Wp,-MD,$(depfile) $(AFLAGS) $(AFLAGS_KERNEL) \ - $(NOSTDINC_FLAGS) $(CPPFLAGS) \ - $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(basetarget).o) - -quiet_cmd_as_o_S = AS $@ -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< - -# read all saved command lines - -targets := $(wildcard $(sort $(targets))) -cmd_files := $(wildcard .*.cmd $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) - -ifneq ($(cmd_files),) - $(cmd_files): ; # Do not try to update included dependency files - include $(cmd_files) -endif - -# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir -# Usage: -# $(Q)$(MAKE) $(clean)=dir -clean := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.clean obj - -endif # skip-makefile - -PHONY += FORCE -FORCE: - -# Cancel implicit rules on top Makefile, `-rR' will apply to sub-makes. -Makefile: ; - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. 
-.PHONY: $(PHONY) diff -Nurb linux-2.6.22-570/arch/arm/Kconfig linux-2.6.22-591/arch/arm/Kconfig --- linux-2.6.22-570/arch/arm/Kconfig 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/arm/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -1034,6 +1034,8 @@ source "drivers/rtc/Kconfig" +source "drivers/dma/Kconfig" + endmenu source "fs/Kconfig" diff -Nurb linux-2.6.22-570/arch/arm/boot/.gitignore.rej linux-2.6.22-591/arch/arm/boot/.gitignore.rej --- linux-2.6.22-570/arch/arm/boot/.gitignore.rej 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/arm/boot/.gitignore.rej 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,10 @@ +*************** +*** 1,2 **** + Image + zImage +--- 1,5 ---- + Image + zImage ++ xipImage ++ bootpImage ++ uImage diff -Nurb linux-2.6.22-570/arch/arm/kernel/Makefile linux-2.6.22-591/arch/arm/kernel/Makefile --- linux-2.6.22-570/arch/arm/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o +obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312 diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S linux-2.6.22-591/arch/arm/kernel/kgdb-jmp.S --- linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/arm/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,32 @@ +/* + * arch/arm/kernel/kgdb-jmp.S + * + * Trivial setjmp and longjmp procedures to support bus error recovery + * which may occur during kgdb memory read/write operations. + * + * Author: MontaVista Software, Inc. + * source@mvista.com + * + * 2002-2005 (c) MontaVista Software, Inc. This file is licensed under the + * terms of the GNU General Public License version 2. 
This program is licensed + * "as is" without any warranty of any kind, whether express or implied. + */ +#include + +ENTRY (kgdb_fault_setjmp) + /* Save registers */ + stmia r0, {r0-r14} + str lr,[r0, #60] + mrs r1,cpsr + str r1,[r0,#64] + ldr r1,[r0,#4] + mov r0, #0 + mov pc,lr + +ENTRY (kgdb_fault_longjmp) + /* Restore registers */ + mov r1,#1 + str r1,[r0] + ldr r1,[r0, #64] + msr spsr,r1 + ldmia r0,{r0-pc}^ diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb.c linux-2.6.22-591/arch/arm/kernel/kgdb.c --- linux-2.6.22-570/arch/arm/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/arm/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,202 @@ +/* + * arch/arm/kernel/kgdb.c + * + * ARM KGDB support + * + * Copyright (c) 2002-2004 MontaVista Software, Inc + * + * Authors: George Davis + * Deepak Saxena + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Make a local copy of the registers passed into the handler (bletch) */ +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs) +{ + int regno; + + /* Initialize all to zero (??) 
*/ + for (regno = 0; regno < GDB_MAX_REGS; regno++) + gdb_regs[regno] = 0; + + gdb_regs[_R0] = kernel_regs->ARM_r0; + gdb_regs[_R1] = kernel_regs->ARM_r1; + gdb_regs[_R2] = kernel_regs->ARM_r2; + gdb_regs[_R3] = kernel_regs->ARM_r3; + gdb_regs[_R4] = kernel_regs->ARM_r4; + gdb_regs[_R5] = kernel_regs->ARM_r5; + gdb_regs[_R6] = kernel_regs->ARM_r6; + gdb_regs[_R7] = kernel_regs->ARM_r7; + gdb_regs[_R8] = kernel_regs->ARM_r8; + gdb_regs[_R9] = kernel_regs->ARM_r9; + gdb_regs[_R10] = kernel_regs->ARM_r10; + gdb_regs[_FP] = kernel_regs->ARM_fp; + gdb_regs[_IP] = kernel_regs->ARM_ip; + gdb_regs[_SP] = kernel_regs->ARM_sp; + gdb_regs[_LR] = kernel_regs->ARM_lr; + gdb_regs[_PC] = kernel_regs->ARM_pc; + gdb_regs[_CPSR] = kernel_regs->ARM_cpsr; +} + +/* Copy local gdb registers back to kgdb regs, for later copy to kernel */ +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs) +{ + kernel_regs->ARM_r0 = gdb_regs[_R0]; + kernel_regs->ARM_r1 = gdb_regs[_R1]; + kernel_regs->ARM_r2 = gdb_regs[_R2]; + kernel_regs->ARM_r3 = gdb_regs[_R3]; + kernel_regs->ARM_r4 = gdb_regs[_R4]; + kernel_regs->ARM_r5 = gdb_regs[_R5]; + kernel_regs->ARM_r6 = gdb_regs[_R6]; + kernel_regs->ARM_r7 = gdb_regs[_R7]; + kernel_regs->ARM_r8 = gdb_regs[_R8]; + kernel_regs->ARM_r9 = gdb_regs[_R9]; + kernel_regs->ARM_r10 = gdb_regs[_R10]; + kernel_regs->ARM_fp = gdb_regs[_FP]; + kernel_regs->ARM_ip = gdb_regs[_IP]; + kernel_regs->ARM_sp = gdb_regs[_SP]; + kernel_regs->ARM_lr = gdb_regs[_LR]; + kernel_regs->ARM_pc = gdb_regs[_PC]; + kernel_regs->ARM_cpsr = gdb_regs[_CPSR]; +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, + struct task_struct *task) +{ + int regno; + struct pt_regs *thread_regs; + + /* Just making sure... 
*/ + if (task == NULL) + return; + + /* Initialize to zero */ + for (regno = 0; regno < GDB_MAX_REGS; regno++) + gdb_regs[regno] = 0; + + /* Otherwise, we have only some registers from switch_to() */ + thread_regs = task_pt_regs(task); + gdb_regs[_R0] = thread_regs->ARM_r0; /* Not really valid? */ + gdb_regs[_R1] = thread_regs->ARM_r1; /* " " */ + gdb_regs[_R2] = thread_regs->ARM_r2; /* " " */ + gdb_regs[_R3] = thread_regs->ARM_r3; /* " " */ + gdb_regs[_R4] = thread_regs->ARM_r4; + gdb_regs[_R5] = thread_regs->ARM_r5; + gdb_regs[_R6] = thread_regs->ARM_r6; + gdb_regs[_R7] = thread_regs->ARM_r7; + gdb_regs[_R8] = thread_regs->ARM_r8; + gdb_regs[_R9] = thread_regs->ARM_r9; + gdb_regs[_R10] = thread_regs->ARM_r10; + gdb_regs[_FP] = thread_regs->ARM_fp; + gdb_regs[_IP] = thread_regs->ARM_ip; + gdb_regs[_SP] = thread_regs->ARM_sp; + gdb_regs[_LR] = thread_regs->ARM_lr; + gdb_regs[_PC] = thread_regs->ARM_pc; + gdb_regs[_CPSR] = thread_regs->ARM_cpsr; +} + +static int compiled_break; + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + long addr; + char *ptr; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + case 'c': + kgdb_contthread = NULL; + + /* + * Try to read optional parameter, pc unchanged if no parm. + * If this was a compiled breakpoint, we need to move + * to the next instruction or we will just breakpoint + * over and over again. 
+ */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) { + linux_regs->ARM_pc = addr; + } else if (compiled_break == 1) { + linux_regs->ARM_pc += 4; + } + + compiled_break = 0; + + return 0; + } + + return -1; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) +{ + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static struct undef_hook kgdb_brkpt_hook = { + .instr_mask = 0xffffffff, + .instr_val = KGDB_BREAKINST, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_hook = { + .instr_mask = 0xffffffff, + .instr_val = KGDB_COMPILED_BREAK, + .fn = kgdb_compiled_brk_fn +}; + +/* + * Register our undef instruction hooks with ARM undef core. + * We register a hook specifically looking for the KGDB break inst + * and we handle the normal undef case within the do_undefinstr + * handler. 
+ */ +int kgdb_arch_init(void) +{ + register_undef_hook(&kgdb_brkpt_hook); + register_undef_hook(&kgdb_compiled_brkpt_hook); + + return 0; +} + +struct kgdb_arch arch_kgdb_ops = { +#ifndef __ARMEB__ + .gdb_bpt_instr = {0xfe, 0xde, 0xff, 0xe7} +#else + .gdb_bpt_instr = {0xe7, 0xff, 0xde, 0xfe} +#endif +}; diff -Nurb linux-2.6.22-570/arch/arm/kernel/setup.c linux-2.6.22-591/arch/arm/kernel/setup.c --- linux-2.6.22-570/arch/arm/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -832,6 +832,11 @@ conswitchp = &dummy_con; #endif #endif + +#if defined(CONFIG_KGDB) + extern void __init early_trap_init(void); + early_trap_init(); +#endif } diff -Nurb linux-2.6.22-570/arch/arm/kernel/traps.c linux-2.6.22-591/arch/arm/kernel/traps.c --- linux-2.6.22-570/arch/arm/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/arm/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 @@ -301,6 +301,7 @@ unsigned int instr; struct undef_hook *hook; siginfo_t info; + mm_segment_t fs; void __user *pc; unsigned long flags; @@ -311,6 +312,8 @@ */ regs->ARM_pc -= correction; + fs = get_fs(); + set_fs(KERNEL_DS); pc = (void __user *)instruction_pointer(regs); if (processor_mode(regs) == SVC_MODE) { @@ -320,6 +323,7 @@ } else { get_user(instr, (u32 __user *)pc); } + set_fs(fs); spin_lock_irqsave(&undef_lock, flags); list_for_each_entry(hook, &undef_hook, node) { @@ -707,6 +711,13 @@ void __init trap_init(void) { +#if defined(CONFIG_KGDB) + return; +} + +void __init early_trap_init(void) +{ +#endif unsigned long vectors = CONFIG_VECTORS_BASE; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; diff -Nurb linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c linux-2.6.22-591/arch/arm/mach-iop13xx/setup.c --- linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop13xx/setup.c 2007-12-21 
15:36:11.000000000 -0500 @@ -25,6 +25,7 @@ #include #include #include +#include #define IOP13XX_UART_XTAL 33334000 #define IOP13XX_SETUP_DEBUG 0 @@ -236,19 +237,143 @@ } #endif +/* ADMA Channels */ +static struct resource iop13xx_adma_0_resources[] = { + [0] = { + .start = IOP13XX_ADMA_PHYS_BASE(0), + .end = IOP13XX_ADMA_UPPER_PA(0), + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_IOP13XX_ADMA0_EOT, + .end = IRQ_IOP13XX_ADMA0_EOT, + .flags = IORESOURCE_IRQ + }, + [2] = { + .start = IRQ_IOP13XX_ADMA0_EOC, + .end = IRQ_IOP13XX_ADMA0_EOC, + .flags = IORESOURCE_IRQ + }, + [3] = { + .start = IRQ_IOP13XX_ADMA0_ERR, + .end = IRQ_IOP13XX_ADMA0_ERR, + .flags = IORESOURCE_IRQ + } +}; + +static struct resource iop13xx_adma_1_resources[] = { + [0] = { + .start = IOP13XX_ADMA_PHYS_BASE(1), + .end = IOP13XX_ADMA_UPPER_PA(1), + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_IOP13XX_ADMA1_EOT, + .end = IRQ_IOP13XX_ADMA1_EOT, + .flags = IORESOURCE_IRQ + }, + [2] = { + .start = IRQ_IOP13XX_ADMA1_EOC, + .end = IRQ_IOP13XX_ADMA1_EOC, + .flags = IORESOURCE_IRQ + }, + [3] = { + .start = IRQ_IOP13XX_ADMA1_ERR, + .end = IRQ_IOP13XX_ADMA1_ERR, + .flags = IORESOURCE_IRQ + } +}; + +static struct resource iop13xx_adma_2_resources[] = { + [0] = { + .start = IOP13XX_ADMA_PHYS_BASE(2), + .end = IOP13XX_ADMA_UPPER_PA(2), + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_IOP13XX_ADMA2_EOT, + .end = IRQ_IOP13XX_ADMA2_EOT, + .flags = IORESOURCE_IRQ + }, + [2] = { + .start = IRQ_IOP13XX_ADMA2_EOC, + .end = IRQ_IOP13XX_ADMA2_EOC, + .flags = IORESOURCE_IRQ + }, + [3] = { + .start = IRQ_IOP13XX_ADMA2_ERR, + .end = IRQ_IOP13XX_ADMA2_ERR, + .flags = IORESOURCE_IRQ + } +}; + +static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK; +static struct iop_adma_platform_data iop13xx_adma_0_data = { + .hw_id = 0, + .pool_size = PAGE_SIZE, +}; + +static struct iop_adma_platform_data iop13xx_adma_1_data = { + .hw_id = 1, + .pool_size = PAGE_SIZE, +}; + +static struct iop_adma_platform_data 
iop13xx_adma_2_data = { + .hw_id = 2, + .pool_size = PAGE_SIZE, +}; + +/* The ids are fixed up later in iop13xx_platform_init */ +static struct platform_device iop13xx_adma_0_channel = { + .name = "iop-adma", + .id = 0, + .num_resources = 4, + .resource = iop13xx_adma_0_resources, + .dev = { + .dma_mask = &iop13xx_adma_dmamask, + .coherent_dma_mask = DMA_64BIT_MASK, + .platform_data = (void *) &iop13xx_adma_0_data, + }, +}; + +static struct platform_device iop13xx_adma_1_channel = { + .name = "iop-adma", + .id = 0, + .num_resources = 4, + .resource = iop13xx_adma_1_resources, + .dev = { + .dma_mask = &iop13xx_adma_dmamask, + .coherent_dma_mask = DMA_64BIT_MASK, + .platform_data = (void *) &iop13xx_adma_1_data, + }, +}; + +static struct platform_device iop13xx_adma_2_channel = { + .name = "iop-adma", + .id = 0, + .num_resources = 4, + .resource = iop13xx_adma_2_resources, + .dev = { + .dma_mask = &iop13xx_adma_dmamask, + .coherent_dma_mask = DMA_64BIT_MASK, + .platform_data = (void *) &iop13xx_adma_2_data, + }, +}; + void __init iop13xx_map_io(void) { /* Initialize the Static Page Table maps */ iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc)); } -static int init_uart = 0; -static int init_i2c = 0; +static int init_uart; +static int init_i2c; +static int init_adma; void __init iop13xx_platform_init(void) { int i; - u32 uart_idx, i2c_idx, plat_idx; + u32 uart_idx, i2c_idx, adma_idx, plat_idx; struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES]; /* set the bases so we can read the device id */ @@ -294,6 +419,12 @@ } } + if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) { + init_adma |= IOP13XX_INIT_ADMA_0; + init_adma |= IOP13XX_INIT_ADMA_1; + init_adma |= IOP13XX_INIT_ADMA_2; + } + plat_idx = 0; uart_idx = 0; i2c_idx = 0; @@ -332,6 +463,56 @@ } } + /* initialize adma channel ids and capabilities */ + adma_idx = 0; + for (i = 0; i < IQ81340_NUM_ADMA; i++) { + struct iop_adma_platform_data *plat_data; + if ((init_adma & (1 << i)) && 
IOP13XX_SETUP_DEBUG) + printk(KERN_INFO + "Adding adma%d to platform device list\n", i); + switch (init_adma & (1 << i)) { + case IOP13XX_INIT_ADMA_0: + iop13xx_adma_0_channel.id = adma_idx++; + iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel; + plat_data = &iop13xx_adma_0_data; + dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); + dma_cap_set(DMA_XOR, plat_data->cap_mask); + dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); + dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_MEMSET, plat_data->cap_mask); + dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); + dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); + break; + case IOP13XX_INIT_ADMA_1: + iop13xx_adma_1_channel.id = adma_idx++; + iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel; + plat_data = &iop13xx_adma_1_data; + dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); + dma_cap_set(DMA_XOR, plat_data->cap_mask); + dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); + dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_MEMSET, plat_data->cap_mask); + dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); + dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); + break; + case IOP13XX_INIT_ADMA_2: + iop13xx_adma_2_channel.id = adma_idx++; + iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel; + plat_data = &iop13xx_adma_2_data; + dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); + dma_cap_set(DMA_XOR, plat_data->cap_mask); + dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); + dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_MEMSET, plat_data->cap_mask); + dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); + dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); + dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); + dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); + dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); + break; + } + } + #ifdef CONFIG_MTD_PHYSMAP iq8134x_flash_resource.end = iq8134x_flash_resource.start + iq8134x_probe_flash_size() - 1; @@ -399,5 +580,35 @@ return 1; } 
+static int __init iop13xx_init_adma_setup(char *str) +{ + if (str) { + while (*str != '\0') { + switch (*str) { + case '0': + init_adma |= IOP13XX_INIT_ADMA_0; + break; + case '1': + init_adma |= IOP13XX_INIT_ADMA_1; + break; + case '2': + init_adma |= IOP13XX_INIT_ADMA_2; + break; + case ',': + case '=': + break; + default: + PRINTK("\"iop13xx_init_adma\" malformed" + " at character: \'%c\'", *str); + *(str + 1) = '\0'; + init_adma = IOP13XX_INIT_ADMA_DEFAULT; + } + str++; + } + } + return 1; +} + +__setup("iop13xx_init_adma", iop13xx_init_adma_setup); __setup("iop13xx_init_uart", iop13xx_init_uart_setup); __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup); diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c linux-2.6.22-591/arch/arm/mach-iop32x/glantank.c --- linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop32x/glantank.c 2007-12-21 15:36:11.000000000 -0500 @@ -180,6 +180,8 @@ platform_device_register(&iop3xx_i2c1_device); platform_device_register(&glantank_flash_device); platform_device_register(&glantank_serial_device); + platform_device_register(&iop3xx_dma_0_channel); + platform_device_register(&iop3xx_dma_1_channel); pm_power_off = glantank_power_off; } diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c linux-2.6.22-591/arch/arm/mach-iop32x/iq31244.c --- linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop32x/iq31244.c 2007-12-21 15:36:11.000000000 -0500 @@ -298,9 +298,14 @@ platform_device_register(&iop3xx_i2c1_device); platform_device_register(&iq31244_flash_device); platform_device_register(&iq31244_serial_device); + platform_device_register(&iop3xx_dma_0_channel); + platform_device_register(&iop3xx_dma_1_channel); if (is_ep80219()) pm_power_off = ep80219_power_off; + + if (!is_80219()) + platform_device_register(&iop3xx_aau_channel); } static int __init force_ep80219_setup(char *str) diff -Nurb 
linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c linux-2.6.22-591/arch/arm/mach-iop32x/iq80321.c --- linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop32x/iq80321.c 2007-12-21 15:36:11.000000000 -0500 @@ -181,6 +181,9 @@ platform_device_register(&iop3xx_i2c1_device); platform_device_register(&iq80321_flash_device); platform_device_register(&iq80321_serial_device); + platform_device_register(&iop3xx_dma_0_channel); + platform_device_register(&iop3xx_dma_1_channel); + platform_device_register(&iop3xx_aau_channel); } MACHINE_START(IQ80321, "Intel IQ80321") diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c linux-2.6.22-591/arch/arm/mach-iop32x/n2100.c --- linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop32x/n2100.c 2007-12-21 15:36:11.000000000 -0500 @@ -245,6 +245,8 @@ platform_device_register(&iop3xx_i2c0_device); platform_device_register(&n2100_flash_device); platform_device_register(&n2100_serial_device); + platform_device_register(&iop3xx_dma_0_channel); + platform_device_register(&iop3xx_dma_1_channel); pm_power_off = n2100_power_off; diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c linux-2.6.22-591/arch/arm/mach-iop33x/iq80331.c --- linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop33x/iq80331.c 2007-12-21 15:36:11.000000000 -0500 @@ -136,6 +136,9 @@ platform_device_register(&iop33x_uart0_device); platform_device_register(&iop33x_uart1_device); platform_device_register(&iq80331_flash_device); + platform_device_register(&iop3xx_dma_0_channel); + platform_device_register(&iop3xx_dma_1_channel); + platform_device_register(&iop3xx_aau_channel); } MACHINE_START(IQ80331, "Intel IQ80331") diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c linux-2.6.22-591/arch/arm/mach-iop33x/iq80332.c --- 
linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-iop33x/iq80332.c 2007-12-21 15:36:11.000000000 -0500 @@ -136,6 +136,9 @@ platform_device_register(&iop33x_uart0_device); platform_device_register(&iop33x_uart1_device); platform_device_register(&iq80332_flash_device); + platform_device_register(&iop3xx_dma_0_channel); + platform_device_register(&iop3xx_dma_1_channel); + platform_device_register(&iop3xx_aau_channel); } MACHINE_START(IQ80332, "Intel IQ80332") diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/core.c linux-2.6.22-591/arch/arm/mach-ixp2000/core.c --- linux-2.6.22-570/arch/arm/mach-ixp2000/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-ixp2000/core.c 2007-12-21 15:36:11.000000000 -0500 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,9 @@ void __init ixp2000_uart_init(void) { platform_device_register(&ixp2000_serial_device); +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(0, &ixp2000_serial_port); +#endif } diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c linux-2.6.22-591/arch/arm/mach-ixp2000/ixdp2x01.c --- linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-ixp2000/ixdp2x01.c 2007-12-21 15:36:11.000000000 -0500 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -413,6 +414,11 @@ platform_add_devices(ixdp2x01_devices, ARRAY_SIZE(ixdp2x01_devices)); ixp2000_uart_init(); ixdp2x01_uart_init(); + +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(0, ixdp2x01_serial_port1); + kgdb8250_add_platform_port(1, ixdp2x01_serial_port1); +#endif } diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c linux-2.6.22-591/arch/arm/mach-ixp4xx/coyote-setup.c --- linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-ixp4xx/coyote-setup.c 
2007-12-21 15:36:11.000000000 -0500 @@ -96,6 +96,10 @@ } platform_add_devices(coyote_devices, ARRAY_SIZE(coyote_devices)); + +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(0, &coyote_uart_data); +#endif } #ifdef CONFIG_ARCH_ADI_COYOTE diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c linux-2.6.22-591/arch/arm/mach-ixp4xx/ixdp425-setup.c --- linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-ixp4xx/ixdp425-setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -76,7 +76,8 @@ .mapbase = IXP4XX_UART1_BASE_PHYS, .membase = (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET, .irq = IRQ_IXP4XX_UART1, - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | + UPF_SHARE_IRQ, .iotype = UPIO_MEM, .regshift = 2, .uartclk = IXP4XX_UART_XTAL, @@ -85,7 +86,8 @@ .mapbase = IXP4XX_UART2_BASE_PHYS, .membase = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET, .irq = IRQ_IXP4XX_UART2, - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | + UPF_SHARE_IRQ, .iotype = UPIO_MEM, .regshift = 2, .uartclk = IXP4XX_UART_XTAL, @@ -123,12 +125,22 @@ platform_add_devices(ixdp425_devices, ARRAY_SIZE(ixdp425_devices)); } +static void __init ixdp425_map_io(void) +{ + ixp4xx_map_io(); + +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(0, &ixdp425_uart_data[0]); + kgdb8250_add_platform_port(1, &ixdp425_uart_data[1]); +#endif +} + #ifdef CONFIG_ARCH_IXDP425 MACHINE_START(IXDP425, "Intel IXDP425 Development Platform") /* Maintainer: MontaVista Software, Inc. */ .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS, .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc, - .map_io = ixp4xx_map_io, + .map_io = ixdp425_map_io, .init_irq = ixp4xx_init_irq, .timer = &ixp4xx_timer, .boot_params = 0x0100, @@ -141,7 +153,7 @@ /* Maintainer: MontaVista Software, Inc. 
*/ .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS, .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc, - .map_io = ixp4xx_map_io, + .map_io = ixdp425_map_io, .init_irq = ixp4xx_init_irq, .timer = &ixp4xx_timer, .boot_params = 0x0100, diff -Nurb linux-2.6.22-570/arch/arm/mach-omap1/serial.c linux-2.6.22-591/arch/arm/mach-omap1/serial.c --- linux-2.6.22-570/arch/arm/mach-omap1/serial.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-omap1/serial.c 2007-12-21 15:36:11.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -199,6 +200,9 @@ break; } omap_serial_reset(&serial_platform_data[i]); +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(i, &serial_platform_data[i]); +#endif } } diff -Nurb linux-2.6.22-570/arch/arm/mach-pnx4008/core.c linux-2.6.22-591/arch/arm/mach-pnx4008/core.c --- linux-2.6.22-570/arch/arm/mach-pnx4008/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-pnx4008/core.c 2007-12-21 15:36:11.000000000 -0500 @@ -224,6 +224,10 @@ spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info)); /* Switch on the UART clocks */ pnx4008_uart_init(); +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(0, &platform_serial_ports[0]); + kgdb8250_add_platform_port(1, &platform_serial_ports[1]); +#endif } static struct map_desc pnx4008_io_desc[] __initdata = { diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/Makefile linux-2.6.22-591/arch/arm/mach-pxa/Makefile --- linux-2.6.22-570/arch/arm/mach-pxa/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-pxa/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -31,6 +31,7 @@ # Misc features obj-$(CONFIG_PM) += pm.o sleep.o obj-$(CONFIG_PXA_SSP) += ssp.o +obj-$(CONFIG_KGDB_PXA_SERIAL) += kgdb-serial.o ifeq ($(CONFIG_PXA27x),y) obj-$(CONFIG_PM) += standby.o diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c linux-2.6.22-591/arch/arm/mach-pxa/kgdb-serial.c --- 
linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/arm/mach-pxa/kgdb-serial.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,97 @@ +/* + * linux/arch/arm/mach-pxa/kgdb-serial.c + * + * Provides low level kgdb serial support hooks for PXA2xx boards + * + * Author: Nicolas Pitre + * Copyright: (C) 2002-2005 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#if defined(CONFIG_KGDB_PXA_FFUART) + +#define UART FFUART +#define CKEN_UART CKEN6_FFUART +#define GPIO_RX_MD GPIO34_FFRXD_MD +#define GPIO_TX_MD GPIO39_FFTXD_MD + +#elif defined(CONFIG_KGDB_PXA_BTUART) + +#define UART BTUART +#define CKEN_UART CKEN7_BTUART +#define GPIO_RX_MD GPIO42_BTRXD_MD +#define GPIO_TX_MD GPIO43_BTTXD_MD + +#elif defined(CONFIG_KGDB_PXA_STUART) + +#define UART STUART +#define CKEN_UART CKEN5_STUART +#define GPIO_RX_MD GPIO46_STRXD_MD +#define GPIO_TX_MD GPIO47_STTXD_MD + +#endif + +#define UART_BAUDRATE (CONFIG_KGDB_BAUDRATE) + +static volatile unsigned long *port = (unsigned long *)&UART; + +static int kgdb_serial_init(void) +{ + pxa_set_cken(CKEN_UART, 1); + pxa_gpio_mode(GPIO_RX_MD); + pxa_gpio_mode(GPIO_TX_MD); + + port[UART_IER] = 0; + port[UART_LCR] = LCR_DLAB; + port[UART_DLL] = ((921600 / UART_BAUDRATE) & 0xff); + port[UART_DLM] = ((921600 / UART_BAUDRATE) >> 8); + port[UART_LCR] = LCR_WLS1 | LCR_WLS0; + port[UART_MCR] = 0; + port[UART_IER] = IER_UUE; + port[UART_FCR] = FCR_ITL_16; + + return 0; +} + +static void kgdb_serial_putchar(u8 c) +{ + if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE) + kgdb_serial_init(); + while (!(port[UART_LSR] & LSR_TDRQ)) + cpu_relax(); + port[UART_TX] = c; +} + +static void kgdb_serial_flush(void) +{ + if ((CKEN & CKEN_UART) && (port[UART_IER] & IER_UUE)) + while 
(!(port[UART_LSR] & LSR_TEMT)) + cpu_relax(); +} + +static int kgdb_serial_getchar(void) +{ + unsigned char c; + if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE) + kgdb_serial_init(); + while (!(port[UART_LSR] & UART_LSR_DR)) + cpu_relax(); + c = port[UART_RX]; + return c; +} + +struct kgdb_io kgdb_io_ops = { + .init = kgdb_serial_init, + .write_char = kgdb_serial_putchar, + .flush = kgdb_serial_flush, + .read_char = kgdb_serial_getchar, +}; diff -Nurb linux-2.6.22-570/arch/arm/mach-versatile/core.c linux-2.6.22-591/arch/arm/mach-versatile/core.c --- linux-2.6.22-570/arch/arm/mach-versatile/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mach-versatile/core.c 2007-12-21 15:36:11.000000000 -0500 @@ -184,6 +184,14 @@ .type = MT_DEVICE }, #endif +#ifdef CONFIG_KGDB_AMBA_PL011 + { + .virtual = IO_ADDRESS(CONFIG_KGDB_AMBA_BASE), + .pfn = __phys_to_pfn(CONFIG_KGDB_AMBA_BASE), + .length = SZ_4K, + .type = MT_DEVICE + }, +#endif #ifdef CONFIG_PCI { .virtual = IO_ADDRESS(VERSATILE_PCI_CORE_BASE), diff -Nurb linux-2.6.22-570/arch/arm/mm/extable.c linux-2.6.22-591/arch/arm/mm/extable.c --- linux-2.6.22-570/arch/arm/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 @@ -2,6 +2,7 @@ * linux/arch/arm/mm/extable.c */ #include +#include #include int fixup_exception(struct pt_regs *regs) @@ -11,6 +12,12 @@ fixup = search_exception_tables(instruction_pointer(regs)); if (fixup) regs->ARM_pc = fixup->fixup; +#ifdef CONFIG_KGDB + if (atomic_read(&debugger_active) && kgdb_may_fault) + /* Restore our previous state. */ + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + /* Not reached. 
*/ +#endif return fixup != NULL; } diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/Makefile linux-2.6.22-591/arch/arm/plat-iop/Makefile --- linux-2.6.22-570/arch/arm/plat-iop/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/arm/plat-iop/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -12,6 +12,7 @@ obj-$(CONFIG_ARCH_IOP32X) += time.o obj-$(CONFIG_ARCH_IOP32X) += io.o obj-$(CONFIG_ARCH_IOP32X) += cp6.o +obj-$(CONFIG_ARCH_IOP32X) += adma.o # IOP33X obj-$(CONFIG_ARCH_IOP33X) += gpio.o @@ -21,6 +22,7 @@ obj-$(CONFIG_ARCH_IOP33X) += time.o obj-$(CONFIG_ARCH_IOP33X) += io.o obj-$(CONFIG_ARCH_IOP33X) += cp6.o +obj-$(CONFIG_ARCH_IOP33X) += adma.o # IOP13XX obj-$(CONFIG_ARCH_IOP13XX) += cp6.o diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/adma.c linux-2.6.22-591/arch/arm/plat-iop/adma.c --- linux-2.6.22-570/arch/arm/plat-iop/adma.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/arm/plat-iop/adma.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,209 @@ +/* + * platform device definitions for the iop3xx dma/xor engines + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ +#include +#include +#include +#include +#include + +#ifdef CONFIG_ARCH_IOP32X +#define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT +#define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC +#define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR + +#define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT +#define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC +#define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR + +#define IRQ_AA_EOT IRQ_IOP32X_AA_EOT +#define IRQ_AA_EOC IRQ_IOP32X_AA_EOC +#define IRQ_AA_ERR IRQ_IOP32X_AA_ERR +#endif +#ifdef CONFIG_ARCH_IOP33X +#define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT +#define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC +#define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR + +#define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT +#define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC +#define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR + +#define IRQ_AA_EOT IRQ_IOP33X_AA_EOT +#define IRQ_AA_EOC IRQ_IOP33X_AA_EOC +#define IRQ_AA_ERR IRQ_IOP33X_AA_ERR +#endif +/* AAU and DMA Channels */ +static struct resource iop3xx_dma_0_resources[] = { + [0] = { + .start = IOP3XX_DMA_PHYS_BASE(0), + .end = IOP3XX_DMA_UPPER_PA(0), + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_DMA0_EOT, + .end = IRQ_DMA0_EOT, + .flags = IORESOURCE_IRQ + }, + [2] = { + .start = IRQ_DMA0_EOC, + .end = IRQ_DMA0_EOC, + .flags = IORESOURCE_IRQ + }, + [3] = { + .start = IRQ_DMA0_ERR, + .end = IRQ_DMA0_ERR, + .flags = IORESOURCE_IRQ + } +}; + +static struct resource iop3xx_dma_1_resources[] = { + [0] = { + .start = IOP3XX_DMA_PHYS_BASE(1), + .end = IOP3XX_DMA_UPPER_PA(1), + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_DMA1_EOT, + .end = IRQ_DMA1_EOT, + .flags = IORESOURCE_IRQ + }, + [2] = { + .start = IRQ_DMA1_EOC, + .end = IRQ_DMA1_EOC, + .flags = IORESOURCE_IRQ + }, + [3] = { + .start = IRQ_DMA1_ERR, + .end = IRQ_DMA1_ERR, + .flags = IORESOURCE_IRQ + } +}; + + +static struct resource iop3xx_aau_resources[] = { + [0] = { + .start = IOP3XX_AAU_PHYS_BASE, + .end = IOP3XX_AAU_UPPER_PA, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_AA_EOT, + .end = IRQ_AA_EOT, + .flags = 
IORESOURCE_IRQ + }, + [2] = { + .start = IRQ_AA_EOC, + .end = IRQ_AA_EOC, + .flags = IORESOURCE_IRQ + }, + [3] = { + .start = IRQ_AA_ERR, + .end = IRQ_AA_ERR, + .flags = IORESOURCE_IRQ + } +}; + +static u64 iop3xx_adma_dmamask = DMA_32BIT_MASK; + +static struct iop_adma_platform_data iop3xx_dma_0_data = { + .hw_id = DMA0_ID, + .pool_size = PAGE_SIZE, +}; + +static struct iop_adma_platform_data iop3xx_dma_1_data = { + .hw_id = DMA1_ID, + .pool_size = PAGE_SIZE, +}; + +static struct iop_adma_platform_data iop3xx_aau_data = { + .hw_id = AAU_ID, + .pool_size = 3 * PAGE_SIZE, +}; + +struct platform_device iop3xx_dma_0_channel = { + .name = "iop-adma", + .id = 0, + .num_resources = 4, + .resource = iop3xx_dma_0_resources, + .dev = { + .dma_mask = &iop3xx_adma_dmamask, + .coherent_dma_mask = DMA_64BIT_MASK, + .platform_data = (void *) &iop3xx_dma_0_data, + }, +}; + +struct platform_device iop3xx_dma_1_channel = { + .name = "iop-adma", + .id = 1, + .num_resources = 4, + .resource = iop3xx_dma_1_resources, + .dev = { + .dma_mask = &iop3xx_adma_dmamask, + .coherent_dma_mask = DMA_64BIT_MASK, + .platform_data = (void *) &iop3xx_dma_1_data, + }, +}; + +struct platform_device iop3xx_aau_channel = { + .name = "iop-adma", + .id = 2, + .num_resources = 4, + .resource = iop3xx_aau_resources, + .dev = { + .dma_mask = &iop3xx_adma_dmamask, + .coherent_dma_mask = DMA_64BIT_MASK, + .platform_data = (void *) &iop3xx_aau_data, + }, +}; + +static int __init iop3xx_adma_cap_init(void) +{ + #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */ + dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); + #else + dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); + dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask); + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); + #endif + + #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */ + dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); + 
dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); + #else + dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); + dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask); + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); + #endif + + #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */ + dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); + #else + dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); + #endif + + return 0; +} + +arch_initcall(iop3xx_adma_cap_init); diff -Nurb linux-2.6.22-570/arch/i386/Kconfig linux-2.6.22-591/arch/i386/Kconfig --- linux-2.6.22-570/arch/i386/Kconfig 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/i386/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -1053,6 +1053,8 @@ source "arch/i386/kernel/cpu/cpufreq/Kconfig" +source "drivers/cpuidle/Kconfig" + endmenu menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff -Nurb linux-2.6.22-570/arch/i386/kernel/Makefile linux-2.6.22-591/arch/i386/kernel/Makefile --- linux-2.6.22-570/arch/i386/kernel/Makefile 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -39,6 +39,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o +obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_VMI) += vmi.o vmiclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o diff -Nurb linux-2.6.22-570/arch/i386/kernel/acpi/boot.c linux-2.6.22-591/arch/i386/kernel/acpi/boot.c --- linux-2.6.22-570/arch/i386/kernel/acpi/boot.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/i386/kernel/acpi/boot.c 2007-12-21 15:36:11.000000000 -0500 @@ -950,14 +950,6 @@ }, { .callback = force_acpi_ht, - 
.ident = "DELL GX240", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - }, - }, - { - .callback = force_acpi_ht, .ident = "HP VISUALIZE NT Workstation", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), diff -Nurb linux-2.6.22-570/arch/i386/kernel/apm.c linux-2.6.22-591/arch/i386/kernel/apm.c --- linux-2.6.22-570/arch/i386/kernel/apm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/i386/kernel/apm.c 2007-12-21 15:36:11.000000000 -0500 @@ -222,6 +222,7 @@ #include #include #include +#include #include #include #include @@ -2311,7 +2312,6 @@ remove_proc_entry("apm", NULL); return err; } - kapmd_task->flags |= PF_NOFREEZE; wake_up_process(kapmd_task); if (num_online_cpus() > 1 && !smp ) { diff -Nurb linux-2.6.22-570/arch/i386/kernel/io_apic.c linux-2.6.22-591/arch/i386/kernel/io_apic.c --- linux-2.6.22-570/arch/i386/kernel/io_apic.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/io_apic.c 2007-12-21 15:36:11.000000000 -0500 @@ -667,6 +667,7 @@ set_pending_irq(i, cpumask_of_cpu(0)); } + set_freezable(); for ( ; ; ) { time_remaining = schedule_timeout_interruptible(time_remaining); try_to_freeze(); diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S linux-2.6.22-591/arch/i386/kernel/kgdb-jmp.S --- linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,74 @@ +/* + * arch/i386/kernel/kgdb-jmp.S + * + * Save and restore system registers so that within a limited frame we + * may have a fault and "jump back" to a known safe location. + * + * Author: George Anzinger + * + * Cribbed from glibc, which carries the following: + * Copyright (C) 1996, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. + * Copyright (C) 2005 by MontaVista Software. 
+ * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#include + +#define PCOFF 0 +#define LINKAGE 4 /* just the return address */ +#define PTR_SIZE 4 +#define PARMS LINKAGE /* no space for saved regs */ +#define JMPBUF PARMS +#define VAL JMPBUF+PTR_SIZE + +#define JB_BX 0 +#define JB_SI 1 +#define JB_DI 2 +#define JB_BP 3 +#define JB_SP 4 +#define JB_PC 5 + +/* This must be called prior to kgdb_fault_longjmp and + * kgdb_fault_longjmp must not be called outside of the context of the + * last call to kgdb_fault_setjmp. + * kgdb_fault_setjmp(int *jmp_buf[6]) + */ +ENTRY(kgdb_fault_setjmp) + movl JMPBUF(%esp), %eax + + /* Save registers. */ + movl %ebx, (JB_BX*4)(%eax) + movl %esi, (JB_SI*4)(%eax) + movl %edi, (JB_DI*4)(%eax) + /* Save SP as it will be after we return. */ + leal JMPBUF(%esp), %ecx + movl %ecx, (JB_SP*4)(%eax) + movl PCOFF(%esp), %ecx /* Save PC we are returning to now. */ + movl %ecx, (JB_PC*4)(%eax) + movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */ + + /* Restore state so we can now try the access. */ + movl JMPBUF(%esp), %ecx /* User's jmp_buf in %ecx. */ + /* Save the return address now. */ + movl (JB_PC*4)(%ecx), %edx + /* Restore registers. */ + movl $0, %eax + movl (JB_SP*4)(%ecx), %esp + jmp *%edx /* Jump to saved PC. */ + +/* kgdb_fault_longjmp(int *jmp_buf[6]) */ +ENTRY(kgdb_fault_longjmp) + movl JMPBUF(%esp), %ecx /* User's jmp_buf in %ecx. */ + /* Save the return address now. */ + movl (JB_PC*4)(%ecx), %edx + /* Restore registers. */ + movl (JB_BX*4)(%ecx), %ebx + movl (JB_SI*4)(%ecx), %esi + movl (JB_DI*4)(%ecx), %edi + movl (JB_BP*4)(%ecx), %ebp + movl $1, %eax + movl (JB_SP*4)(%ecx), %esp + jmp *%edx /* Jump to saved PC. 
*/ diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb.c linux-2.6.22-591/arch/i386/kernel/kgdb.c --- linux-2.6.22-570/arch/i386/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,388 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (C) 2000-2001 VERITAS Software Corporation. + * Copyright (C) 2007 Wind River Systems, Inc. + */ +/* + * Contributor: Lake Stevens Instrument Division$ + * Written by: Glenn Engel $ + * Updated by: Amit Kale + * Updated by: Tom Rini + * Updated by: Jason Wessel + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Origianl kgdb, compatibility with 2.1.xx kernel by + * David Grothe + * Additional support from Tigran Aivazian + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include + +#include "mach_ipi.h" + +/* Put the error code here just in case the user cares. */ +int gdb_i386errcode; +/* Likewise, the vector number here (since GDB only gets the signal + number through the usual means, and that's not very specific). 
*/ +int gdb_i386vector = -1; + +extern atomic_t cpu_doing_single_step; + +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + gdb_regs[_ESP] = (int)(&regs->esp); /* address of the esp field inside *regs, not its contents */ + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} + +/* + * Extracts ebp, esp and eip values understandable by gdb from the values + * saved by switch_to. + * thread.esp points to ebp. flags and ebp are pushed in switch_to hence esp + * prior to entering switch_to is 8 greater than the value that is saved. + * If switch_to changes, change following code appropriately. + */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + gdb_regs[_EAX] = 0; + gdb_regs[_EBX] = 0; + gdb_regs[_ECX] = 0; + gdb_regs[_EDX] = 0; + gdb_regs[_ESI] = 0; + gdb_regs[_EDI] = 0; + gdb_regs[_EBP] = *(unsigned long *)p->thread.esp; + gdb_regs[_DS] = __KERNEL_DS; + gdb_regs[_ES] = __KERNEL_DS; + gdb_regs[_PS] = 0; + gdb_regs[_CS] = __KERNEL_CS; + gdb_regs[_PC] = p->thread.eip; + gdb_regs[_ESP] = p->thread.esp; + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} + +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; +} + +static struct hw_breakpoint { + unsigned enabled; + unsigned type; +
unsigned len; + unsigned addr; +} breakinfo[4] = { + { .enabled = 0 }, + { .enabled = 0 }, + { .enabled = 0 }, + { .enabled = 0 }, +}; + +static void kgdb_correct_hw_break(void) /* sync DR7 and the address registers with breakinfo[] */ +{ + int breakno; + int correctit; + int breakbit; + unsigned long dr7; + + get_debugreg(dr7, 7); + correctit = 0; + for (breakno = 0; breakno < 4; breakno++) { /* one pass per breakinfo[] slot (0-3) */ + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000UL << (breakno << 2)); /* unsigned: the slot-3 shift reaches bit 31 */ + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + set_debugreg(breakinfo[breakno].addr, 0); + break; + + case 1: + set_debugreg(breakinfo[breakno].addr, 1); + break; + + case 2: + set_debugreg(breakinfo[breakno].addr, 2); + break; + + case 3: + set_debugreg(breakinfo[breakno].addr, 3); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000UL << (breakno << 2)); + } + } + if (correctit) + set_debugreg(dr7, 7); +} + +static int kgdb_remove_hw_break(unsigned long addr, int len, + enum kgdb_bptype bptype) +{ + int i, idx = -1; + for (i = 0; i < 4; i++) { + if (breakinfo[i].addr == addr && breakinfo[i].enabled) { + idx = i; + break; + } + } + if (idx == -1) + return -1; + + breakinfo[idx].enabled = 0; + return 0; +} + +static void kgdb_remove_all_hw_break(void) +{ + int i; + + for (i = 0; i < 4; i++) { + memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); + } +} + +static int kgdb_set_hw_break(unsigned long addr, int len, + enum kgdb_bptype bptype) +{ + int i, idx = -1; + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) { + idx = i; + break; + } + } + if (idx == -1) + return -1; + if (bptype == bp_hardware_breakpoint) { + breakinfo[idx].type = 0; + breakinfo[idx].len = 0; + } else if (bptype == bp_write_watchpoint) { + breakinfo[idx].type = 1; + if (len == 1 || len == 2 || len == 4) + breakinfo[idx].len = len -
1; + else + return -1; + } else if (bptype == bp_access_watchpoint) { + breakinfo[idx].type = 3; + if (len == 1 || len == 2 || len == 4) + breakinfo[idx].len = len - 1; + else + return -1; + } else + return -1; + breakinfo[idx].enabled = 1; + breakinfo[idx].addr = addr; + return 0; +} + +void kgdb_disable_hw_debug(struct pt_regs *regs) +{ + /* Disable hardware debugging while we are in kgdb */ + set_debugreg(0, 7); +} + +void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code) +{ + /* Master processor is completely in the debugger */ + gdb_i386vector = e_vector; + gdb_i386errcode = err_code; +} + +#ifdef CONFIG_SMP +void kgdb_roundup_cpus(unsigned long flags) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif + +int kgdb_arch_handle_exception(int e_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + long addr; + char *ptr; + int newPC, dr6; + + switch (remcom_in_buffer[0]) { + case 'c': + case 's': + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + linux_regs->eip = addr; + newPC = linux_regs->eip; + + /* clear the trace bit */ + linux_regs->eflags &= ~TF_MASK; + atomic_set(&cpu_doing_single_step, -1); + + /* set the trace bit if we're stepping */ + if (remcom_in_buffer[0] == 's') { + linux_regs->eflags |= TF_MASK; + debugger_step = 1; + atomic_set(&cpu_doing_single_step,raw_smp_processor_id()); + } + + get_debugreg(dr6, 6); + if (!(dr6 & 0x4000)) { + long breakno; + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + breakinfo[breakno].type == 0) { + /* Set restore flag */ + linux_regs->eflags |= X86_EFLAGS_RF; + break; + } + } + } + set_debugreg(0, 6); + kgdb_correct_hw_break(); + + return (0); + } /* switch */ + /* this means that we do not want to exit from the handler */ + return -1; +} + +/* Register KGDB with the i386die_chain so that we hook into all of the right + * 
spots. */ +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = ptr; + struct pt_regs *regs = args->regs; + + /* Bad memory access? */ + if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active) + && kgdb_may_fault) { + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + return NOTIFY_STOP; + } else if (cmd == DIE_PAGE_FAULT) + /* A normal page fault, ignore. */ + return NOTIFY_DONE; + else if ((cmd == DIE_NMI || cmd == DIE_NMI_IPI || + cmd == DIE_NMIWATCHDOG) && atomic_read(&debugger_active)) { + /* CPU roundup */ + kgdb_nmihook(raw_smp_processor_id(), regs); + return NOTIFY_STOP; + } else if (cmd == DIE_DEBUG + && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id() + && user_mode(regs)) { + /* single step exception from kernel space to user space so + * eat the exception and continue the process + */ + printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n"); + kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs); + return NOTIFY_STOP; + } else if (cmd == DIE_NMI_IPI || cmd == DIE_NMI || user_mode(regs) || + (cmd == DIE_DEBUG && atomic_read(&debugger_active))) + /* Normal watchdog event or userspace debugging, or spurious + * debug exception, ignore. */ + return NOTIFY_DONE; + + kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); + + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, +}; + +int kgdb_arch_init(void) +{ + register_die_notifier(&kgdb_notifier); + return 0; +} + +/* + * Skip an int3 exception when it occurs after a breakpoint has been + * removed. Backtrack eip by 1 since the int3 would have caused it to + * increment by 1. 
+ */ + +int kgdb_skipexception(int exception, struct pt_regs *regs) +{ + if (exception == 3 && kgdb_isremovedbreak(regs->eip - 1)) { + regs->eip -= 1; + return 1; + } + return 0; +} + +unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) +{ + if (exception == 3) { + return instruction_pointer(regs) - 1; + } + return instruction_pointer(regs); +} + +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0xcc}, + .flags = KGDB_HW_BREAKPOINT, + .set_hw_breakpoint = kgdb_set_hw_break, + .remove_hw_breakpoint = kgdb_remove_hw_break, + .remove_all_hw_break = kgdb_remove_all_hw_break, + .correct_hw_break = kgdb_correct_hw_break, +}; diff -Nurb linux-2.6.22-570/arch/i386/kernel/process.c linux-2.6.22-591/arch/i386/kernel/process.c --- linux-2.6.22-570/arch/i386/kernel/process.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/process.c 2007-12-21 15:36:11.000000000 -0500 @@ -179,13 +179,13 @@ /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(); while (!need_resched()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; + tick_nohz_stop_sched_tick(); check_pgt_cache(); rmb(); idle = pm_idle; diff -Nurb linux-2.6.22-570/arch/i386/kernel/setup.c linux-2.6.22-591/arch/i386/kernel/setup.c --- linux-2.6.22-570/arch/i386/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/i386/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -124,6 +124,7 @@ #endif extern void early_cpu_init(void); +extern void early_trap_init(void); extern int root_mountflags; unsigned long saved_videomode; @@ -514,6 +515,7 @@ memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); pre_setup_arch_hook(); early_cpu_init(); + early_trap_init(); /* * FIXME: This isn't an official loader_type right diff -Nurb linux-2.6.22-570/arch/i386/kernel/signal.c linux-2.6.22-591/arch/i386/kernel/signal.c --- linux-2.6.22-570/arch/i386/kernel/signal.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/arch/i386/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500 @@ -199,6 +199,13 @@ return eax; badframe: + if (show_unhandled_signals && printk_ratelimit()) + printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" + " esp:%lx oeax:%lx\n", + current->pid > 1 ? KERN_INFO : KERN_EMERG, + current->comm, current->pid, frame, regs->eip, + regs->esp, regs->orig_eax); + force_sig(SIGSEGV, current); return 0; } diff -Nurb linux-2.6.22-570/arch/i386/kernel/syscall_table.S linux-2.6.22-591/arch/i386/kernel/syscall_table.S --- linux-2.6.22-570/arch/i386/kernel/syscall_table.S 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/syscall_table.S 2007-12-21 15:36:11.000000000 -0500 @@ -323,3 +323,6 @@ .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_revokeat + .long sys_frevoke /* 325 */ + .long sys_fallocate diff -Nurb linux-2.6.22-570/arch/i386/kernel/traps.c linux-2.6.22-591/arch/i386/kernel/traps.c --- linux-2.6.22-570/arch/i386/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 @@ -97,6 +97,11 @@ int kstack_depth_to_print = 24; static unsigned int code_bytes = 64; +#ifdef CONFIG_STACK_UNWIND +static int call_trace = 1; +#else +#define call_trace (-1) +#endif static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -136,6 +141,34 @@ return ebp; } +struct ops_and_data { + struct stacktrace_ops *ops; + void *data; +}; + +static asmlinkage int +dump_trace_unwind(struct unwind_frame_info *info, void *data) +{ + struct ops_and_data *oad = (struct ops_and_data *)data; + int n = 0; + unsigned long sp = UNW_SP(info); + + if (arch_unw_user_mode(info)) + return -1; + while (unwind(info) == 0 && UNW_PC(info)) { + n++; + oad->ops->address(oad->data, UNW_PC(info)); + if (arch_unw_user_mode(info)) + break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = 
UNW_SP(info); + touch_nmi_watchdog(); + } + return n; +} + #define MSG(msg) ops->warning(data, msg) void dump_trace(struct task_struct *task, struct pt_regs *regs, @@ -147,6 +180,41 @@ if (!task) task = current; + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + struct ops_and_data oad = { .ops = ops, .data = data }; + + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } else if (task == current) + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); + else { + if (unwind_init_blocked(&info, task) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s", + UNW_PC(&info)); + if (UNW_SP(&info) >= PAGE_OFFSET) { + MSG("Leftover inexact backtrace:"); + stack = (void *)UNW_SP(&info); + if (!stack) + return; + ebp = UNW_FP(&info); + } else + MSG("Full inexact backtrace again:"); + } else if (call_trace >= 1) + return; + else + MSG("Full inexact backtrace again:"); + } else + MSG("Inexact backtrace:"); + } if (!stack) { unsigned long dummy; stack = &dummy; @@ -614,6 +682,13 @@ current->thread.error_code = error_code; current->thread.trap_no = 13; + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && + printk_ratelimit()) + printk(KERN_INFO + "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", + current->comm, current->pid, + regs->eip, regs->esp, error_code); + force_sig(SIGSEGV, current); return; @@ -854,6 +929,7 @@ */ clear_dr7: set_debugreg(0, 7); + notify_die(DIE_DEBUG, "debug2", regs, condition, error_code, SIGTRAP); return; debug_vm86: @@ -1118,6 +1194,12 @@ _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3)); } +/* Some traps need to be set early. 
*/ +void __init early_trap_init(void) { + set_intr_gate(1,&debug); + set_system_intr_gate(3, &int3); /* int3 can be called from all */ + set_intr_gate(14,&page_fault); +} void __init trap_init(void) { @@ -1134,10 +1216,8 @@ #endif set_trap_gate(0,&divide_error); - set_intr_gate(1,&debug); set_intr_gate(2,&nmi); - set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ - set_system_gate(4,&overflow); + set_system_gate(4,&overflow); /* int4 can be called from all */ set_trap_gate(5,&bounds); set_trap_gate(6,&invalid_op); set_trap_gate(7,&device_not_available); @@ -1147,7 +1227,6 @@ set_trap_gate(11,&segment_not_present); set_trap_gate(12,&stack_segment); set_trap_gate(13,&general_protection); - set_intr_gate(14,&page_fault); set_trap_gate(15,&spurious_interrupt_bug); set_trap_gate(16,&coprocessor_error); set_trap_gate(17,&alignment_check); @@ -1204,3 +1283,19 @@ return 1; } __setup("code_bytes=", code_bytes_setup); + +#ifdef CONFIG_STACK_UNWIND +static int __init call_trace_setup(char *s) /* parses the "call_trace=" option; unknown values leave call_trace unchanged */ +{ + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "newfallback") == 0) + call_trace = 1; + else if (strcmp(s, "new") == 0) /* strcmp() returns 0 on an exact match */ + call_trace = 2; + return 1; +} +__setup("call_trace=", call_trace_setup); +#endif diff -Nurb linux-2.6.22-570/arch/i386/kernel/unwind.S linux-2.6.22-591/arch/i386/kernel/unwind.S --- linux-2.6.22-570/arch/i386/kernel/unwind.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/i386/kernel/unwind.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,36 @@ +/* Assembler support code for dwarf2 unwinder */ +#include +#include +#include +#include +#include + +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movl 4(%esp), %edx + movl (%esp), %ecx + leal 4(%esp), %eax + movl %ebx, PT_EBX(%edx) + xorl %ebx, %ebx + movl %ebx, PT_ECX(%edx) + movl %ebx, PT_EDX(%edx) + movl %esi, PT_ESI(%edx) + movl %edi, PT_EDI(%edx) + movl %ebp, PT_EBP(%edx) + movl %ebx, PT_EAX(%edx) + movl
$__USER_DS, PT_DS(%edx) + movl $__USER_DS, PT_ES(%edx) + movl $0, PT_FS(%edx) + movl %ebx, PT_ORIG_EAX(%edx) + movl %ecx, PT_EIP(%edx) + movl 12(%esp), %ecx + movl $__KERNEL_CS, PT_CS(%edx) + movl %ebx, PT_EFLAGS(%edx) + movl %eax, PT_OLDESP(%edx) + movl 8(%esp), %eax + movl %ecx, 8(%esp) + movl PT_EBX(%edx), %ebx + movl $__KERNEL_DS, PT_OLDSS(%edx) + jmpl *%eax + CFI_ENDPROC +ENDPROC(arch_unwind_init_running) diff -Nurb linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c linux-2.6.22-591/arch/i386/mach-voyager/voyager_thread.c --- linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/i386/mach-voyager/voyager_thread.c 2007-12-21 15:36:11.000000000 -0500 @@ -52,7 +52,7 @@ NULL, }; - if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) { + if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, ret); } diff -Nurb linux-2.6.22-570/arch/i386/mm/fault.c linux-2.6.22-591/arch/i386/mm/fault.c --- linux-2.6.22-570/arch/i386/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/i386/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 @@ -284,6 +284,8 @@ return 0; } +int show_unhandled_signals = 1; + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -471,6 +473,14 @@ if (is_prefetch(regs, address, error_code)) return; + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk("%s%s[%d]: segfault at %08lx eip %08lx " + "esp %08lx error %lx\n", + tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, + tsk->comm, tsk->pid, address, regs->eip, + regs->esp, error_code); + } tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); @@ -508,6 +518,10 @@ if (is_prefetch(regs, address, error_code)) return; + if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs, + error_code, 14, SIGSEGV) == NOTIFY_STOP) + return; + /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. diff -Nurb linux-2.6.22-570/arch/ia64/hp/sim/simeth.c linux-2.6.22-591/arch/ia64/hp/sim/simeth.c --- linux-2.6.22-570/arch/ia64/hp/sim/simeth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ia64/hp/sim/simeth.c 2007-12-21 15:36:14.000000000 -0500 @@ -300,6 +300,9 @@ return NOTIFY_DONE; } + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; /* diff -Nurb linux-2.6.22-570/arch/ia64/kernel/Makefile linux-2.6.22-591/arch/ia64/kernel/Makefile --- linux-2.6.22-570/arch/ia64/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ia64/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -35,6 +35,7 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o +obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S linux-2.6.22-591/arch/ia64/kernel/kgdb-jmp.S --- linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/ia64/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,238 @@ +/* setjmp() and longjmp() assembler support for kdb on ia64. + + This code was copied from glibc CVS as of 2001-06-27 and modified where + necessary to fit the kernel. 
+ Keith Owens 2001-06-27 + */ + +/* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. + Contributed by David Mosberger-Tang . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +#include +GLOBAL_ENTRY(kgdb_fault_setjmp) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,2,2,0 + mov r16=ar.unat + ;; + mov r17=ar.fpsr + mov r2=in0 + add r3=8,in0 + ;; +.mem.offset 0,0; + st8.spill.nta [r2]=sp,16 // r12 (sp) +.mem.offset 8,0; + st8.spill.nta [r3]=gp,16 // r1 (gp) + ;; + st8.nta [r2]=r16,16 // save caller's unat + st8.nta [r3]=r17,16 // save fpsr + add r8=0xa0,in0 + ;; +.mem.offset 160,0; + st8.spill.nta [r2]=r4,16 // r4 +.mem.offset 168,0; + st8.spill.nta [r3]=r5,16 // r5 + add r9=0xb0,in0 + ;; + stf.spill.nta [r8]=f2,32 + stf.spill.nta [r9]=f3,32 + mov loc0=rp + .body + ;; + stf.spill.nta [r8]=f4,32 + stf.spill.nta [r9]=f5,32 + mov r17=b1 + ;; + stf.spill.nta [r8]=f16,32 + stf.spill.nta [r9]=f17,32 + mov r18=b2 + ;; + stf.spill.nta [r8]=f18,32 + stf.spill.nta [r9]=f19,32 + mov r19=b3 + ;; + stf.spill.nta [r8]=f20,32 + stf.spill.nta [r9]=f21,32 + mov r20=b4 + ;; + stf.spill.nta [r8]=f22,32 + stf.spill.nta [r9]=f23,32 + mov r21=b5 + ;; + stf.spill.nta [r8]=f24,32 + stf.spill.nta [r9]=f25,32 + mov r22=ar.lc + 
;; + stf.spill.nta [r8]=f26,32 + stf.spill.nta [r9]=f27,32 + mov r24=pr + ;; + stf.spill.nta [r8]=f28,32 + stf.spill.nta [r9]=f29,32 + ;; + stf.spill.nta [r8]=f30 + stf.spill.nta [r9]=f31 + +.mem.offset 0,0; + st8.spill.nta [r2]=r6,16 // r6 +.mem.offset 8,0; + st8.spill.nta [r3]=r7,16 // r7 + ;; + mov r23=ar.bsp + mov r25=ar.unat + st8.nta [r2]=loc0,16 // b0 + st8.nta [r3]=r17,16 // b1 + ;; + st8.nta [r2]=r18,16 // b2 + st8.nta [r3]=r19,16 // b3 + ;; + st8.nta [r2]=r20,16 // b4 + st8.nta [r3]=r21,16 // b5 + ;; + st8.nta [r2]=loc1,16 // ar.pfs + st8.nta [r3]=r22,16 // ar.lc + ;; + st8.nta [r2]=r24,16 // pr + st8.nta [r3]=r23,16 // ar.bsp + ;; + st8.nta [r2]=r25 // ar.unat + st8.nta [r3]=in0 // &__jmp_buf + mov r8=0 + mov rp=loc0 + mov ar.pfs=loc1 + br.ret.sptk.few rp +END(kgdb_fault_setjmp) +#define pPos p6 /* is rotate count positive? */ +#define pNeg p7 /* is rotate count negative? */ +GLOBAL_ENTRY(kgdb_fault_longjmp) + alloc r8=ar.pfs,2,1,0,0 + mov r27=ar.rsc + add r2=0x98,in0 // r2 <- &jmpbuf.orig_jmp_buf_addr + ;; + ld8 r8=[r2],-16 // r8 <- orig_jmp_buf_addr + mov r10=ar.bsp + and r11=~0x3,r27 // clear ar.rsc.mode + ;; + flushrs // flush dirty regs to backing store (must be first in insn grp) + ld8 r23=[r2],8 // r23 <- jmpbuf.ar_bsp + sub r8=r8,in0 // r8 <- &orig_jmpbuf - &jmpbuf + ;; + ld8 r25=[r2] // r25 <- jmpbuf.ar_unat + extr.u r8=r8,3,6 // r8 <- (&orig_jmpbuf - &jmpbuf)/8 & 0x3f + ;; + cmp.lt pNeg,pPos=r8,r0 + mov r2=in0 + ;; +(pPos) mov r16=r8 +(pNeg) add r16=64,r8 +(pPos) sub r17=64,r8 +(pNeg) sub r17=r0,r8 + ;; + mov ar.rsc=r11 // put RSE in enforced lazy mode + shr.u r8=r25,r16 + add r3=8,in0 // r3 <- &jmpbuf.r1 + shl r9=r25,r17 + ;; + or r25=r8,r9 + ;; + mov r26=ar.rnat + mov ar.unat=r25 // setup ar.unat (NaT bits for r1, r4-r7, and r12) + ;; + ld8.fill.nta sp=[r2],16 // r12 (sp) + ld8.fill.nta gp=[r3],16 // r1 (gp) + dep r11=-1,r23,3,6 // r11 <- ia64_rse_rnat_addr(jmpbuf.ar_bsp) + ;; + ld8.nta r16=[r2],16 // caller's unat + ld8.nta r17=[r3],16 // fpsr +
;; + ld8.fill.nta r4=[r2],16 // r4 + ld8.fill.nta r5=[r3],16 // r5 + cmp.geu p8,p0=r10,r11 // p8 <- (ar.bsp >= jmpbuf.ar_bsp) + ;; + ld8.fill.nta r6=[r2],16 // r6 + ld8.fill.nta r7=[r3],16 // r7 + ;; + mov ar.unat=r16 // restore caller's unat + mov ar.fpsr=r17 // restore fpsr + ;; + ld8.nta r16=[r2],16 // b0 + ld8.nta r17=[r3],16 // b1 + ;; +(p8) ld8 r26=[r11] // r26 <- *ia64_rse_rnat_addr(jmpbuf.ar_bsp) + mov ar.bspstore=r23 // restore ar.bspstore + ;; + ld8.nta r18=[r2],16 // b2 + ld8.nta r19=[r3],16 // b3 + ;; + ld8.nta r20=[r2],16 // b4 + ld8.nta r21=[r3],16 // b5 + ;; + ld8.nta r11=[r2],16 // ar.pfs + ld8.nta r22=[r3],56 // ar.lc + ;; + ld8.nta r24=[r2],32 // pr + mov b0=r16 + ;; + ldf.fill.nta f2=[r2],32 + ldf.fill.nta f3=[r3],32 + mov b1=r17 + ;; + ldf.fill.nta f4=[r2],32 + ldf.fill.nta f5=[r3],32 + mov b2=r18 + ;; + ldf.fill.nta f16=[r2],32 + ldf.fill.nta f17=[r3],32 + mov b3=r19 + ;; + ldf.fill.nta f18=[r2],32 + ldf.fill.nta f19=[r3],32 + mov b4=r20 + ;; + ldf.fill.nta f20=[r2],32 + ldf.fill.nta f21=[r3],32 + mov b5=r21 + ;; + ldf.fill.nta f22=[r2],32 + ldf.fill.nta f23=[r3],32 + mov ar.lc=r22 + ;; + ldf.fill.nta f24=[r2],32 + ldf.fill.nta f25=[r3],32 + cmp.eq p8,p9=0,in1 + ;; + ldf.fill.nta f26=[r2],32 + ldf.fill.nta f27=[r3],32 + mov ar.pfs=r11 + ;; + ldf.fill.nta f28=[r2],32 + ldf.fill.nta f29=[r3],32 + ;; + ldf.fill.nta f30=[r2] + ldf.fill.nta f31=[r3] +(p8) mov r8=1 + + mov ar.rnat=r26 // restore ar.rnat + ;; + mov ar.rsc=r27 // restore ar.rsc +(p9) mov r8=in1 + + invala // virt. -> phys.
regnum mapping may change + mov pr=r24,-1 + br.ret.sptk.few rp +END(kgdb_fault_longjmp) diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb.c linux-2.6.22-591/arch/ia64/kernel/kgdb.c --- linux-2.6.22-570/arch/ia64/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/ia64/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,944 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (C) 2000-2001 VERITAS Software Corporation. + * (c) Copyright 2005 Hewlett-Packard Development Company, L.P. + * Bob Picco + */ +/* + * Contributor: Lake Stevens Instrument Division$ + * Written by: Glenn Engel $ + * Updated by: Amit Kale + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Origianl kgdb, compatibility with 2.1.xx kernel by David Grothe + */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include + +#define NUM_REGS 590 +#define REGISTER_BYTES (NUM_REGS*8+128*8) +#define REGISTER_BYTE(N) (((N) * 8) \ + + ((N) <= IA64_FR0_REGNUM ? \ + 0 : 8 * (((N) > IA64_FR127_REGNUM) ? 128 : (N) - IA64_FR0_REGNUM))) +#define REGISTER_SIZE(N) \ + (((N) >= IA64_FR0_REGNUM && (N) <= IA64_FR127_REGNUM) ? 
16 : 8) +#define IA64_GR0_REGNUM 0 +#define IA64_FR0_REGNUM 128 +#define IA64_FR127_REGNUM (IA64_FR0_REGNUM+127) +#define IA64_PR0_REGNUM 256 +#define IA64_BR0_REGNUM 320 +#define IA64_VFP_REGNUM 328 +#define IA64_PR_REGNUM 330 +#define IA64_IP_REGNUM 331 +#define IA64_PSR_REGNUM 332 +#define IA64_CFM_REGNUM 333 +#define IA64_AR0_REGNUM 334 +#define IA64_NAT0_REGNUM 462 +#define IA64_NAT31_REGNUM (IA64_NAT0_REGNUM+31) +#define IA64_NAT32_REGNUM (IA64_NAT0_REGNUM+32) +#define IA64_RSC_REGNUM (IA64_AR0_REGNUM+16) +#define IA64_BSP_REGNUM (IA64_AR0_REGNUM+17) +#define IA64_BSPSTORE_REGNUM (IA64_AR0_REGNUM+18) +#define IA64_RNAT_REGNUM (IA64_AR0_REGNUM+19) +#define IA64_FCR_REGNUM (IA64_AR0_REGNUM+21) +#define IA64_EFLAG_REGNUM (IA64_AR0_REGNUM+24) +#define IA64_CSD_REGNUM (IA64_AR0_REGNUM+25) +#define IA64_SSD_REGNUM (IA64_AR0_REGNUM+26) +#define IA64_CFLG_REGNUM (IA64_AR0_REGNUM+27) +#define IA64_FSR_REGNUM (IA64_AR0_REGNUM+28) +#define IA64_FIR_REGNUM (IA64_AR0_REGNUM+29) +#define IA64_FDR_REGNUM (IA64_AR0_REGNUM+30) +#define IA64_CCV_REGNUM (IA64_AR0_REGNUM+32) +#define IA64_UNAT_REGNUM (IA64_AR0_REGNUM+36) +#define IA64_FPSR_REGNUM (IA64_AR0_REGNUM+40) +#define IA64_ITC_REGNUM (IA64_AR0_REGNUM+44) +#define IA64_PFS_REGNUM (IA64_AR0_REGNUM+64) +#define IA64_LC_REGNUM (IA64_AR0_REGNUM+65) +#define IA64_EC_REGNUM (IA64_AR0_REGNUM+66) + +#define REGISTER_INDEX(N) (REGISTER_BYTE(N) / sizeof (unsigned long)) +#define BREAK_INSTR_ALIGN (~0xfULL) + +#define ptoff(V) ((unsigned int) &((struct pt_regs *)0x0)->V) +struct reg_to_ptreg_index { + unsigned int reg; + unsigned int ptregoff; +}; + +static struct reg_to_ptreg_index gr_reg_to_ptreg_index[] = { + {IA64_GR0_REGNUM + 1, ptoff(r1)}, + {IA64_GR0_REGNUM + 2, ptoff(r2)}, + {IA64_GR0_REGNUM + 3, ptoff(r3)}, + {IA64_GR0_REGNUM + 8, ptoff(r8)}, + {IA64_GR0_REGNUM + 9, ptoff(r9)}, + {IA64_GR0_REGNUM + 10, ptoff(r10)}, + {IA64_GR0_REGNUM + 11, ptoff(r11)}, + {IA64_GR0_REGNUM + 12, ptoff(r12)}, + {IA64_GR0_REGNUM + 13, 
ptoff(r13)}, + {IA64_GR0_REGNUM + 14, ptoff(r14)}, + {IA64_GR0_REGNUM + 15, ptoff(r15)}, + {IA64_GR0_REGNUM + 16, ptoff(r16)}, + {IA64_GR0_REGNUM + 17, ptoff(r17)}, + {IA64_GR0_REGNUM + 18, ptoff(r18)}, + {IA64_GR0_REGNUM + 19, ptoff(r19)}, + {IA64_GR0_REGNUM + 20, ptoff(r20)}, + {IA64_GR0_REGNUM + 21, ptoff(r21)}, + {IA64_GR0_REGNUM + 22, ptoff(r22)}, + {IA64_GR0_REGNUM + 23, ptoff(r23)}, + {IA64_GR0_REGNUM + 24, ptoff(r24)}, + {IA64_GR0_REGNUM + 25, ptoff(r25)}, + {IA64_GR0_REGNUM + 26, ptoff(r26)}, + {IA64_GR0_REGNUM + 27, ptoff(r27)}, + {IA64_GR0_REGNUM + 28, ptoff(r28)}, + {IA64_GR0_REGNUM + 29, ptoff(r29)}, + {IA64_GR0_REGNUM + 30, ptoff(r30)}, + {IA64_GR0_REGNUM + 31, ptoff(r31)}, +}; + +static struct reg_to_ptreg_index br_reg_to_ptreg_index[] = { + {IA64_BR0_REGNUM, ptoff(b0)}, + {IA64_BR0_REGNUM + 6, ptoff(b6)}, + {IA64_BR0_REGNUM + 7, ptoff(b7)}, +}; + +static struct reg_to_ptreg_index ar_reg_to_ptreg_index[] = { + {IA64_PFS_REGNUM, ptoff(ar_pfs)}, + {IA64_UNAT_REGNUM, ptoff(ar_unat)}, + {IA64_RNAT_REGNUM, ptoff(ar_rnat)}, + {IA64_BSPSTORE_REGNUM, ptoff(ar_bspstore)}, + {IA64_RSC_REGNUM, ptoff(ar_rsc)}, + {IA64_CSD_REGNUM, ptoff(ar_csd)}, + {IA64_SSD_REGNUM, ptoff(ar_ssd)}, + {IA64_FPSR_REGNUM, ptoff(ar_fpsr)}, + {IA64_CCV_REGNUM, ptoff(ar_ccv)}, +}; + +extern atomic_t cpu_doing_single_step; + +static int kgdb_gr_reg(int regnum, struct unw_frame_info *info, + unsigned long *reg, int rw) +{ + char nat; + + if ((regnum >= IA64_GR0_REGNUM && regnum <= (IA64_GR0_REGNUM + 1)) || + (regnum >= (IA64_GR0_REGNUM + 4) && + regnum <= (IA64_GR0_REGNUM + 7))) + return !unw_access_gr(info, regnum - IA64_GR0_REGNUM, + reg, &nat, rw); + else + return 0; +} +static int kgdb_gr_ptreg(int regnum, struct pt_regs * ptregs, + struct unw_frame_info *info, unsigned long *reg, int rw) +{ + int i, result = 1; + char nat; + + if (!((regnum >= (IA64_GR0_REGNUM + 2) && + regnum <= (IA64_GR0_REGNUM + 3)) || + (regnum >= (IA64_GR0_REGNUM + 8) && + regnum <= (IA64_GR0_REGNUM + 15)) || + 
(regnum >= (IA64_GR0_REGNUM + 16) && + regnum <= (IA64_GR0_REGNUM + 31)))) + return 0; + else if (rw && ptregs) { + for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++) + if (gr_reg_to_ptreg_index[i].reg == regnum) { + *((unsigned long *)(((void *)ptregs) + + gr_reg_to_ptreg_index[i].ptregoff)) = *reg; + break; + } + } else if (!rw && ptregs) { + for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++) + if (gr_reg_to_ptreg_index[i].reg == regnum) { + *reg = *((unsigned long *) + (((void *)ptregs) + + gr_reg_to_ptreg_index[i].ptregoff)); + break; + } + } else + result = !unw_access_gr(info, regnum - IA64_GR0_REGNUM, + reg, &nat, rw); + return result; +} + +static int kgdb_br_reg(int regnum, struct pt_regs * ptregs, + struct unw_frame_info *info, unsigned long *reg, int rw) +{ + int i, result = 1; + + if (!(regnum >= IA64_BR0_REGNUM && regnum <= (IA64_BR0_REGNUM + 7))) + return 0; + + switch (regnum) { + case IA64_BR0_REGNUM: + case IA64_BR0_REGNUM + 6: + case IA64_BR0_REGNUM + 7: + if (!ptregs) { /* no saved pt_regs: ask the unwinder, as kgdb_gr_ptreg() does */ + result = !unw_access_br(info, regnum - IA64_BR0_REGNUM, reg, rw); + } else if (rw) { + for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++) + if (br_reg_to_ptreg_index[i].reg == regnum) { + *((unsigned long *)(((void *)ptregs) + + br_reg_to_ptreg_index[i].ptregoff)) = *reg; + break; + } + } else + for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++) + if (br_reg_to_ptreg_index[i].reg == regnum) { + *reg = *((unsigned long *) + (((void *)ptregs) + + br_reg_to_ptreg_index[i].
+ ptregoff)); + break; + } + break; + case IA64_BR0_REGNUM + 1: + case IA64_BR0_REGNUM + 2: + case IA64_BR0_REGNUM + 3: + case IA64_BR0_REGNUM + 4: + case IA64_BR0_REGNUM + 5: + result = !unw_access_br(info, regnum - IA64_BR0_REGNUM, + reg, rw); + break; + } + + return result; +} + +static int kgdb_fr_reg(int regnum, char *inbuffer, struct pt_regs * ptregs, + struct unw_frame_info *info, unsigned long *reg, + struct ia64_fpreg *freg, int rw) +{ + int result = 1; + + if (!(regnum >= IA64_FR0_REGNUM && regnum <= (IA64_FR0_REGNUM + 127))) + return 0; + + switch (regnum) { + case IA64_FR0_REGNUM + 6: + case IA64_FR0_REGNUM + 7: + case IA64_FR0_REGNUM + 8: + case IA64_FR0_REGNUM + 9: + case IA64_FR0_REGNUM + 10: + case IA64_FR0_REGNUM + 11: + case IA64_FR0_REGNUM + 12: + if (rw) { + char *ptr = inbuffer; + + freg->u.bits[0] = *reg; + kgdb_hex2long(&ptr, &freg->u.bits[1]); + *(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6))) = + *freg; + break; + } else if (!ptregs) + result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM, + freg, rw); + else + *freg = + *(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6))); + break; + default: + if (!rw) + result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM, + freg, rw); + else + result = 0; + break; + } + + return result; +} + +static int kgdb_ar_reg(int regnum, struct pt_regs * ptregs, + struct unw_frame_info *info, unsigned long *reg, int rw) +{ + int result = 0, i; + + if (!(regnum >= IA64_AR0_REGNUM && regnum <= IA64_EC_REGNUM)) + return 0; + + if (rw && ptregs) { + for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++) + if (ar_reg_to_ptreg_index[i].reg == regnum) { + *((unsigned long *) (((void *)ptregs) + + ar_reg_to_ptreg_index[i].ptregoff)) = + *reg; + result = 1; + break; + } + } else if (ptregs) { + for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++) + if (ar_reg_to_ptreg_index[i].reg == regnum) { + *reg = *((unsigned long *) (((void *)ptregs) + + ar_reg_to_ptreg_index[i].ptregoff)); + result = 1; + break; + } + } + + if 
(result) + return result; + + result = 1; + /* Not found in pt_regs (or no pt_regs at all): go through the unwinder. */ + switch (regnum) { + case IA64_CSD_REGNUM: + result = !unw_access_ar(info, UNW_AR_CSD, reg, rw); + break; + case IA64_SSD_REGNUM: + result = !unw_access_ar(info, UNW_AR_SSD, reg, rw); + break; + case IA64_UNAT_REGNUM: + result = !unw_access_ar(info, UNW_AR_UNAT, reg, rw); + break; + case IA64_RNAT_REGNUM: + result = !unw_access_ar(info, UNW_AR_RNAT, reg, rw); + break; + case IA64_BSPSTORE_REGNUM: + result = !unw_access_ar(info, UNW_AR_BSPSTORE, reg, rw); + break; + case IA64_PFS_REGNUM: + result = !unw_access_ar(info, UNW_AR_PFS, reg, rw); + break; + case IA64_LC_REGNUM: + result = !unw_access_ar(info, UNW_AR_LC, reg, rw); + break; + case IA64_EC_REGNUM: + result = !unw_access_ar(info, UNW_AR_EC, reg, rw); + break; + case IA64_FPSR_REGNUM: + result = !unw_access_ar(info, UNW_AR_FPSR, reg, rw); + break; + case IA64_RSC_REGNUM: + result = !unw_access_ar(info, UNW_AR_RSC, reg, rw); + break; + case IA64_CCV_REGNUM: + result = !unw_access_ar(info, UNW_AR_CCV, reg, rw); + break; + default: + result = 0; + } + + return result; +} + +void kgdb_get_reg(char *outbuffer, int regnum, struct unw_frame_info *info, + struct pt_regs *ptregs) +{ + unsigned long reg = 0, size = 0, *mem = &reg; /* reg zeroed: some paths below report it without writing it */ + struct ia64_fpreg freg; + + if (kgdb_gr_reg(regnum, info, &reg, 0) || + kgdb_gr_ptreg(regnum, ptregs, info, &reg, 0) || + kgdb_br_reg(regnum, ptregs, info, &reg, 0) || + kgdb_ar_reg(regnum, ptregs, info, &reg, 0)) + size = sizeof(reg); + else if (kgdb_fr_reg(regnum, NULL, ptregs, info, &reg, &freg, 0)) { + size = sizeof(freg); + mem = (unsigned long *)&freg; + } else if (regnum == IA64_IP_REGNUM) { + if (!ptregs) { + unw_get_ip(info, &reg); + size = sizeof(reg); + } else { + reg = ptregs->cr_iip; + size = sizeof(reg); + } + } else if (regnum == IA64_CFM_REGNUM) { + if (!ptregs) + unw_get_cfm(info, &reg); + else + reg = ptregs->cr_ifs; + size = sizeof(reg); + } else if (regnum == IA64_PSR_REGNUM) { + if (!ptregs && kgdb_usethread) + ptregs = (struct pt_regs *) + ((unsigned
long)kgdb_usethread + + IA64_STK_OFFSET) - 1; + if (ptregs) + reg = ptregs->cr_ipsr; + size = sizeof(reg); + } else if (regnum == IA64_PR_REGNUM) { + if (ptregs) + reg = ptregs->pr; + else + unw_access_pr(info, ®, 0); + size = sizeof(reg); + } else if (regnum == IA64_BSP_REGNUM) { + unw_get_bsp(info, ®); + size = sizeof(reg); + } + + if (size) { + kgdb_mem2hex((char *) mem, outbuffer, size); + outbuffer[size*2] = 0; + } + else + strcpy(outbuffer, "E0"); + + return; +} + +void kgdb_put_reg(char *inbuffer, char *outbuffer, int regnum, + struct unw_frame_info *info, struct pt_regs *ptregs) +{ + unsigned long reg; + struct ia64_fpreg freg; + char *ptr = inbuffer; + + kgdb_hex2long(&ptr, ®); + strcpy(outbuffer, "OK"); + + if (kgdb_gr_reg(regnum, info, ®, 1) || + kgdb_gr_ptreg(regnum, ptregs, info, ®, 1) || + kgdb_br_reg(regnum, ptregs, info, ®, 1) || + kgdb_fr_reg(regnum, inbuffer, ptregs, info, ®, &freg, 1) || + kgdb_ar_reg(regnum, ptregs, info, ®, 1)) ; + else if (regnum == IA64_IP_REGNUM) + ptregs->cr_iip = reg; + else if (regnum == IA64_CFM_REGNUM) + ptregs->cr_ifs = reg; + else if (regnum == IA64_PSR_REGNUM) + ptregs->cr_ipsr = reg; + else if (regnum == IA64_PR_REGNUM) + ptregs->pr = reg; + else + strcpy(outbuffer, "E01"); + return; +} + +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ +} + +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + +} + +#define MAX_HW_BREAKPOINT (20) +long hw_break_total_dbr, hw_break_total_ibr; +#define HW_BREAKPOINT (hw_break_total_dbr + hw_break_total_ibr) +#define WATCH_INSTRUCTION 0x0 +#define WATCH_WRITE 0x1 +#define WATCH_READ 0x2 +#define WATCH_ACCESS 0x3 + +#define HWCAP_DBR ((1 << WATCH_WRITE) | (1 << WATCH_READ)) +#define HWCAP_IBR (1 << WATCH_INSTRUCTION) +struct hw_breakpoint { + unsigned enabled; + unsigned long capable; + unsigned long type; + unsigned long mask; + unsigned long addr; +} 
*breakinfo; + +static struct hw_breakpoint hwbreaks[MAX_HW_BREAKPOINT]; + +enum instruction_type { A, I, M, F, B, L, X, u }; + +static enum instruction_type bundle_encoding[32][3] = { + {M, I, I}, /* 00 */ + {M, I, I}, /* 01 */ + {M, I, I}, /* 02 */ + {M, I, I}, /* 03 */ + {M, L, X}, /* 04 */ + {M, L, X}, /* 05 */ + {u, u, u}, /* 06 */ + {u, u, u}, /* 07 */ + {M, M, I}, /* 08 */ + {M, M, I}, /* 09 */ + {M, M, I}, /* 0A */ + {M, M, I}, /* 0B */ + {M, F, I}, /* 0C */ + {M, F, I}, /* 0D */ + {M, M, F}, /* 0E */ + {M, M, F}, /* 0F */ + {M, I, B}, /* 10 */ + {M, I, B}, /* 11 */ + {M, B, B}, /* 12 */ + {M, B, B}, /* 13 */ + {u, u, u}, /* 14 */ + {u, u, u}, /* 15 */ + {B, B, B}, /* 16 */ + {B, B, B}, /* 17 */ + {M, M, B}, /* 18 */ + {M, M, B}, /* 19 */ + {u, u, u}, /* 1A */ + {u, u, u}, /* 1B */ + {M, F, B}, /* 1C */ + {M, F, B}, /* 1D */ + {u, u, u}, /* 1E */ + {u, u, u}, /* 1F */ +}; + +int kgdb_validate_break_address(unsigned long addr) +{ + int error; + char tmp_variable[BREAK_INSTR_SIZE]; + error = kgdb_get_mem((char *)(addr & BREAK_INSTR_ALIGN), tmp_variable, + BREAK_INSTR_SIZE); + return error; +} + +int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) +{ + extern unsigned long _start[]; + unsigned long slot = addr & BREAK_INSTR_ALIGN, bundle_addr; + unsigned long template; + struct bundle { + struct { + unsigned long long template:5; + unsigned long long slot0:41; + unsigned long long slot1_p0:64 - 46; + } quad0; + struct { + unsigned long long slot1_p1:41 - (64 - 46); + unsigned long long slot2:41; + } quad1; + } bundle; + int ret; + + bundle_addr = addr & ~0xFULL; + + if (bundle_addr == (unsigned long)_start) + return 0; + + ret = kgdb_get_mem((char *)bundle_addr, (char *)&bundle, + BREAK_INSTR_SIZE); + if (ret < 0) + return ret; + + if (slot > 2) + slot = 0; + + memcpy(saved_instr, &bundle, BREAK_INSTR_SIZE); + template = bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + switch (slot) { + case 0: + 
bundle.quad0.slot0 = BREAKNUM; + break; + case 1: + bundle.quad0.slot1_p0 = BREAKNUM; + bundle.quad1.slot1_p1 = (BREAKNUM >> (64 - 46)); + break; + case 2: + bundle.quad1.slot2 = BREAKNUM; + break; + } + + return kgdb_set_mem((char *)bundle_addr, (char *)&bundle, + BREAK_INSTR_SIZE); +} + +int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) +{ + extern unsigned long _start[]; + + addr = addr & BREAK_INSTR_ALIGN; + if (addr == (unsigned long)_start) + return 0; + return kgdb_set_mem((char *)addr, (char *)bundle, BREAK_INSTR_SIZE); +} + +volatile static struct smp_unw { + struct unw_frame_info *unw; + struct task_struct *task; +} smp_unw[NR_CPUS]; + +static int inline kgdb_get_blocked_state(struct task_struct *p, + struct unw_frame_info *unw) +{ + unsigned long ip; + int count = 0; + + unw_init_from_blocked_task(unw, p); + ip = 0UL; + do { + if (unw_unwind(unw) < 0) + return -1; + unw_get_ip(unw, &ip); + if (!in_sched_functions(ip)) + break; + } while (count++ < 16); + + if (!ip) + return -1; + else + return 0; +} + +static void inline kgdb_wait(struct pt_regs *regs) +{ + unsigned long hw_breakpoint_status = ia64_getreg(_IA64_REG_PSR); + if (hw_breakpoint_status & IA64_PSR_DB) + ia64_setreg(_IA64_REG_PSR_L, + hw_breakpoint_status ^ IA64_PSR_DB); + kgdb_nmihook(smp_processor_id(), regs); + if (hw_breakpoint_status & IA64_PSR_DB) + ia64_setreg(_IA64_REG_PSR_L, hw_breakpoint_status); + + return; +} + +static void inline normalize(struct unw_frame_info *running, + struct pt_regs *regs) +{ + unsigned long sp; + + do { + unw_get_sp(running, &sp); + if ((sp + 0x10) >= (unsigned long)regs) + break; + } while (unw_unwind(running) >= 0); + + return; +} + +static void kgdb_init_running(struct unw_frame_info *unw, void *data) +{ + struct pt_regs *regs; + + regs = data; + normalize(unw, regs); + smp_unw[smp_processor_id()].unw = unw; + kgdb_wait(regs); +} + +void kgdb_wait_ipi(struct pt_regs *regs) +{ + struct unw_frame_info unw; + + smp_unw[smp_processor_id()].task 
= current; + + if (user_mode(regs)) { + smp_unw[smp_processor_id()].unw = (struct unw_frame_info *)1; + kgdb_wait(regs); + } else { + if (current->state == TASK_RUNNING) + unw_init_running(kgdb_init_running, regs); + else { + if (kgdb_get_blocked_state(current, &unw)) + smp_unw[smp_processor_id()].unw = + (struct unw_frame_info *)1; + else + smp_unw[smp_processor_id()].unw = &unw; + kgdb_wait(regs); + } + } + + smp_unw[smp_processor_id()].unw = NULL; + return; +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + if (num_online_cpus() > 1) + smp_send_nmi_allbutself(); +} + +static volatile int kgdb_hwbreak_sstep[NR_CPUS]; + +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = ptr; + struct pt_regs *regs = args->regs; + unsigned long err = args->err; + + switch (cmd) { + default: + return NOTIFY_DONE; + case DIE_PAGE_FAULT_NO_CONTEXT: + if (atomic_read(&debugger_active) && kgdb_may_fault) { + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + return NOTIFY_STOP; + } + break; + case DIE_BREAK: + if (user_mode(regs) || err == 0x80001) + return NOTIFY_DONE; + break; + case DIE_FAULT: + if (user_mode(regs)) + return NOTIFY_DONE; + else if (err == 36 && kgdb_hwbreak_sstep[smp_processor_id()]) { + kgdb_hwbreak_sstep[smp_processor_id()] = 0; + regs->cr_ipsr &= ~IA64_PSR_SS; + return NOTIFY_STOP; + } + case DIE_MCA_MONARCH_PROCESS: + case DIE_INIT_MONARCH_PROCESS: + break; + } + + kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, +}; + +int kgdb_arch_init(void) +{ + atomic_notifier_chain_register(&ia64die_chain, &kgdb_notifier); + return 0; +} + +static void do_kgdb_handle_exception(struct unw_frame_info *, void *data); + +struct kgdb_state { + int e_vector; + int signo; + unsigned long err_code; + struct pt_regs *regs; + struct unw_frame_info *unw; + char *inbuf; + char *outbuf; + int unwind; + int 
ret; +}; + +static void inline kgdb_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->cr_iip = pc & ~0xf; + ia64_psr(regs)->ri = pc & 0x3; + return; +} + +int kgdb_arch_handle_exception(int e_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + struct kgdb_state info; + + info.e_vector = e_vector; + info.signo = signo; + info.err_code = err_code; + info.unw = (void *)0; + info.inbuf = remcom_in_buffer; + info.outbuf = remcom_out_buffer; + info.unwind = 0; + info.ret = -1; + + if (remcom_in_buffer[0] == 'c' || remcom_in_buffer[0] == 's') { + info.regs = linux_regs; + do_kgdb_handle_exception(NULL, &info); + } else if (!kgdb_usethread || kgdb_usethread == current) { /* no thread selected: treat as current, like the 'P' handler */ + info.regs = linux_regs; + info.unwind = 1; + unw_init_running(do_kgdb_handle_exception, &info); + } else if (kgdb_usethread->state != TASK_RUNNING) { + struct unw_frame_info unw_info; + + if (kgdb_get_blocked_state(kgdb_usethread, &unw_info)) { + info.ret = 1; + goto bad; + } + info.regs = NULL; + do_kgdb_handle_exception(&unw_info, &info); + } else { + int i; + + for (i = 0; i < NR_CPUS; i++) + if (smp_unw[i].task == kgdb_usethread && smp_unw[i].unw + && smp_unw[i].unw != (struct unw_frame_info *)1) { + info.regs = NULL; + do_kgdb_handle_exception(smp_unw[i].unw, &info); + break; + } + /* Every CPU was scanned and none holds a usable unwind state for the task. */ + if (i == NR_CPUS) + info.ret = 1; + } + + bad: + if (info.ret != -1 && remcom_in_buffer[0] == 'p') { + unsigned long bad = 0xbad4badbadbadbadUL; + + printk("kgdb_arch_handle_exception: p packet bad (%s)\n", + remcom_in_buffer); + kgdb_mem2hex((char *)&bad, remcom_out_buffer, sizeof(bad)); + remcom_out_buffer[sizeof(bad) * 2] = 0; + info.ret = -1; + } + return info.ret; +} + +/* + * This is done because I evidently made an incorrect 'p' encoding + * when my patch for gdb was committed. It was later corrected. This + * check supports both my wrong encoding of the register number and + * the correct encoding.
Eventually this should be eliminated and + * kgdb_hex2long should be demarshalling the regnum. + */ +static inline int check_packet(unsigned int regnum, char *packet) +{ + static int check_done, swap; + unsigned long reglong; + + if (likely(check_done)) { + if (swap) { + kgdb_hex2long(&packet, ®long); + regnum = (int) reglong; + } + + } else { + if (regnum > NUM_REGS) { + kgdb_hex2long(&packet, ®long); + regnum = (int) reglong; + swap = 1; + } + check_done = 1; + } + return regnum; +} + +static void do_kgdb_handle_exception(struct unw_frame_info *unw_info, + void *data) +{ + long addr; + char *ptr; + unsigned long newPC; + int e_vector, signo; + unsigned long err_code; + struct pt_regs *linux_regs; + struct kgdb_state *info; + char *remcom_in_buffer, *remcom_out_buffer; + + info = data; + info->unw = unw_info; + e_vector = info->e_vector; + signo = info->signo; + err_code = info->err_code; + remcom_in_buffer = info->inbuf; + remcom_out_buffer = info->outbuf; + linux_regs = info->regs; + + if (info->unwind) + normalize(unw_info, linux_regs); + + switch (remcom_in_buffer[0]) { + case 'p': + { + unsigned int regnum; + + kgdb_hex2mem(&remcom_in_buffer[1], (char *)®num, + sizeof(regnum)); + regnum = check_packet(regnum, &remcom_in_buffer[1]); + if (regnum >= NUM_REGS) { + remcom_out_buffer[0] = 'E'; + remcom_out_buffer[1] = 0; + } else + kgdb_get_reg(remcom_out_buffer, regnum, + unw_info, linux_regs); + break; + } + case 'P': + { + unsigned int regno; + long v; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if ((!kgdb_usethread || kgdb_usethread == current) && + kgdb_hex2long(&ptr, &v) && + *ptr++ == '=' && (v >= 0)) { + regno = (unsigned int)v; + regno = (regno >= NUM_REGS ? 
0 : regno); + kgdb_put_reg(ptr, remcom_out_buffer, regno, + unw_info, linux_regs); + } else + strcpy(remcom_out_buffer, "E01"); + break; + } + case 'c': + case 's': + if (e_vector == TRAP_BRKPT && err_code == KGDBBREAKNUM) { + if (ia64_psr(linux_regs)->ri < 2) + kgdb_pc(linux_regs, linux_regs->cr_iip + + ia64_psr(linux_regs)->ri + 1); + else + kgdb_pc(linux_regs, linux_regs->cr_iip + 16); + } + + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) { + linux_regs->cr_iip = addr; + } + newPC = linux_regs->cr_iip; + + /* clear the trace bit */ + linux_regs->cr_ipsr &= ~IA64_PSR_SS; + + atomic_set(&cpu_doing_single_step, -1); + + /* set the trace bit if we're stepping or took a hardware break */ + if (remcom_in_buffer[0] == 's' || e_vector == TRAP_HWBKPT) { + linux_regs->cr_ipsr |= IA64_PSR_SS; + debugger_step = 1; + if (kgdb_contthread) + atomic_set(&cpu_doing_single_step, + smp_processor_id()); + } + + kgdb_correct_hw_break(); + + /* if not hardware breakpoint, then reenable them */ + if (e_vector != TRAP_HWBKPT) + linux_regs->cr_ipsr |= IA64_PSR_DB; + else { + kgdb_hwbreak_sstep[smp_processor_id()] = 1; + linux_regs->cr_ipsr &= ~IA64_PSR_DB; + } + + info->ret = 0; + break; + default: + break; + } + + return; +} + +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0xcc}, +}; diff -Nurb linux-2.6.22-570/arch/ia64/kernel/smp.c linux-2.6.22-591/arch/ia64/kernel/smp.c --- linux-2.6.22-570/arch/ia64/kernel/smp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ia64/kernel/smp.c 2007-12-21 15:36:11.000000000 -0500 @@ -48,6 +48,7 @@ #include #include #include +#include /* * Note: alignment of 4 entries/cacheline was empirically determined @@ -79,6 +80,9 @@ #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 +#ifdef CONFIG_KGDB +#define IPI_KGDB_INTERRUPT 2 +#endif #define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. 
*/ @@ -169,6 +173,11 @@ case IPI_CPU_STOP: stop_this_cpu(); break; +#ifdef CONFIG_KGDB + case IPI_KGDB_INTERRUPT: + kgdb_wait_ipi(get_irq_regs()); + break; +#endif #ifdef CONFIG_KEXEC case IPI_KDUMP_CPU_STOP: unw_init_running(kdump_cpu_freeze, NULL); @@ -399,6 +408,14 @@ } EXPORT_SYMBOL(smp_call_function_single); +#ifdef CONFIG_KGDB +void +smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(IPI_KGDB_INTERRUPT); +} +#endif + /* * this function sends a 'generic call function' IPI to all other CPUs * in the system. diff -Nurb linux-2.6.22-570/arch/ia64/kernel/traps.c linux-2.6.22-591/arch/ia64/kernel/traps.c --- linux-2.6.22-570/arch/ia64/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/ia64/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 @@ -155,8 +155,12 @@ break; default: - if (break_num < 0x40000 || break_num > 0x100000) + if (break_num < 0x40000 || break_num > 0x100000) { + if (notify_die(DIE_BREAK, "bad break", regs, + break_num, TRAP_BRKPT, SIGTRAP) == NOTIFY_STOP) + return; die_if_kernel("Bad break", regs, break_num); + } if (break_num < 0x80000) { sig = SIGILL; code = __ILL_BREAK; diff -Nurb linux-2.6.22-570/arch/ia64/mm/extable.c linux-2.6.22-591/arch/ia64/mm/extable.c --- linux-2.6.22-570/arch/ia64/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ia64/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 @@ -6,6 +6,7 @@ */ #include +#include #include #include @@ -73,6 +74,11 @@ else last = mid - 1; } +#ifdef CONFIG_KGDB + if (atomic_read(&debugger_active) && kgdb_may_fault) + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + /* Not reached. 
*/ +#endif return NULL; } diff -Nurb linux-2.6.22-570/arch/ia64/mm/fault.c linux-2.6.22-591/arch/ia64/mm/fault.c --- linux-2.6.22-570/arch/ia64/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/ia64/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 @@ -255,6 +255,10 @@ */ bust_spinlocks(1); + if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs, + isr, 14, SIGSEGV) == NOTIFY_STOP) + return; + if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); else diff -Nurb linux-2.6.22-570/arch/mips/Kconfig linux-2.6.22-591/arch/mips/Kconfig --- linux-2.6.22-570/arch/mips/Kconfig 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/mips/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -30,7 +30,6 @@ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB help The eXcite is a smart camera platform manufactured by Basler Vision Technologies AG. @@ -98,7 +97,6 @@ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB help This is an evaluation board based on the Galileo GT-64120 single-chip system controller that contains a MIPS R5000 compatible @@ -269,7 +267,6 @@ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB help The Ocelot is a MIPS-based Single Board Computer (SBC) made by Momentum Computer . 
@@ -331,8 +328,6 @@ select SYS_HAS_CPU_R5432 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL if EXPERIMENTAL - select SYS_SUPPORTS_KGDB - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_LITTLE_ENDIAN help This enables support for the R5432-based NEC DDB Vrc-5477, @@ -360,7 +355,6 @@ select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_SMP help Yosemite is an evaluation board for the RM9000x2 processor @@ -440,7 +434,6 @@ select SYS_HAS_CPU_R10000 select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_SMP select GENERIC_HARDIRQS_NO__DO_IRQ @@ -490,7 +483,6 @@ select SYS_HAS_CPU_SB1 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_KGDB select SYS_SUPPORTS_LITTLE_ENDIAN config SIBYTE_SENTOSA @@ -631,7 +623,6 @@ select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB select GENERIC_HARDIRQS_NO__DO_IRQ help This Toshiba board is based on the TX4927 processor. Say Y here to @@ -650,7 +641,6 @@ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_KGDB select GENERIC_HARDIRQS_NO__DO_IRQ help This Toshiba board is based on the TX4938 processor. Say Y here to @@ -826,7 +816,6 @@ config DDB5XXX_COMMON bool - select SYS_SUPPORTS_KGDB config MIPS_BOARDS_GEN bool @@ -862,7 +851,6 @@ select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select GENERIC_HARDIRQS_NO__DO_IRQ - select SYS_SUPPORTS_KGDB config SWAP_IO_SPACE bool diff -Nurb linux-2.6.22-570/arch/mips/Kconfig.debug linux-2.6.22-591/arch/mips/Kconfig.debug --- linux-2.6.22-570/arch/mips/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 @@ -46,28 +46,6 @@ arch/mips/kernel/smtc.c. 
This debugging option result in significant overhead so should be disabled in production kernels. -config KGDB - bool "Remote GDB kernel debugging" - depends on DEBUG_KERNEL && SYS_SUPPORTS_KGDB - select DEBUG_INFO - help - If you say Y here, it will be possible to remotely debug the MIPS - kernel using gdb. This enlarges your kernel image disk size by - several megabytes and requires a machine with more than 16 MB, - better 32 MB RAM to avoid excessive linking time. This is only - useful for kernel hackers. If unsure, say N. - -config SYS_SUPPORTS_KGDB - bool - -config GDB_CONSOLE - bool "Console output to GDB" - depends on KGDB - help - If you are using GDB for remote debugging over a serial port and - would like kernel messages to be formatted into GDB $O packets so - that GDB prints them as program output, say 'Y'. - config SB1XXX_CORELIS bool "Corelis Debugger" depends on SIBYTE_SB1xxx_SOC diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/Makefile linux-2.6.22-591/arch/mips/au1000/common/Makefile --- linux-2.6.22-570/arch/mips/au1000/common/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/au1000/common/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -10,5 +10,4 @@ au1xxx_irqmap.o clocks.o platform.o power.o setup.o \ sleeper.o cputable.o dma.o dbdma.o gpio.o -obj-$(CONFIG_KGDB) += dbg_io.o obj-$(CONFIG_PCI) += pci.o diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c linux-2.6.22-591/arch/mips/au1000/common/dbg_io.c --- linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/au1000/common/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,121 +0,0 @@ - -#include -#include - -#ifdef CONFIG_KGDB - -/* - * FIXME the user should be able to select the - * uart to be used for debugging. 
- */ -#define DEBUG_BASE UART_DEBUG_BASE -/**/ - -/* we need uint32 uint8 */ -/* #include "types.h" */ -typedef unsigned char uint8; -typedef unsigned int uint32; - -#define UART16550_BAUD_2400 2400 -#define UART16550_BAUD_4800 4800 -#define UART16550_BAUD_9600 9600 -#define UART16550_BAUD_19200 19200 -#define UART16550_BAUD_38400 38400 -#define UART16550_BAUD_57600 57600 -#define UART16550_BAUD_115200 115200 - -#define UART16550_PARITY_NONE 0 -#define UART16550_PARITY_ODD 0x08 -#define UART16550_PARITY_EVEN 0x18 -#define UART16550_PARITY_MARK 0x28 -#define UART16550_PARITY_SPACE 0x38 - -#define UART16550_DATA_5BIT 0x0 -#define UART16550_DATA_6BIT 0x1 -#define UART16550_DATA_7BIT 0x2 -#define UART16550_DATA_8BIT 0x3 - -#define UART16550_STOP_1BIT 0x0 -#define UART16550_STOP_2BIT 0x4 - - -#define UART_RX 0 /* Receive buffer */ -#define UART_TX 4 /* Transmit buffer */ -#define UART_IER 8 /* Interrupt Enable Register */ -#define UART_IIR 0xC /* Interrupt ID Register */ -#define UART_FCR 0x10 /* FIFO Control Register */ -#define UART_LCR 0x14 /* Line Control Register */ -#define UART_MCR 0x18 /* Modem Control Register */ -#define UART_LSR 0x1C /* Line Status Register */ -#define UART_MSR 0x20 /* Modem Status Register */ -#define UART_CLK 0x28 /* Baud Rat4e Clock Divider */ -#define UART_MOD_CNTRL 0x100 /* Module Control */ - -/* memory-mapped read/write of the port */ -#define UART16550_READ(y) (au_readl(DEBUG_BASE + y) & 0xff) -#define UART16550_WRITE(y,z) (au_writel(z&0xff, DEBUG_BASE + y)) - -extern unsigned long get_au1x00_uart_baud_base(void); -extern unsigned long cal_r4koff(void); - -void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) -{ - - if (UART16550_READ(UART_MOD_CNTRL) != 0x3) { - UART16550_WRITE(UART_MOD_CNTRL, 3); - } - cal_r4koff(); - - /* disable interrupts */ - UART16550_WRITE(UART_IER, 0); - - /* set up baud rate */ - { - uint32 divisor; - - /* set divisor */ - divisor = get_au1x00_uart_baud_base() / baud; - UART16550_WRITE(UART_CLK, 
divisor & 0xffff); - } - - /* set data format */ - UART16550_WRITE(UART_LCR, (data | parity | stop)); -} - -static int remoteDebugInitialized = 0; - -uint8 getDebugChar(void) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_115200, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, - UART16550_STOP_1BIT); - } - - while((UART16550_READ(UART_LSR) & 0x1) == 0); - return UART16550_READ(UART_RX); -} - - -int putDebugChar(uint8 byte) -{ -// int i; - - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_115200, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, - UART16550_STOP_1BIT); - } - - while ((UART16550_READ(UART_LSR)&0x40) == 0); - UART16550_WRITE(UART_TX, byte); - //for (i=0;i<0xfff;i++); - - return 1; -} - -#endif diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/Makefile linux-2.6.22-591/arch/mips/basler/excite/Makefile --- linux-2.6.22-570/arch/mips/basler/excite/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/basler/excite/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -5,5 +5,4 @@ obj-$(CONFIG_BASLER_EXCITE) += excite_irq.o excite_prom.o excite_setup.o \ excite_device.o excite_procfs.o -obj-$(CONFIG_KGDB) += excite_dbg_io.o obj-m += excite_iodev.o diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c linux-2.6.22-591/arch/mips/basler/excite/excite_dbg_io.c --- linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/basler/excite/excite_dbg_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2004 by Basler Vision Technologies AG - * Author: Thomas Koeller - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_SERIAL_8250) && CONFIG_SERIAL_8250_NR_UARTS > 1 -#error Debug port used by serial driver -#endif - -#define UART_CLK 25000000 -#define BASE_BAUD (UART_CLK / 16) -#define REGISTER_BASE_0 0x0208UL -#define REGISTER_BASE_1 0x0238UL - -#define REGISTER_BASE_DBG REGISTER_BASE_1 - -#define CPRR 0x0004 -#define UACFG 0x0200 -#define UAINTS 0x0204 -#define UARBR (REGISTER_BASE_DBG + 0x0000) -#define UATHR (REGISTER_BASE_DBG + 0x0004) -#define UADLL (REGISTER_BASE_DBG + 0x0008) -#define UAIER (REGISTER_BASE_DBG + 0x000c) -#define UADLH (REGISTER_BASE_DBG + 0x0010) -#define UAIIR (REGISTER_BASE_DBG + 0x0014) -#define UAFCR (REGISTER_BASE_DBG + 0x0018) -#define UALCR (REGISTER_BASE_DBG + 0x001c) -#define UAMCR (REGISTER_BASE_DBG + 0x0020) -#define UALSR (REGISTER_BASE_DBG + 0x0024) -#define UAMSR (REGISTER_BASE_DBG + 0x0028) -#define UASCR (REGISTER_BASE_DBG + 0x002c) - -#define PARITY_NONE 0 -#define PARITY_ODD 0x08 -#define PARITY_EVEN 0x18 -#define PARITY_MARK 0x28 -#define PARITY_SPACE 0x38 - -#define DATA_5BIT 0x0 -#define DATA_6BIT 0x1 -#define DATA_7BIT 0x2 -#define DATA_8BIT 0x3 - -#define STOP_1BIT 0x0 -#define STOP_2BIT 0x4 - -#define BAUD_DBG 57600 -#define PARITY_DBG PARITY_NONE -#define DATA_DBG DATA_8BIT -#define STOP_DBG STOP_1BIT - -/* Initialize the serial port for KGDB debugging */ -void __init excite_kgdb_init(void) -{ - const u32 divisor = BASE_BAUD / BAUD_DBG; - - /* Take the UART out of reset */ - 
titan_writel(0x00ff1cff, CPRR); - titan_writel(0x00000000, UACFG); - titan_writel(0x00000002, UACFG); - - titan_writel(0x0, UALCR); - titan_writel(0x0, UAIER); - - /* Disable FIFOs */ - titan_writel(0x00, UAFCR); - - titan_writel(0x80, UALCR); - titan_writel(divisor & 0xff, UADLL); - titan_writel((divisor & 0xff00) >> 8, UADLH); - titan_writel(0x0, UALCR); - - titan_writel(DATA_DBG | PARITY_DBG | STOP_DBG, UALCR); - - /* Enable receiver interrupt */ - titan_readl(UARBR); - titan_writel(0x1, UAIER); -} - -int getDebugChar(void) -{ - while (!(titan_readl(UALSR) & 0x1)); - return titan_readl(UARBR); -} - -int putDebugChar(int data) -{ - while (!(titan_readl(UALSR) & 0x20)); - titan_writel(data, UATHR); - return 1; -} - -/* KGDB interrupt handler */ -asmlinkage void excite_kgdb_inthdl(void) -{ - if (unlikely( - ((titan_readl(UAIIR) & 0x7) == 4) - && ((titan_readl(UARBR) & 0xff) == 0x3))) - set_async_breakpoint(&regs->cp0_epc); -} diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c linux-2.6.22-591/arch/mips/basler/excite/excite_irq.c --- linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/basler/excite/excite_irq.c 2007-12-21 15:36:11.000000000 -0500 @@ -50,10 +50,6 @@ mips_cpu_irq_init(); rm7k_cpu_irq_init(); rm9k_cpu_irq_init(); - -#ifdef CONFIG_KGDB - excite_kgdb_init(); -#endif } asmlinkage void plat_irq_dispatch(void) @@ -90,9 +86,6 @@ msgint = msgintflags & msgintmask & (0x1 << (TITAN_MSGINT % 0x20)); if ((pending & (1 << TITAN_IRQ)) && msgint) { ocd_writel(msgint, INTP0Clear0 + (TITAN_MSGINT / 0x20 * 0x10)); -#if defined(CONFIG_KGDB) - excite_kgdb_inthdl(); -#endif do_IRQ(TITAN_IRQ); return; } diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c linux-2.6.22-591/arch/mips/basler/excite/excite_setup.c --- linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/basler/excite/excite_setup.c 2007-12-21
15:36:11.000000000 -0500 @@ -95,13 +95,13 @@ /* Take the DUART out of reset */ titan_writel(0x00ff1cff, CPRR); -#if defined(CONFIG_KGDB) || (CONFIG_SERIAL_8250_NR_UARTS > 1) +#if (CONFIG_SERIAL_8250_NR_UARTS > 1) /* Enable both ports */ titan_writel(MASK_SER0 | MASK_SER1, UACFG); #else /* Enable port #0 only */ titan_writel(MASK_SER0, UACFG); -#endif /* defined(CONFIG_KGDB) */ +#endif /* * Set up serial port #0. Do not use autodetection; the result is diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/Makefile --- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -5,4 +5,3 @@ obj-y += irq.o irq_5477.o setup.o lcd44780.o obj-$(CONFIG_RUNTIME_DEBUG) += debug.o -obj-$(CONFIG_KGDB) += kgdb_io.o diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/kgdb_io.c --- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/kgdb_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,136 +0,0 @@ -/* - * kgdb io functions for DDB5477. We use the second serial port (upper one). - * - * Copyright (C) 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * - */ - -/* ======================= CONFIG ======================== */ - -/* [jsun] we use the second serial port for kdb */ -#define BASE 0xbfa04240 -#define MAX_BAUD 115200 - -/* distance in bytes between two serial registers */ -#define REG_OFFSET 8 - -/* - * 0 - kgdb does serial init - * 1 - kgdb skip serial init - */ -static int remoteDebugInitialized = 0; - -/* - * the default baud rate *if* kgdb does serial init - */ -#define BAUD_DEFAULT UART16550_BAUD_38400 - -/* ======================= END OF CONFIG ======================== */ - -typedef unsigned char uint8; -typedef unsigned int uint32; - -#define UART16550_BAUD_2400 2400 -#define UART16550_BAUD_4800 4800 -#define UART16550_BAUD_9600 9600 -#define UART16550_BAUD_19200 19200 -#define UART16550_BAUD_38400 38400 -#define UART16550_BAUD_57600 57600 -#define UART16550_BAUD_115200 115200 - -#define UART16550_PARITY_NONE 0 -#define UART16550_PARITY_ODD 0x08 -#define UART16550_PARITY_EVEN 0x18 -#define UART16550_PARITY_MARK 0x28 -#define UART16550_PARITY_SPACE 0x38 - -#define UART16550_DATA_5BIT 0x0 -#define UART16550_DATA_6BIT 0x1 -#define UART16550_DATA_7BIT 0x2 -#define UART16550_DATA_8BIT 0x3 - -#define UART16550_STOP_1BIT 0x0 -#define UART16550_STOP_2BIT 0x4 - -/* register offset */ -#define OFS_RCV_BUFFER 0 -#define OFS_TRANS_HOLD 0 -#define OFS_SEND_BUFFER 0 -#define OFS_INTR_ENABLE (1*REG_OFFSET) -#define OFS_INTR_ID (2*REG_OFFSET) -#define OFS_DATA_FORMAT (3*REG_OFFSET) -#define OFS_LINE_CONTROL (3*REG_OFFSET) -#define OFS_MODEM_CONTROL (4*REG_OFFSET) -#define OFS_RS232_OUTPUT (4*REG_OFFSET) -#define OFS_LINE_STATUS (5*REG_OFFSET) -#define OFS_MODEM_STATUS (6*REG_OFFSET) -#define OFS_RS232_INPUT (6*REG_OFFSET) -#define OFS_SCRATCH_PAD (7*REG_OFFSET) - -#define OFS_DIVISOR_LSB (0*REG_OFFSET) -#define OFS_DIVISOR_MSB (1*REG_OFFSET) - - -/* memory-mapped read/write of the port */ -#define UART16550_READ(y) (*((volatile uint8*)(BASE + y))) -#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) 
= z) - -void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) -{ - /* disable interrupts */ - UART16550_WRITE(OFS_INTR_ENABLE, 0); - - /* set up baud rate */ - { - uint32 divisor; - - /* set DIAB bit */ - UART16550_WRITE(OFS_LINE_CONTROL, 0x80); - - /* set divisor */ - divisor = MAX_BAUD / baud; - UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff); - UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8); - - /* clear DIAB bit */ - UART16550_WRITE(OFS_LINE_CONTROL, 0x0); - } - - /* set data format */ - UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop); -} - - -uint8 getDebugChar(void) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(BAUD_DEFAULT, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, UART16550_STOP_1BIT); - } - - while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0); - return UART16550_READ(OFS_RCV_BUFFER); -} - - -int putDebugChar(uint8 byte) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(BAUD_DEFAULT, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, UART16550_STOP_1BIT); - } - - while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0); - UART16550_WRITE(OFS_SEND_BUFFER, byte); - return 1; -} diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/Makefile --- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -3,5 +3,3 @@ # obj-y += irq.o prom.o reset.o setup.o - -obj-$(CONFIG_KGDB) += dbg_io.o diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/dbg_io.c --- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,121 +0,0 @@ - -#include /* For the serial port location 
and base baud */ - -/* --- CONFIG --- */ - -typedef unsigned char uint8; -typedef unsigned int uint32; - -/* --- END OF CONFIG --- */ - -#define UART16550_BAUD_2400 2400 -#define UART16550_BAUD_4800 4800 -#define UART16550_BAUD_9600 9600 -#define UART16550_BAUD_19200 19200 -#define UART16550_BAUD_38400 38400 -#define UART16550_BAUD_57600 57600 -#define UART16550_BAUD_115200 115200 - -#define UART16550_PARITY_NONE 0 -#define UART16550_PARITY_ODD 0x08 -#define UART16550_PARITY_EVEN 0x18 -#define UART16550_PARITY_MARK 0x28 -#define UART16550_PARITY_SPACE 0x38 - -#define UART16550_DATA_5BIT 0x0 -#define UART16550_DATA_6BIT 0x1 -#define UART16550_DATA_7BIT 0x2 -#define UART16550_DATA_8BIT 0x3 - -#define UART16550_STOP_1BIT 0x0 -#define UART16550_STOP_2BIT 0x4 - -/* ----------------------------------------------------- */ - -/* === CONFIG === */ - -/* [jsun] we use the second serial port for kdb */ -#define BASE OCELOT_SERIAL1_BASE -#define MAX_BAUD OCELOT_BASE_BAUD - -/* === END OF CONFIG === */ - -#define REG_OFFSET 4 - -/* register offset */ -#define OFS_RCV_BUFFER 0 -#define OFS_TRANS_HOLD 0 -#define OFS_SEND_BUFFER 0 -#define OFS_INTR_ENABLE (1*REG_OFFSET) -#define OFS_INTR_ID (2*REG_OFFSET) -#define OFS_DATA_FORMAT (3*REG_OFFSET) -#define OFS_LINE_CONTROL (3*REG_OFFSET) -#define OFS_MODEM_CONTROL (4*REG_OFFSET) -#define OFS_RS232_OUTPUT (4*REG_OFFSET) -#define OFS_LINE_STATUS (5*REG_OFFSET) -#define OFS_MODEM_STATUS (6*REG_OFFSET) -#define OFS_RS232_INPUT (6*REG_OFFSET) -#define OFS_SCRATCH_PAD (7*REG_OFFSET) - -#define OFS_DIVISOR_LSB (0*REG_OFFSET) -#define OFS_DIVISOR_MSB (1*REG_OFFSET) - - -/* memory-mapped read/write of the port */ -#define UART16550_READ(y) (*((volatile uint8*)(BASE + y))) -#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z) - -void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) -{ - /* disable interrupts */ - UART16550_WRITE(OFS_INTR_ENABLE, 0); - - /* set up baud rate */ - { - uint32 divisor; - - /* set DIAB 
bit */ - UART16550_WRITE(OFS_LINE_CONTROL, 0x80); - - /* set divisor */ - divisor = MAX_BAUD / baud; - UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff); - UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8); - - /* clear DIAB bit */ - UART16550_WRITE(OFS_LINE_CONTROL, 0x0); - } - - /* set data format */ - UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop); -} - -static int remoteDebugInitialized = 0; - -uint8 getDebugChar(void) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_38400, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, UART16550_STOP_1BIT); - } - - while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0); - return UART16550_READ(OFS_RCV_BUFFER); -} - - -int putDebugChar(uint8 byte) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_38400, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, UART16550_STOP_1BIT); - } - - while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0); - UART16550_WRITE(OFS_SEND_BUFFER, byte); - return 1; -} diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/Makefile --- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -3,4 +3,3 @@ # obj-y += init.o irq.o setup.o -obj-$(CONFIG_KGDB) += kgdb_io.o diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/kgdb_io.c --- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/kgdb_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,105 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * Low level uart routines to directly access a TX[34]927 SIO. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. 
- * ahennessy@mvista.com or source@mvista.com - * - * Based on arch/mips/ddb5xxx/ddb5477/kgdb_io.c - * - * Copyright (C) 2000-2001 Toshiba Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -#define TIMEOUT 0xffffff - -static int remoteDebugInitialized = 0; -static void debugInit(int baud); - -int putDebugChar(unsigned char c) -{ - int i = 0; - - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(38400); - } - - do { - slow_down(); - i++; - if (i>TIMEOUT) { - break; - } - } while (!(tx3927_sioptr(0)->cisr & TXx927_SICISR_TXALS)); - tx3927_sioptr(0)->tfifo = c; - - return 1; -} - -unsigned char getDebugChar(void) -{ - int i = 0; - int dicr; - char c; - - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(38400); - } - - /* diable RX int. 
*/ - dicr = tx3927_sioptr(0)->dicr; - tx3927_sioptr(0)->dicr = 0; - - do { - slow_down(); - i++; - if (i>TIMEOUT) { - break; - } - } while (tx3927_sioptr(0)->disr & TXx927_SIDISR_UVALID) - ; - c = tx3927_sioptr(0)->rfifo; - - /* clear RX int. status */ - tx3927_sioptr(0)->disr &= ~TXx927_SIDISR_RDIS; - /* enable RX int. */ - tx3927_sioptr(0)->dicr = dicr; - - return c; -} - -static void debugInit(int baud) -{ - tx3927_sioptr(0)->lcr = 0x020; - tx3927_sioptr(0)->dicr = 0; - tx3927_sioptr(0)->disr = 0x4100; - tx3927_sioptr(0)->cisr = 0x014; - tx3927_sioptr(0)->fcr = 0; - tx3927_sioptr(0)->flcr = 0x02; - tx3927_sioptr(0)->bgr = ((JMR3927_BASE_BAUD + baud / 2) / baud) | - TXx927_SIBGR_BCLK_T0; -} diff -Nurb linux-2.6.22-570/arch/mips/kernel/Makefile linux-2.6.22-591/arch/mips/kernel/Makefile --- linux-2.6.22-570/arch/mips/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -57,7 +57,8 @@ obj-$(CONFIG_MIPS32_N32) += binfmt_elfn32.o scall64-n32.o signal_n32.o obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o -obj-$(CONFIG_KGDB) += gdb-low.o gdb-stub.o +obj-$(CONFIG_KGDB) += kgdb_handler.o kgdb.o kgdb-jmp.o \ + kgdb-setjmp.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_64BIT) += cpu-bugs64.o diff -Nurb linux-2.6.22-570/arch/mips/kernel/cpu-probe.c linux-2.6.22-591/arch/mips/kernel/cpu-probe.c --- linux-2.6.22-570/arch/mips/kernel/cpu-probe.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/kernel/cpu-probe.c 2007-12-21 15:36:11.000000000 -0500 @@ -177,6 +177,17 @@ cpu_wait = r4k_wait; break; + case CPU_20KC: + /* + * WAIT on Rev1.0 has E1, E2, E3 and E16. + * WAIT on Rev2.0 and Rev3.0 has E16. 
+ * Rev3.1 WAIT is nop, why bother + */ + if ((c->processor_id & 0xff) <= 0x64) + break; + + cpu_wait = r4k_wait; + break; case CPU_RM9000: if ((c->processor_id & 0x00ff) >= 0x40) cpu_wait = r4k_wait; diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-low.S linux-2.6.22-591/arch/mips/kernel/gdb-low.S --- linux-2.6.22-570/arch/mips/kernel/gdb-low.S 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/kernel/gdb-low.S 1969-12-31 19:00:00.000000000 -0500 @@ -1,394 +0,0 @@ -/* - * gdb-low.S contains the low-level trap handler for the GDB stub. - * - * Copyright (C) 1995 Andreas Busse - */ -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_32BIT -#define DMFC0 mfc0 -#define DMTC0 mtc0 -#define LDC1 lwc1 -#define SDC1 lwc1 -#endif -#ifdef CONFIG_64BIT -#define DMFC0 dmfc0 -#define DMTC0 dmtc0 -#define LDC1 ldc1 -#define SDC1 ldc1 -#endif - -/* - * [jsun] We reserves about 2x GDB_FR_SIZE in stack. The lower (addressed) - * part is used to store registers and passed to exception handler. - * The upper part is reserved for "call func" feature where gdb client - * saves some of the regs, setups call frame and passes args. - * - * A trace shows about 200 bytes are used to store about half of all regs. - * The rest should be big enough for frame setup and passing args. - */ - -/* - * The low level trap handler - */ - .align 5 - NESTED(trap_low, GDB_FR_SIZE, sp) - .set noat - .set noreorder - - mfc0 k0, CP0_STATUS - sll k0, 3 /* extract cu0 bit */ - bltz k0, 1f - move k1, sp - - /* - * Called from user mode, go somewhere else. 
- */ - mfc0 k0, CP0_CAUSE - andi k0, k0, 0x7c -#ifdef CONFIG_64BIT - dsll k0, k0, 1 -#endif - PTR_L k1, saved_vectors(k0) - jr k1 - nop -1: - move k0, sp - PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above - LONG_S k0, GDB_FR_REG29(sp) - LONG_S $2, GDB_FR_REG2(sp) - -/* - * First save the CP0 and special registers - */ - - mfc0 v0, CP0_STATUS - LONG_S v0, GDB_FR_STATUS(sp) - mfc0 v0, CP0_CAUSE - LONG_S v0, GDB_FR_CAUSE(sp) - DMFC0 v0, CP0_EPC - LONG_S v0, GDB_FR_EPC(sp) - DMFC0 v0, CP0_BADVADDR - LONG_S v0, GDB_FR_BADVADDR(sp) - mfhi v0 - LONG_S v0, GDB_FR_HI(sp) - mflo v0 - LONG_S v0, GDB_FR_LO(sp) - -/* - * Now the integer registers - */ - - LONG_S zero, GDB_FR_REG0(sp) /* I know... */ - LONG_S $1, GDB_FR_REG1(sp) - /* v0 already saved */ - LONG_S $3, GDB_FR_REG3(sp) - LONG_S $4, GDB_FR_REG4(sp) - LONG_S $5, GDB_FR_REG5(sp) - LONG_S $6, GDB_FR_REG6(sp) - LONG_S $7, GDB_FR_REG7(sp) - LONG_S $8, GDB_FR_REG8(sp) - LONG_S $9, GDB_FR_REG9(sp) - LONG_S $10, GDB_FR_REG10(sp) - LONG_S $11, GDB_FR_REG11(sp) - LONG_S $12, GDB_FR_REG12(sp) - LONG_S $13, GDB_FR_REG13(sp) - LONG_S $14, GDB_FR_REG14(sp) - LONG_S $15, GDB_FR_REG15(sp) - LONG_S $16, GDB_FR_REG16(sp) - LONG_S $17, GDB_FR_REG17(sp) - LONG_S $18, GDB_FR_REG18(sp) - LONG_S $19, GDB_FR_REG19(sp) - LONG_S $20, GDB_FR_REG20(sp) - LONG_S $21, GDB_FR_REG21(sp) - LONG_S $22, GDB_FR_REG22(sp) - LONG_S $23, GDB_FR_REG23(sp) - LONG_S $24, GDB_FR_REG24(sp) - LONG_S $25, GDB_FR_REG25(sp) - LONG_S $26, GDB_FR_REG26(sp) - LONG_S $27, GDB_FR_REG27(sp) - LONG_S $28, GDB_FR_REG28(sp) - /* sp already saved */ - LONG_S $30, GDB_FR_REG30(sp) - LONG_S $31, GDB_FR_REG31(sp) - - CLI /* disable interrupts */ - TRACE_IRQS_OFF - -/* - * Followed by the floating point registers - */ - mfc0 v0, CP0_STATUS /* FPU enabled? 
*/ - srl v0, v0, 16 - andi v0, v0, (ST0_CU1 >> 16) - - beqz v0,2f /* disabled, skip */ - nop - - SDC1 $0, GDB_FR_FPR0(sp) - SDC1 $1, GDB_FR_FPR1(sp) - SDC1 $2, GDB_FR_FPR2(sp) - SDC1 $3, GDB_FR_FPR3(sp) - SDC1 $4, GDB_FR_FPR4(sp) - SDC1 $5, GDB_FR_FPR5(sp) - SDC1 $6, GDB_FR_FPR6(sp) - SDC1 $7, GDB_FR_FPR7(sp) - SDC1 $8, GDB_FR_FPR8(sp) - SDC1 $9, GDB_FR_FPR9(sp) - SDC1 $10, GDB_FR_FPR10(sp) - SDC1 $11, GDB_FR_FPR11(sp) - SDC1 $12, GDB_FR_FPR12(sp) - SDC1 $13, GDB_FR_FPR13(sp) - SDC1 $14, GDB_FR_FPR14(sp) - SDC1 $15, GDB_FR_FPR15(sp) - SDC1 $16, GDB_FR_FPR16(sp) - SDC1 $17, GDB_FR_FPR17(sp) - SDC1 $18, GDB_FR_FPR18(sp) - SDC1 $19, GDB_FR_FPR19(sp) - SDC1 $20, GDB_FR_FPR20(sp) - SDC1 $21, GDB_FR_FPR21(sp) - SDC1 $22, GDB_FR_FPR22(sp) - SDC1 $23, GDB_FR_FPR23(sp) - SDC1 $24, GDB_FR_FPR24(sp) - SDC1 $25, GDB_FR_FPR25(sp) - SDC1 $26, GDB_FR_FPR26(sp) - SDC1 $27, GDB_FR_FPR27(sp) - SDC1 $28, GDB_FR_FPR28(sp) - SDC1 $29, GDB_FR_FPR29(sp) - SDC1 $30, GDB_FR_FPR30(sp) - SDC1 $31, GDB_FR_FPR31(sp) - -/* - * FPU control registers - */ - - cfc1 v0, CP1_STATUS - LONG_S v0, GDB_FR_FSR(sp) - cfc1 v0, CP1_REVISION - LONG_S v0, GDB_FR_FIR(sp) - -/* - * Current stack frame ptr - */ - -2: - LONG_S sp, GDB_FR_FRP(sp) - -/* - * CP0 registers (R4000/R4400 unused registers skipped) - */ - - mfc0 v0, CP0_INDEX - LONG_S v0, GDB_FR_CP0_INDEX(sp) - mfc0 v0, CP0_RANDOM - LONG_S v0, GDB_FR_CP0_RANDOM(sp) - DMFC0 v0, CP0_ENTRYLO0 - LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp) - DMFC0 v0, CP0_ENTRYLO1 - LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp) - DMFC0 v0, CP0_CONTEXT - LONG_S v0, GDB_FR_CP0_CONTEXT(sp) - mfc0 v0, CP0_PAGEMASK - LONG_S v0, GDB_FR_CP0_PAGEMASK(sp) - mfc0 v0, CP0_WIRED - LONG_S v0, GDB_FR_CP0_WIRED(sp) - DMFC0 v0, CP0_ENTRYHI - LONG_S v0, GDB_FR_CP0_ENTRYHI(sp) - mfc0 v0, CP0_PRID - LONG_S v0, GDB_FR_CP0_PRID(sp) - - .set at - -/* - * Continue with the higher level handler - */ - - move a0,sp - - jal handle_exception - nop - -/* - * Restore all writable registers, in reverse order - */ - - .set 
noat - - LONG_L v0, GDB_FR_CP0_ENTRYHI(sp) - LONG_L v1, GDB_FR_CP0_WIRED(sp) - DMTC0 v0, CP0_ENTRYHI - mtc0 v1, CP0_WIRED - LONG_L v0, GDB_FR_CP0_PAGEMASK(sp) - LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp) - mtc0 v0, CP0_PAGEMASK - DMTC0 v1, CP0_ENTRYLO1 - LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp) - LONG_L v1, GDB_FR_CP0_INDEX(sp) - DMTC0 v0, CP0_ENTRYLO0 - LONG_L v0, GDB_FR_CP0_CONTEXT(sp) - mtc0 v1, CP0_INDEX - DMTC0 v0, CP0_CONTEXT - - -/* - * Next, the floating point registers - */ - mfc0 v0, CP0_STATUS /* check if the FPU is enabled */ - srl v0, v0, 16 - andi v0, v0, (ST0_CU1 >> 16) - - beqz v0, 3f /* disabled, skip */ - nop - - LDC1 $31, GDB_FR_FPR31(sp) - LDC1 $30, GDB_FR_FPR30(sp) - LDC1 $29, GDB_FR_FPR29(sp) - LDC1 $28, GDB_FR_FPR28(sp) - LDC1 $27, GDB_FR_FPR27(sp) - LDC1 $26, GDB_FR_FPR26(sp) - LDC1 $25, GDB_FR_FPR25(sp) - LDC1 $24, GDB_FR_FPR24(sp) - LDC1 $23, GDB_FR_FPR23(sp) - LDC1 $22, GDB_FR_FPR22(sp) - LDC1 $21, GDB_FR_FPR21(sp) - LDC1 $20, GDB_FR_FPR20(sp) - LDC1 $19, GDB_FR_FPR19(sp) - LDC1 $18, GDB_FR_FPR18(sp) - LDC1 $17, GDB_FR_FPR17(sp) - LDC1 $16, GDB_FR_FPR16(sp) - LDC1 $15, GDB_FR_FPR15(sp) - LDC1 $14, GDB_FR_FPR14(sp) - LDC1 $13, GDB_FR_FPR13(sp) - LDC1 $12, GDB_FR_FPR12(sp) - LDC1 $11, GDB_FR_FPR11(sp) - LDC1 $10, GDB_FR_FPR10(sp) - LDC1 $9, GDB_FR_FPR9(sp) - LDC1 $8, GDB_FR_FPR8(sp) - LDC1 $7, GDB_FR_FPR7(sp) - LDC1 $6, GDB_FR_FPR6(sp) - LDC1 $5, GDB_FR_FPR5(sp) - LDC1 $4, GDB_FR_FPR4(sp) - LDC1 $3, GDB_FR_FPR3(sp) - LDC1 $2, GDB_FR_FPR2(sp) - LDC1 $1, GDB_FR_FPR1(sp) - LDC1 $0, GDB_FR_FPR0(sp) - -/* - * Now the CP0 and integer registers - */ - -3: -#ifdef CONFIG_MIPS_MT_SMTC - /* Read-modify write of Status must be atomic */ - mfc0 t2, CP0_TCSTATUS - ori t1, t2, TCSTATUS_IXMT - mtc0 t1, CP0_TCSTATUS - andi t2, t2, TCSTATUS_IXMT - _ehb - DMT 9 # dmt t1 - jal mips_ihb - nop -#endif /* CONFIG_MIPS_MT_SMTC */ - mfc0 t0, CP0_STATUS - ori t0, 0x1f - xori t0, 0x1f - mtc0 t0, CP0_STATUS -#ifdef CONFIG_MIPS_MT_SMTC - andi t1, t1, VPECONTROL_TE - beqz t1, 9f - 
nop - EMT # emt -9: - mfc0 t1, CP0_TCSTATUS - xori t1, t1, TCSTATUS_IXMT - or t1, t1, t2 - mtc0 t1, CP0_TCSTATUS - _ehb -#endif /* CONFIG_MIPS_MT_SMTC */ - LONG_L v0, GDB_FR_STATUS(sp) - LONG_L v1, GDB_FR_EPC(sp) - mtc0 v0, CP0_STATUS - DMTC0 v1, CP0_EPC - LONG_L v0, GDB_FR_HI(sp) - LONG_L v1, GDB_FR_LO(sp) - mthi v0 - mtlo v1 - LONG_L $31, GDB_FR_REG31(sp) - LONG_L $30, GDB_FR_REG30(sp) - LONG_L $28, GDB_FR_REG28(sp) - LONG_L $27, GDB_FR_REG27(sp) - LONG_L $26, GDB_FR_REG26(sp) - LONG_L $25, GDB_FR_REG25(sp) - LONG_L $24, GDB_FR_REG24(sp) - LONG_L $23, GDB_FR_REG23(sp) - LONG_L $22, GDB_FR_REG22(sp) - LONG_L $21, GDB_FR_REG21(sp) - LONG_L $20, GDB_FR_REG20(sp) - LONG_L $19, GDB_FR_REG19(sp) - LONG_L $18, GDB_FR_REG18(sp) - LONG_L $17, GDB_FR_REG17(sp) - LONG_L $16, GDB_FR_REG16(sp) - LONG_L $15, GDB_FR_REG15(sp) - LONG_L $14, GDB_FR_REG14(sp) - LONG_L $13, GDB_FR_REG13(sp) - LONG_L $12, GDB_FR_REG12(sp) - LONG_L $11, GDB_FR_REG11(sp) - LONG_L $10, GDB_FR_REG10(sp) - LONG_L $9, GDB_FR_REG9(sp) - LONG_L $8, GDB_FR_REG8(sp) - LONG_L $7, GDB_FR_REG7(sp) - LONG_L $6, GDB_FR_REG6(sp) - LONG_L $5, GDB_FR_REG5(sp) - LONG_L $4, GDB_FR_REG4(sp) - LONG_L $3, GDB_FR_REG3(sp) - LONG_L $2, GDB_FR_REG2(sp) - LONG_L $1, GDB_FR_REG1(sp) -#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) - LONG_L k0, GDB_FR_EPC(sp) - LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */ - jr k0 - rfe -#else - LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */ - - .set mips3 - eret - .set mips0 -#endif - .set at - .set reorder - END(trap_low) - -LEAF(kgdb_read_byte) -4: lb t0, (a0) - sb t0, (a1) - li v0, 0 - jr ra - .section __ex_table,"a" - PTR 4b, kgdbfault - .previous - END(kgdb_read_byte) - -LEAF(kgdb_write_byte) -5: sb a0, (a1) - li v0, 0 - jr ra - .section __ex_table,"a" - PTR 5b, kgdbfault - .previous - END(kgdb_write_byte) - - .type kgdbfault@function - .ent kgdbfault - -kgdbfault: li v0, -EFAULT - jr ra - .end kgdbfault diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-stub.c 
linux-2.6.22-591/arch/mips/kernel/gdb-stub.c --- linux-2.6.22-570/arch/mips/kernel/gdb-stub.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/kernel/gdb-stub.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,1154 +0,0 @@ -/* - * arch/mips/kernel/gdb-stub.c - * - * Originally written by Glenn Engel, Lake Stevens Instrument Division - * - * Contributed by HP Systems - * - * Modified for SPARC by Stu Grossman, Cygnus Support. - * - * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse - * Send complaints, suggestions etc. to - * - * Copyright (C) 1995 Andreas Busse - * - * Copyright (C) 2003 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - */ - -/* - * To enable debugger support, two things need to happen. One, a - * call to set_debug_traps() is necessary in order to allow any breakpoints - * or error conditions to be properly intercepted and reported to gdb. - * Two, a breakpoint needs to be generated to begin communication. This - * is most easily accomplished by a call to breakpoint(). Breakpoint() - * simulates a breakpoint by executing a BREAK instruction. - * - * - * The following gdb commands are supported: - * - * command function Return value - * - * g return the value of the CPU registers hex data or ENN - * G set the value of the CPU registers OK or ENN - * - * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN - * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN - * - * c Resume at current address SNN ( signal NN) - * cAA..AA Continue at address AA..AA SNN - * - * s Step one instruction SNN - * sAA..AA Step one instruction from AA..AA SNN - * - * k kill - * - * ? What was the last sigval ? SNN (signal NN) - * - * bBB..BB Set baud rate to BB..BB OK or BNN, then sets - * baud rate - * - * All commands and responses are sent with a packet which includes a - * checksum. A packet consists of - * - * $#. 
- * - * where - * :: - * :: < two hex digits computed as modulo 256 sum of > - * - * When a packet is received, it is first acknowledged with either '+' or '-'. - * '+' indicates a successful transfer. '-' indicates a failed transfer. - * - * Example: - * - * Host: Reply: - * $m0,10#2a +$00010203040506070809101112131415#42 - * - * - * ============== - * MORE EXAMPLES: - * ============== - * - * For reference -- the following are the steps that one - * company took (RidgeRun Inc) to get remote gdb debugging - * going. In this scenario the host machine was a PC and the - * target platform was a Galileo EVB64120A MIPS evaluation - * board. - * - * Step 1: - * First download gdb-5.0.tar.gz from the internet. - * and then build/install the package. - * - * Example: - * $ tar zxf gdb-5.0.tar.gz - * $ cd gdb-5.0 - * $ ./configure --target=mips-linux-elf - * $ make - * $ install - * $ which mips-linux-elf-gdb - * /usr/local/bin/mips-linux-elf-gdb - * - * Step 2: - * Configure linux for remote debugging and build it. - * - * Example: - * $ cd ~/linux - * $ make menuconfig - * $ make - * - * Step 3: - * Download the kernel to the remote target and start - * the kernel running. It will promptly halt and wait - * for the host gdb session to connect. It does this - * since the "Kernel Hacking" option has defined - * CONFIG_KGDB which in turn enables your calls - * to: - * set_debug_traps(); - * breakpoint(); - * - * Step 4: - * Start the gdb session on the host. - * - * Example: - * $ mips-linux-elf-gdb vmlinux - * (gdb) set remotebaud 115200 - * (gdb) target remote /dev/ttyS1 - * ...at this point you are connected to - * the remote target and can use gdb - * in the normal fasion. Setting - * breakpoints, single stepping, - * printing variables, etc. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * external low-level support routines - */ - -extern int putDebugChar(char c); /* write a single character */ -extern char getDebugChar(void); /* read and return a single char */ -extern void trap_low(void); - -/* - * breakpoint and test functions - */ -extern void breakpoint(void); -extern void breakinst(void); -extern void async_breakpoint(void); -extern void async_breakinst(void); -extern void adel(void); - -/* - * local prototypes - */ - -static void getpacket(char *buffer); -static void putpacket(char *buffer); -static int computeSignal(int tt); -static int hex(unsigned char ch); -static int hexToInt(char **ptr, int *intValue); -static int hexToLong(char **ptr, long *longValue); -static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault); -void handle_exception(struct gdb_regs *regs); - -int kgdb_enabled; - -/* - * spin locks for smp case - */ -static DEFINE_SPINLOCK(kgdb_lock); -static raw_spinlock_t kgdb_cpulock[NR_CPUS] = { - [0 ... NR_CPUS-1] = __RAW_SPIN_LOCK_UNLOCKED, -}; - -/* - * BUFMAX defines the maximum number of characters in inbound/outbound buffers - * at least NUMREGBYTES*2 are needed for register packets - */ -#define BUFMAX 2048 - -static char input_buffer[BUFMAX]; -static char output_buffer[BUFMAX]; -static int initialized; /* !0 means we've been initialized */ -static int kgdb_started; -static const char hexchars[]="0123456789abcdef"; - -/* Used to prevent crashes in memory access. Note that they'll crash anyway if - we haven't set up fault handlers yet... 
*/ -int kgdb_read_byte(unsigned char *address, unsigned char *dest); -int kgdb_write_byte(unsigned char val, unsigned char *dest); - -/* - * Convert ch from a hex digit to an int - */ -static int hex(unsigned char ch) -{ - if (ch >= 'a' && ch <= 'f') - return ch-'a'+10; - if (ch >= '0' && ch <= '9') - return ch-'0'; - if (ch >= 'A' && ch <= 'F') - return ch-'A'+10; - return -1; -} - -/* - * scan for the sequence $# - */ -static void getpacket(char *buffer) -{ - unsigned char checksum; - unsigned char xmitcsum; - int i; - int count; - unsigned char ch; - - do { - /* - * wait around for the start character, - * ignore all other characters - */ - while ((ch = (getDebugChar() & 0x7f)) != '$') ; - - checksum = 0; - xmitcsum = -1; - count = 0; - - /* - * now, read until a # or end of buffer is found - */ - while (count < BUFMAX) { - ch = getDebugChar(); - if (ch == '#') - break; - checksum = checksum + ch; - buffer[count] = ch; - count = count + 1; - } - - if (count >= BUFMAX) - continue; - - buffer[count] = 0; - - if (ch == '#') { - xmitcsum = hex(getDebugChar() & 0x7f) << 4; - xmitcsum |= hex(getDebugChar() & 0x7f); - - if (checksum != xmitcsum) - putDebugChar('-'); /* failed checksum */ - else { - putDebugChar('+'); /* successful transfer */ - - /* - * if a sequence char is present, - * reply the sequence ID - */ - if (buffer[2] == ':') { - putDebugChar(buffer[0]); - putDebugChar(buffer[1]); - - /* - * remove sequence chars from buffer - */ - count = strlen(buffer); - for (i=3; i <= count; i++) - buffer[i-3] = buffer[i]; - } - } - } - } - while (checksum != xmitcsum); -} - -/* - * send the packet in buffer. - */ -static void putpacket(char *buffer) -{ - unsigned char checksum; - int count; - unsigned char ch; - - /* - * $#. 
- */ - - do { - putDebugChar('$'); - checksum = 0; - count = 0; - - while ((ch = buffer[count]) != 0) { - if (!(putDebugChar(ch))) - return; - checksum += ch; - count += 1; - } - - putDebugChar('#'); - putDebugChar(hexchars[checksum >> 4]); - putDebugChar(hexchars[checksum & 0xf]); - - } - while ((getDebugChar() & 0x7f) != '+'); -} - - -/* - * Convert the memory pointed to by mem into hex, placing result in buf. - * Return a pointer to the last char put in buf (null), in case of mem fault, - * return 0. - * may_fault is non-zero if we are reading from arbitrary memory, but is currently - * not used. - */ -static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault) -{ - unsigned char ch; - - while (count-- > 0) { - if (kgdb_read_byte(mem++, &ch) != 0) - return 0; - *buf++ = hexchars[ch >> 4]; - *buf++ = hexchars[ch & 0xf]; - } - - *buf = 0; - - return buf; -} - -/* - * convert the hex array pointed to by buf into binary to be placed in mem - * return a pointer to the character AFTER the last byte written - * may_fault is non-zero if we are reading from arbitrary memory, but is currently - * not used. - */ -static char *hex2mem(char *buf, char *mem, int count, int binary, int may_fault) -{ - int i; - unsigned char ch; - - for (i=0; itt && ht->signo; ht++) - saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); - - putDebugChar('+'); /* 'hello world' */ - /* - * In case GDB is started before us, ack any packets - * (presumably "$?#xx") sitting there. 
- */ - while((c = getDebugChar()) != '$'); - while((c = getDebugChar()) != '#'); - c = getDebugChar(); /* eat first csum byte */ - c = getDebugChar(); /* eat second csum byte */ - putDebugChar('+'); /* ack it */ - - initialized = 1; - local_irq_restore(flags); -} - -void restore_debug_traps(void) -{ - struct hard_trap_info *ht; - unsigned long flags; - - local_irq_save(flags); - for (ht = hard_trap_info; ht->tt && ht->signo; ht++) - set_except_vector(ht->tt, saved_vectors[ht->tt]); - local_irq_restore(flags); -} - -/* - * Convert the MIPS hardware trap type code to a Unix signal number. - */ -static int computeSignal(int tt) -{ - struct hard_trap_info *ht; - - for (ht = hard_trap_info; ht->tt && ht->signo; ht++) - if (ht->tt == tt) - return ht->signo; - - return SIGHUP; /* default for things we don't know about */ -} - -/* - * While we find nice hex chars, build an int. - * Return number of chars processed. - */ -static int hexToInt(char **ptr, int *intValue) -{ - int numChars = 0; - int hexValue; - - *intValue = 0; - - while (**ptr) { - hexValue = hex(**ptr); - if (hexValue < 0) - break; - - *intValue = (*intValue << 4) | hexValue; - numChars ++; - - (*ptr)++; - } - - return (numChars); -} - -static int hexToLong(char **ptr, long *longValue) -{ - int numChars = 0; - int hexValue; - - *longValue = 0; - - while (**ptr) { - hexValue = hex(**ptr); - if (hexValue < 0) - break; - - *longValue = (*longValue << 4) | hexValue; - numChars ++; - - (*ptr)++; - } - - return numChars; -} - - -#if 0 -/* - * Print registers (on target console) - * Used only to debug the stub... 
- */ -void show_gdbregs(struct gdb_regs * regs) -{ - /* - * Saved main processor registers - */ - printk("$0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg0, regs->reg1, regs->reg2, regs->reg3, - regs->reg4, regs->reg5, regs->reg6, regs->reg7); - printk("$8 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg8, regs->reg9, regs->reg10, regs->reg11, - regs->reg12, regs->reg13, regs->reg14, regs->reg15); - printk("$16: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg16, regs->reg17, regs->reg18, regs->reg19, - regs->reg20, regs->reg21, regs->reg22, regs->reg23); - printk("$24: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", - regs->reg24, regs->reg25, regs->reg26, regs->reg27, - regs->reg28, regs->reg29, regs->reg30, regs->reg31); - - /* - * Saved cp0 registers - */ - printk("epc : %08lx\nStatus: %08lx\nCause : %08lx\n", - regs->cp0_epc, regs->cp0_status, regs->cp0_cause); -} -#endif /* dead code */ - -/* - * We single-step by setting breakpoints. When an exception - * is handled, we need to restore the instructions hoisted - * when the breakpoints were set. - * - * This is where we save the original instructions. - */ -static struct gdb_bp_save { - unsigned long addr; - unsigned int val; -} step_bp[2]; - -#define BP 0x0000000d /* break opcode */ - -/* - * Set breakpoint instructions for single stepping. - */ -static void single_step(struct gdb_regs *regs) -{ - union mips_instruction insn; - unsigned long targ; - int is_branch, is_cond, i; - - targ = regs->cp0_epc; - insn.word = *(unsigned int *)targ; - is_branch = is_cond = 0; - - switch (insn.i_format.opcode) { - /* - * jr and jalr are in r_format format. - */ - case spec_op: - switch (insn.r_format.func) { - case jalr_op: - case jr_op: - targ = *(&regs->reg0 + insn.r_format.rs); - is_branch = 1; - break; - } - break; - - /* - * This group contains: - * bltz_op, bgez_op, bltzl_op, bgezl_op, - * bltzal_op, bgezal_op, bltzall_op, bgezall_op.
- */ - case bcond_op: - is_branch = is_cond = 1; - targ += 4 + (insn.i_format.simmediate << 2); - break; - - /* - * These are unconditional and in j_format. - */ - case jal_op: - case j_op: - is_branch = 1; - targ += 4; - targ >>= 28; - targ <<= 28; - targ |= (insn.j_format.target << 2); - break; - - /* - * These are conditional. - */ - case beq_op: - case beql_op: - case bne_op: - case bnel_op: - case blez_op: - case blezl_op: - case bgtz_op: - case bgtzl_op: - case cop0_op: - case cop1_op: - case cop2_op: - case cop1x_op: - is_branch = is_cond = 1; - targ += 4 + (insn.i_format.simmediate << 2); - break; - } - - if (is_branch) { - i = 0; - if (is_cond && targ != (regs->cp0_epc + 8)) { - step_bp[i].addr = regs->cp0_epc + 8; - step_bp[i++].val = *(unsigned *)(regs->cp0_epc + 8); - *(unsigned *)(regs->cp0_epc + 8) = BP; - } - step_bp[i].addr = targ; - step_bp[i].val = *(unsigned *)targ; - *(unsigned *)targ = BP; - } else { - step_bp[0].addr = regs->cp0_epc + 4; - step_bp[0].val = *(unsigned *)(regs->cp0_epc + 4); - *(unsigned *)(regs->cp0_epc + 4) = BP; - } -} - -/* - * If asynchronously interrupted by gdb, then we need to set a breakpoint - * at the interrupted instruction so that we wind up stopped with a - * reasonable stack frame. - */ -static struct gdb_bp_save async_bp; - -/* - * Swap the interrupted EPC with our asynchronous breakpoint routine. - * This is safer than stuffing the breakpoint in-place, since no cache - * flushes (or resulting smp_call_functions) are required. The - * assumption is that only one CPU will be handling asynchronous bp's, - * and only one can be active at a time. 
- */ -extern spinlock_t smp_call_lock; - -void set_async_breakpoint(unsigned long *epc) -{ - /* skip breaking into userland */ - if ((*epc & 0x80000000) == 0) - return; - -#ifdef CONFIG_SMP - /* avoid deadlock if someone is make IPC */ - if (spin_is_locked(&smp_call_lock)) - return; -#endif - - async_bp.addr = *epc; - *epc = (unsigned long)async_breakpoint; -} - -static void kgdb_wait(void *arg) -{ - unsigned flags; - int cpu = smp_processor_id(); - - local_irq_save(flags); - - __raw_spin_lock(&kgdb_cpulock[cpu]); - __raw_spin_unlock(&kgdb_cpulock[cpu]); - - local_irq_restore(flags); -} - -/* - * GDB stub needs to call kgdb_wait on all processor with interrupts - * disabled, so it uses it's own special variant. - */ -static int kgdb_smp_call_kgdb_wait(void) -{ -#ifdef CONFIG_SMP - struct call_data_struct data; - int i, cpus = num_online_cpus() - 1; - int cpu = smp_processor_id(); - - /* - * Can die spectacularly if this CPU isn't yet marked online - */ - BUG_ON(!cpu_online(cpu)); - - if (!cpus) - return 0; - - if (spin_is_locked(&smp_call_lock)) { - /* - * Some other processor is trying to make us do something - * but we're not going to respond... give up - */ - return -1; - } - - /* - * We will continue here, accepting the fact that - * the kernel may deadlock if another CPU attempts - * to call smp_call_function now... - */ - - data.func = kgdb_wait; - data.info = NULL; - atomic_set(&data.started, 0); - data.wait = 0; - - spin_lock(&smp_call_lock); - call_data = &data; - mb(); - - /* Send a message to all other CPUs and wait for them to respond */ - for (i = 0; i < NR_CPUS; i++) - if (cpu_online(i) && i != cpu) - core_send_ipi(i, SMP_CALL_FUNCTION); - - /* Wait for response */ - /* FIXME: lock-up detection, backtrace on lock-up */ - while (atomic_read(&data.started) != cpus) - barrier(); - - call_data = NULL; - spin_unlock(&smp_call_lock); -#endif - - return 0; -} - -/* - * This function does all command processing for interfacing to gdb. 
It - * returns 1 if you should skip the instruction at the trap address, 0 - * otherwise. - */ -void handle_exception (struct gdb_regs *regs) -{ - int trap; /* Trap type */ - int sigval; - long addr; - int length; - char *ptr; - unsigned long *stack; - int i; - int bflag = 0; - - kgdb_started = 1; - - /* - * acquire the big kgdb spinlock - */ - if (!spin_trylock(&kgdb_lock)) { - /* - * some other CPU has the lock, we should go back to - * receive the gdb_wait IPC - */ - return; - } - - /* - * If we're in async_breakpoint(), restore the real EPC from - * the breakpoint. - */ - if (regs->cp0_epc == (unsigned long)async_breakinst) { - regs->cp0_epc = async_bp.addr; - async_bp.addr = 0; - } - - /* - * acquire the CPU spinlocks - */ - for (i = num_online_cpus()-1; i >= 0; i--) - if (__raw_spin_trylock(&kgdb_cpulock[i]) == 0) - panic("kgdb: couldn't get cpulock %d\n", i); - - /* - * force other cpus to enter kgdb - */ - kgdb_smp_call_kgdb_wait(); - - /* - * If we're in breakpoint() increment the PC - */ - trap = (regs->cp0_cause & 0x7c) >> 2; - if (trap == 9 && regs->cp0_epc == (unsigned long)breakinst) - regs->cp0_epc += 4; - - /* - * If we were single_stepping, restore the opcodes hoisted - * for the breakpoint[s]. 
- */ - if (step_bp[0].addr) { - *(unsigned *)step_bp[0].addr = step_bp[0].val; - step_bp[0].addr = 0; - - if (step_bp[1].addr) { - *(unsigned *)step_bp[1].addr = step_bp[1].val; - step_bp[1].addr = 0; - } - } - - stack = (long *)regs->reg29; /* stack ptr */ - sigval = computeSignal(trap); - - /* - * reply to host that an exception has occurred - */ - ptr = output_buffer; - - /* - * Send trap type (converted to signal) - */ - *ptr++ = 'T'; - *ptr++ = hexchars[sigval >> 4]; - *ptr++ = hexchars[sigval & 0xf]; - - /* - * Send Error PC - */ - *ptr++ = hexchars[REG_EPC >> 4]; - *ptr++ = hexchars[REG_EPC & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)&regs->cp0_epc, ptr, sizeof(long), 0); - *ptr++ = ';'; - - /* - * Send frame pointer - */ - *ptr++ = hexchars[REG_FP >> 4]; - *ptr++ = hexchars[REG_FP & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)&regs->reg30, ptr, sizeof(long), 0); - *ptr++ = ';'; - - /* - * Send stack pointer - */ - *ptr++ = hexchars[REG_SP >> 4]; - *ptr++ = hexchars[REG_SP & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)&regs->reg29, ptr, sizeof(long), 0); - *ptr++ = ';'; - - *ptr++ = 0; - putpacket(output_buffer); /* send it off...
*/ - - /* - * Wait for input from remote GDB - */ - while (1) { - output_buffer[0] = 0; - getpacket(input_buffer); - - switch (input_buffer[0]) - { - case '?': - output_buffer[0] = 'S'; - output_buffer[1] = hexchars[sigval >> 4]; - output_buffer[2] = hexchars[sigval & 0xf]; - output_buffer[3] = 0; - break; - - /* - * Detach debugger; let CPU run - */ - case 'D': - putpacket(output_buffer); - goto finish_kgdb; - break; - - case 'd': - /* toggle debug flag */ - break; - - /* - * Return the value of the CPU registers - */ - case 'g': - ptr = output_buffer; - ptr = mem2hex((char *)&regs->reg0, ptr, 32*sizeof(long), 0); /* r0...r31 */ - ptr = mem2hex((char *)&regs->cp0_status, ptr, 6*sizeof(long), 0); /* cp0 */ - ptr = mem2hex((char *)&regs->fpr0, ptr, 32*sizeof(long), 0); /* f0...31 */ - ptr = mem2hex((char *)&regs->cp1_fsr, ptr, 2*sizeof(long), 0); /* cp1 */ - ptr = mem2hex((char *)&regs->frame_ptr, ptr, 2*sizeof(long), 0); /* frp */ - ptr = mem2hex((char *)&regs->cp0_index, ptr, 16*sizeof(long), 0); /* cp0 */ - break; - - /* - * set the value of the CPU registers - return OK - */ - case 'G': - { - ptr = &input_buffer[1]; - hex2mem(ptr, (char *)&regs->reg0, 32*sizeof(long), 0, 0); - ptr += 32*(2*sizeof(long)); - hex2mem(ptr, (char *)&regs->cp0_status, 6*sizeof(long), 0, 0); - ptr += 6*(2*sizeof(long)); - hex2mem(ptr, (char *)&regs->fpr0, 32*sizeof(long), 0, 0); - ptr += 32*(2*sizeof(long)); - hex2mem(ptr, (char *)&regs->cp1_fsr, 2*sizeof(long), 0, 0); - ptr += 2*(2*sizeof(long)); - hex2mem(ptr, (char *)&regs->frame_ptr, 2*sizeof(long), 0, 0); - ptr += 2*(2*sizeof(long)); - hex2mem(ptr, (char *)&regs->cp0_index, 16*sizeof(long), 0, 0); - strcpy(output_buffer,"OK"); - } - break; - - /* - * mAA..AA,LLLL Read LLLL bytes at address AA..AA - */ - case 'm': - ptr = &input_buffer[1]; - - if (hexToLong(&ptr, &addr) - && *ptr++ == ',' - && hexToInt(&ptr, &length)) { - if (mem2hex((char *)addr, output_buffer, length, 1)) - break; - strcpy (output_buffer, "E03"); - } else - strcpy(output_buffer,"E01"); - break; - -
/* - * XAA..AA,LLLL: Write LLLL escaped binary bytes at address AA.AA - */ - case 'X': - bflag = 1; - /* fall through */ - - /* - * MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK - */ - case 'M': - ptr = &input_buffer[1]; - - if (hexToLong(&ptr, &addr) - && *ptr++ == ',' - && hexToInt(&ptr, &length) - && *ptr++ == ':') { - if (hex2mem(ptr, (char *)addr, length, bflag, 1)) - strcpy(output_buffer, "OK"); - else - strcpy(output_buffer, "E03"); - } - else - strcpy(output_buffer, "E02"); - break; - - /* - * cAA..AA Continue at address AA..AA(optional) - */ - case 'c': - /* try to read optional parameter, pc unchanged if no parm */ - - ptr = &input_buffer[1]; - if (hexToLong(&ptr, &addr)) - regs->cp0_epc = addr; - - goto exit_kgdb_exception; - break; - - /* - * kill the program; let us try to restart the machine - * Reset the whole machine. - */ - case 'k': - case 'r': - machine_restart("kgdb restarts machine"); - break; - - /* - * Step to next instruction - */ - case 's': - /* - * There is no single step insn in the MIPS ISA, so we - * use breakpoints and continue, instead. 
- */ - single_step(regs); - goto exit_kgdb_exception; - /* NOTREACHED */ - break; - - /* - * Set baud rate (bBB) - * FIXME: Needs to be written - */ - case 'b': - { -#if 0 - int baudrate; - extern void set_timer_3(); - - ptr = &input_buffer[1]; - if (!hexToInt(&ptr, &baudrate)) - { - strcpy(output_buffer,"B01"); - break; - } - - /* Convert baud rate to uart clock divider */ - - switch (baudrate) - { - case 38400: - baudrate = 16; - break; - case 19200: - baudrate = 33; - break; - case 9600: - baudrate = 65; - break; - default: - baudrate = 0; - strcpy(output_buffer,"B02"); - goto x1; - } - - if (baudrate) { - putpacket("OK"); /* Ack before changing speed */ - set_timer_3(baudrate); /* Set it */ - } -#endif - } - break; - - } /* switch */ - - /* - * reply to the request - */ - - putpacket(output_buffer); - - } /* while */ - - return; - -finish_kgdb: - restore_debug_traps(); - -exit_kgdb_exception: - /* release locks so other CPUs can go */ - for (i = num_online_cpus()-1; i >= 0; i--) - __raw_spin_unlock(&kgdb_cpulock[i]); - spin_unlock(&kgdb_lock); - - __flush_cache_all(); - return; -} - -/* - * This function will generate a breakpoint exception. It is used at the - * beginning of a program to sync up with a debugger and can be used - * otherwise as a quick means to stop program execution and "break" into - * the debugger. 
- */ -void breakpoint(void) -{ - if (!initialized) - return; - - __asm__ __volatile__( - ".globl breakinst\n\t" - ".set\tnoreorder\n\t" - "nop\n" - "breakinst:\tbreak\n\t" - "nop\n\t" - ".set\treorder" - ); -} - -/* Nothing but the break; don't pollute any registers */ -void async_breakpoint(void) -{ - __asm__ __volatile__( - ".globl async_breakinst\n\t" - ".set\tnoreorder\n\t" - "nop\n" - "async_breakinst:\tbreak\n\t" - "nop\n\t" - ".set\treorder" - ); -} - -void adel(void) -{ - __asm__ __volatile__( - ".globl\tadel\n\t" - "lui\t$8,0x8000\n\t" - "lw\t$9,1($8)\n\t" - ); -} - -/* - * malloc is needed by gdb client in "call func()", even a private one - * will make gdb happy - */ -static void * __attribute_used__ malloc(size_t size) -{ - return kmalloc(size, GFP_ATOMIC); -} - -static void __attribute_used__ free (void *where) -{ - kfree(where); -} - -#ifdef CONFIG_GDB_CONSOLE - -void gdb_putsn(const char *str, int l) -{ - char outbuf[18]; - - if (!kgdb_started) - return; - - outbuf[0]='O'; - - while(l) { - int i = (l>8)?8:l; - mem2hex((char *)str, &outbuf[1], i, 0); - outbuf[(i*2)+1]=0; - putpacket(outbuf); - str += i; - l -= i; - } -} - -static void gdb_console_write(struct console *con, const char *s, unsigned n) -{ - gdb_putsn(s, n); -} - -static struct console gdb_console = { - .name = "gdb", - .write = gdb_console_write, - .flags = CON_PRINTBUFFER, - .index = -1 -}; - -static int __init register_gdb_console(void) -{ - register_console(&gdb_console); - - return 0; -} - -console_initcall(register_gdb_console); - -#endif diff -Nurb linux-2.6.22-570/arch/mips/kernel/irq.c linux-2.6.22-591/arch/mips/kernel/irq.c --- linux-2.6.22-570/arch/mips/kernel/irq.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/kernel/irq.c 2007-12-21 15:36:11.000000000 -0500 @@ -25,6 +25,10 @@ #include #include #include +#include + +/* Keep track of if we've done certain initialization already or not. 
*/ +int kgdb_early_setup; static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; @@ -138,28 +142,23 @@ atomic_inc(&irq_err_count); } -#ifdef CONFIG_KGDB -extern void breakpoint(void); -extern void set_debug_traps(void); - -static int kgdb_flag = 1; -static int __init nokgdb(char *str) +void __init init_IRQ(void) { - kgdb_flag = 0; - return 1; -} -__setup("nokgdb", nokgdb); + +#ifdef CONFIG_KGDB + if (kgdb_early_setup) + return; #endif -void __init init_IRQ(void) -{ arch_init_irq(); + #ifdef CONFIG_KGDB - if (kgdb_flag) { - printk("Wait for gdb client connection ...\n"); - set_debug_traps(); - breakpoint(); - } + /* + * We have been called before kgdb_arch_init(). Hence, + * we don't want the traps to be reinitialized + */ + if (kgdb_early_setup == 0) + kgdb_early_setup = 1; #endif } diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c linux-2.6.22-591/arch/mips/kernel/kgdb-jmp.c --- linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/mips/kernel/kgdb-jmp.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,110 @@ +/* + * arch/mips/kernel/kgdb-jmp.c + * + * Save and restore system registers so that within a limited frame we + * may have a fault and "jump back" to a known safe location. + * + * Author: Tom Rini + * Author: Manish Lachwani + * + * Cribbed from glibc, which carries the following: + * Copyright (C) 1996, 1997, 2000, 2002, 2003 Free Software Foundation, Inc. + * Copyright (C) 2005-2006 by MontaVista Software. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied.
+ */ + +#include + +#ifdef CONFIG_64BIT +/* + * MIPS 64-bit + */ + +int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp) +{ + __asm__ __volatile__ ("sd $gp, %0" : : "m" (curr_context[0])); + __asm__ __volatile__ ("sd $16, %0" : : "m" (curr_context[1])); + __asm__ __volatile__ ("sd $17, %0" : : "m" (curr_context[2])); + __asm__ __volatile__ ("sd $18, %0" : : "m" (curr_context[3])); + __asm__ __volatile__ ("sd $19, %0" : : "m" (curr_context[4])); + __asm__ __volatile__ ("sd $20, %0" : : "m" (curr_context[5])); + __asm__ __volatile__ ("sd $21, %0" : : "m" (curr_context[6])); + __asm__ __volatile__ ("sd $22, %0" : : "m" (curr_context[7])); + __asm__ __volatile__ ("sd $23, %0" : : "m" (curr_context[8])); + __asm__ __volatile__ ("sd $31, %0" : : "m" (curr_context[9])); + curr_context[10] = sp; + curr_context[11] = fp; + + return 0; +} + +void kgdb_fault_longjmp(unsigned long *curr_context) +{ + __asm__ __volatile__ ("ld $gp, %0" : : "m" (curr_context[0])); + __asm__ __volatile__ ("ld $16, %0" : : "m" (curr_context[1])); + __asm__ __volatile__ ("ld $17, %0" : : "m" (curr_context[2])); + __asm__ __volatile__ ("ld $18, %0" : : "m" (curr_context[3])); + __asm__ __volatile__ ("ld $19, %0" : : "m" (curr_context[4])); + __asm__ __volatile__ ("ld $20, %0" : : "m" (curr_context[5])); + __asm__ __volatile__ ("ld $21, %0" : : "m" (curr_context[6])); + __asm__ __volatile__ ("ld $22, %0" : : "m" (curr_context[7])); + __asm__ __volatile__ ("ld $23, %0" : : "m" (curr_context[8])); + __asm__ __volatile__ ("ld $25, %0" : : "m" (curr_context[9])); + __asm__ __volatile__ ("ld $29, %0\n\t" + "ld $30, %1\n\t" : : + "m" (curr_context[10]), "m" (curr_context[11])); + + __asm__ __volatile__ ("dli $2, 1"); + __asm__ __volatile__ ("j $25"); + + for (;;); +} +#else +/* + * MIPS 32-bit + */ + +int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp) +{ + __asm__ __volatile__("sw $gp, %0" : : "m" (curr_context[0])); + __asm__ 
__volatile__("sw $16, %0" : : "m" (curr_context[1])); + __asm__ __volatile__("sw $17, %0" : : "m" (curr_context[2])); + __asm__ __volatile__("sw $18, %0" : : "m" (curr_context[3])); + __asm__ __volatile__("sw $19, %0" : : "m" (curr_context[4])); + __asm__ __volatile__("sw $20, %0" : : "m" (curr_context[5])); + __asm__ __volatile__("sw $21, %0" : : "m" (curr_context[6])); + __asm__ __volatile__("sw $22, %0" : : "m" (curr_context[7])); + __asm__ __volatile__("sw $23, %0" : : "m" (curr_context[8])); + __asm__ __volatile__("sw $31, %0" : : "m" (curr_context[9])); + curr_context[10] = sp; + curr_context[11] = fp; + + return 0; +} + +void kgdb_fault_longjmp(unsigned long *curr_context) +{ + __asm__ __volatile__("lw $gp, %0" : : "m" (curr_context[0])); + __asm__ __volatile__("lw $16, %0" : : "m" (curr_context[1])); + __asm__ __volatile__("lw $17, %0" : : "m" (curr_context[2])); + __asm__ __volatile__("lw $18, %0" : : "m" (curr_context[3])); + __asm__ __volatile__("lw $19, %0" : : "m" (curr_context[4])); + __asm__ __volatile__("lw $20, %0" : : "m" (curr_context[5])); + __asm__ __volatile__("lw $21, %0" : : "m" (curr_context[6])); + __asm__ __volatile__("lw $22, %0" : : "m" (curr_context[7])); + __asm__ __volatile__("lw $23, %0" : : "m" (curr_context[8])); + __asm__ __volatile__("lw $25, %0" : : "m" (curr_context[9])); + + __asm__ __volatile__("lw $29, %0\n\t" + "lw $30, %1\n\t" : : + "m" (curr_context[10]), "m" (curr_context[11])); + + __asm__ __volatile__("li $2, 1"); + __asm__ __volatile__("jr $25"); + + for (;;); +} +#endif diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S linux-2.6.22-591/arch/mips/kernel/kgdb-setjmp.S --- linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/mips/kernel/kgdb-setjmp.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,28 @@ +/* + * arch/mips/kernel/kgdb-setjmp.S + * + * Save and restore system registers so that within a limited frame we + * may have a fault and "jump back" to a
known safe location. + * + * Copyright (C) 2005 by MontaVista Software. + * Author: Manish Lachwani (mlachwani@mvista.com) + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#include +#include +#include +#include + + .ent kgdb_fault_setjmp,0 +ENTRY (kgdb_fault_setjmp) + move a1, sp + move a2, fp +#ifdef CONFIG_64BIT + nop +#endif + j kgdb_fault_setjmp_aux + .end kgdb_fault_setjmp diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb.c linux-2.6.22-591/arch/mips/kernel/kgdb.c --- linux-2.6.22-570/arch/mips/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/mips/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,299 @@ +/* + * arch/mips/kernel/kgdb.c + * + * Originally written by Glenn Engel, Lake Stevens Instrument Division + * + * Contributed by HP Systems + * + * Modified for SPARC by Stu Grossman, Cygnus Support. + * + * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse + * Send complaints, suggestions etc. to + * + * Copyright (C) 1995 Andreas Busse + * + * Copyright (C) 2003 MontaVista Software Inc. + * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net + * + * Copyright (C) 2004-2005 MontaVista Software Inc. + * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include + +static struct hard_trap_info { + unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 6, SIGBUS }, /* instruction bus error */ + { 7, SIGBUS }, /* data bus error */ + { 9, SIGTRAP }, /* break */ +/* { 11, SIGILL }, */ /* CPU unusable */ + { 12, SIGFPE }, /* overflow */ + { 13, SIGTRAP }, /* trap */ + { 14, SIGSEGV }, /* virtual instruction cache coherency */ + { 15, SIGFPE }, /* floating point exception */ + { 23, SIGSEGV }, /* watch */ + { 31, SIGSEGV }, /* virtual data cache coherency */ + { 0, 0} /* Must be last */ +}; + +/* Save the normal trap handlers for user-mode traps. */ +void *saved_vectors[32]; + +extern void trap_low(void); +extern void breakinst(void); +extern void init_IRQ(void); + +void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmihook(smp_processor_id(), (void *)0); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, 0, 0, 0); + local_irq_disable(); +} + +static int compute_signal(int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +/* + * Set up exception handlers for tracing and breakpoints + */ +void handle_exception(struct pt_regs *regs) +{ + int trap = (regs->cp0_cause & 0x7c) >> 2; + + if (fixup_exception(regs)) { + return; + } + + if (atomic_read(&debugger_active)) + kgdb_nmihook(smp_processor_id(), regs); + + if (atomic_read(&kgdb_setting_breakpoint)) + if ((trap == 9) && (regs->cp0_epc == (unsigned long)breakinst)) + regs->cp0_epc += 4; + + kgdb_handle_exception(0, compute_signal(trap), 0, regs); + + /* In SMP mode, __flush_cache_all 
does IPI */ + local_irq_enable(); + __flush_cache_all(); +} + +void set_debug_traps(void) +{ + struct hard_trap_info *ht; + unsigned long flags; + + local_irq_save(flags); + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); + + local_irq_restore(flags); +} + +#if 0 +/* This should be called before we exit kgdb_handle_exception() I believe. + * -- Tom + */ +void restore_debug_traps(void) +{ + struct hard_trap_info *ht; + unsigned long flags; + + local_irq_save(flags); + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + set_except_vector(ht->tt, saved_vectors[ht->tt]); + local_irq_restore(flags); +} +#endif + +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + int reg; + gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs; + + for (reg = 0; reg < 32; reg++) + *(ptr++) = regs->regs[reg]; + + *(ptr++) = regs->cp0_status; + *(ptr++) = regs->lo; + *(ptr++) = regs->hi; + *(ptr++) = regs->cp0_badvaddr; + *(ptr++) = regs->cp0_cause; + *(ptr++) = regs->cp0_epc; + + return; +} + +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + + int reg; + const gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs; + + for (reg = 0; reg < 32; reg++) + regs->regs[reg] = *(ptr++); + + regs->cp0_status = *(ptr++); + regs->lo = *(ptr++); + regs->hi = *(ptr++); + regs->cp0_badvaddr = *(ptr++); + regs->cp0_cause = *(ptr++); + regs->cp0_epc = *(ptr++); + + return; +} + +/* + * Similar to regs_to_gdb_regs() except that process is sleeping and so + * we may not be able to get all the info. 
+ */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + int reg; + struct thread_info *ti = task_thread_info(p); + unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32; + struct pt_regs *regs = (struct pt_regs *)ksp - 1; + gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs; + + for (reg = 0; reg < 16; reg++) + *(ptr++) = regs->regs[reg]; + + /* S0 - S7 */ + for (reg = 16; reg < 24; reg++) + *(ptr++) = regs->regs[reg]; + + for (reg = 24; reg < 28; reg++) + *(ptr++) = 0; + + /* GP, SP, FP, RA */ + for (reg = 28; reg < 32; reg++) + *(ptr++) = regs->regs[reg]; + + *(ptr++) = regs->cp0_status; + *(ptr++) = regs->lo; + *(ptr++) = regs->hi; + *(ptr++) = regs->cp0_badvaddr; + *(ptr++) = regs->cp0_cause; + *(ptr++) = regs->cp0_epc; + + return; +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger + */ +static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + int trap = (regs->cp0_cause & 0x7c) >> 2; + + /* See if KGDB is interested. */ + if (user_mode(regs)) + /* Userspace events, ignore.
*/ + return NOTIFY_DONE; + + kgdb_handle_exception(trap, compute_signal(trap), 0, regs); + return NOTIFY_OK; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_mips_notify, +}; + +/* + * Handle the 's' and 'c' commands + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + char *ptr; + unsigned long address; + int cpu = smp_processor_id(); + + switch (remcom_in_buffer[0]) { + case 's': + case 'c': + /* handle the optional parameter */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + regs->cp0_epc = address; + + atomic_set(&cpu_doing_single_step, -1); + if (remcom_in_buffer[0] == 's') + if (kgdb_contthread) + atomic_set(&cpu_doing_single_step, cpu); + + return 0; + } + + return -1; +} + +struct kgdb_arch arch_kgdb_ops = { +#ifdef CONFIG_CPU_LITTLE_ENDIAN + .gdb_bpt_instr = {0xd}, +#else + .gdb_bpt_instr = {0x00, 0x00, 0x00, 0x0d}, +#endif +}; + +/* + * We use kgdb_early_setup so that functions we need to call now don't + * cause trouble when called again later. + */ +__init int kgdb_arch_init(void) +{ + /* Board-specifics. */ + /* Force some calls to happen earlier. */ + if (kgdb_early_setup == 0) { + trap_init(); + init_IRQ(); + kgdb_early_setup = 1; + } + + /* Set our traps. */ + /* This needs to be done more finely grained again, paired in + * a before/after in kgdb_handle_exception(...) -- Tom */ + set_debug_traps(); + register_die_notifier(&kgdb_notifier); + + return 0; +} diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S linux-2.6.22-591/arch/mips/kernel/kgdb_handler.S --- linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/mips/kernel/kgdb_handler.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,339 @@ +/* + * arch/mips/kernel/kgdb_handler.S + * + * Copyright (C) 2007 Wind River Systems, Inc + * + * Copyright (C) 2004-2005 MontaVista Software Inc. 
+ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +/* + * Trap Handler for the new KGDB framework. The main KGDB handler is + * handle_exception that will be called from here + * + */ + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_32BIT +#define DMFC0 mfc0 +#define DMTC0 mtc0 +#define LDC1 lwc1 +#define SDC1 swc1 +#endif +#ifdef CONFIG_64BIT +#define DMFC0 dmfc0 +#define DMTC0 dmtc0 +#define LDC1 ldc1 +#define SDC1 sdc1 +#endif + +#include + +/* + * [jsun] We reserve about 2x GDB_FR_SIZE in stack. The lower (addressed) + * part is used to store registers and passed to exception handler. + * The upper part is reserved for "call func" feature where gdb client + * saves some of the regs, sets up call frame and passes args. + * + * A trace shows about 200 bytes are used to store about half of all regs. + * The rest should be big enough for frame setup and passing args. + */ + +/* + * The low level trap handler + */ + .align 5 + NESTED(trap_low, GDB_FR_SIZE, sp) + .set noat + .set noreorder + + mfc0 k0, CP0_STATUS + sll k0, 3 /* extract cu0 bit */ + bltz k0, 1f + move k1, sp + + /* + * Called from user mode, go somewhere else. + */ +#if defined(CONFIG_32BIT) + lui k1, %hi(saved_vectors) + mfc0 k0, CP0_CAUSE + andi k0, k0, 0x7c + add k1, k1, k0 + lw k0, %lo(saved_vectors)(k1) +#elif defined(CONFIG_64BIT) && defined(CONFIG_BUILD_ELF64) + DMFC0 k0, CP0_CAUSE + lui k1, %highest(saved_vectors) + andi k0, k0, 0x7c /* mask exception type */ + dsll k0, 1 /* turn into byte offset */ + daddiu k1, %higher(saved_vectors) + dsll k1, k1, 16 + daddiu k1, %hi(saved_vectors) + dsll k1, k1, 16 + daddu k1, k1, k0 + LONG_L k0, %lo(saved_vectors)(k1) +#else +#error "MIPS configuration is unsupported for kgdb!!"
+#endif + jr k0 + nop +1: + move k0, sp + PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above + LONG_S k0, GDB_FR_REG29(sp) + LONG_S $2, GDB_FR_REG2(sp) + +/* + * First save the CP0 and special registers + */ + + mfc0 v0, CP0_STATUS + LONG_S v0, GDB_FR_STATUS(sp) + mfc0 v0, CP0_CAUSE + LONG_S v0, GDB_FR_CAUSE(sp) + DMFC0 v0, CP0_EPC + LONG_S v0, GDB_FR_EPC(sp) + DMFC0 v0, CP0_BADVADDR + LONG_S v0, GDB_FR_BADVADDR(sp) + mfhi v0 + LONG_S v0, GDB_FR_HI(sp) + mflo v0 + LONG_S v0, GDB_FR_LO(sp) + +/* + * Now the integer registers + */ + + LONG_S zero, GDB_FR_REG0(sp) /* I know... */ + LONG_S $1, GDB_FR_REG1(sp) + /* v0 already saved */ + LONG_S $3, GDB_FR_REG3(sp) + LONG_S $4, GDB_FR_REG4(sp) + LONG_S $5, GDB_FR_REG5(sp) + LONG_S $6, GDB_FR_REG6(sp) + LONG_S $7, GDB_FR_REG7(sp) + LONG_S $8, GDB_FR_REG8(sp) + LONG_S $9, GDB_FR_REG9(sp) + LONG_S $10, GDB_FR_REG10(sp) + LONG_S $11, GDB_FR_REG11(sp) + LONG_S $12, GDB_FR_REG12(sp) + LONG_S $13, GDB_FR_REG13(sp) + LONG_S $14, GDB_FR_REG14(sp) + LONG_S $15, GDB_FR_REG15(sp) + LONG_S $16, GDB_FR_REG16(sp) + LONG_S $17, GDB_FR_REG17(sp) + LONG_S $18, GDB_FR_REG18(sp) + LONG_S $19, GDB_FR_REG19(sp) + LONG_S $20, GDB_FR_REG20(sp) + LONG_S $21, GDB_FR_REG21(sp) + LONG_S $22, GDB_FR_REG22(sp) + LONG_S $23, GDB_FR_REG23(sp) + LONG_S $24, GDB_FR_REG24(sp) + LONG_S $25, GDB_FR_REG25(sp) + LONG_S $26, GDB_FR_REG26(sp) + LONG_S $27, GDB_FR_REG27(sp) + LONG_S $28, GDB_FR_REG28(sp) + /* sp already saved */ + LONG_S $30, GDB_FR_REG30(sp) + LONG_S $31, GDB_FR_REG31(sp) + + CLI /* disable interrupts */ + +/* + * Followed by the floating point registers + */ + mfc0 v0, CP0_STATUS /* FPU enabled? 
*/ + srl v0, v0, 16 + andi v0, v0, (ST0_CU1 >> 16) + + beqz v0,3f /* disabled, skip */ + nop + + li t0, 0 +#ifdef CONFIG_64BIT + mfc0 t0, CP0_STATUS +#endif + fpu_save_double_kgdb sp t0 t1 # clobbers t1 + + +/* + * Current stack frame ptr + */ + +3: + LONG_S sp, GDB_FR_FRP(sp) + +/* + * CP0 registers (R4000/R4400 unused registers skipped) + */ + + mfc0 v0, CP0_INDEX + LONG_S v0, GDB_FR_CP0_INDEX(sp) + mfc0 v0, CP0_RANDOM + LONG_S v0, GDB_FR_CP0_RANDOM(sp) + DMFC0 v0, CP0_ENTRYLO0 + LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp) + DMFC0 v0, CP0_ENTRYLO1 + LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp) + DMFC0 v0, CP0_CONTEXT + LONG_S v0, GDB_FR_CP0_CONTEXT(sp) + mfc0 v0, CP0_PAGEMASK + LONG_S v0, GDB_FR_CP0_PAGEMASK(sp) + mfc0 v0, CP0_WIRED + LONG_S v0, GDB_FR_CP0_WIRED(sp) + DMFC0 v0, CP0_ENTRYHI + LONG_S v0, GDB_FR_CP0_ENTRYHI(sp) + mfc0 v0, CP0_PRID + LONG_S v0, GDB_FR_CP0_PRID(sp) + + .set at + +/* + * Continue with the higher level handler + */ + + move a0,sp + + jal handle_exception + nop + +/* + * Restore all writable registers, in reverse order + */ + + .set noat + + LONG_L v0, GDB_FR_CP0_ENTRYHI(sp) + LONG_L v1, GDB_FR_CP0_WIRED(sp) + DMTC0 v0, CP0_ENTRYHI + mtc0 v1, CP0_WIRED + LONG_L v0, GDB_FR_CP0_PAGEMASK(sp) + LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp) + mtc0 v0, CP0_PAGEMASK + DMTC0 v1, CP0_ENTRYLO1 + LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp) + LONG_L v1, GDB_FR_CP0_INDEX(sp) + DMTC0 v0, CP0_ENTRYLO0 + LONG_L v0, GDB_FR_CP0_CONTEXT(sp) + mtc0 v1, CP0_INDEX + DMTC0 v0, CP0_CONTEXT + + +/* + * Next, the floating point registers + */ + mfc0 v0, CP0_STATUS /* check if the FPU is enabled */ + srl v0, v0, 16 + andi v0, v0, (ST0_CU1 >> 16) + + beqz v0, 3f /* disabled, skip */ + nop + + li t0, 0 +#ifdef CONFIG_64BIT + mfc0 t0, CP0_STATUS +#endif + fpu_restore_double_kgdb sp t0 t1 # clobbers t1 + + +/* + * Now the CP0 and integer registers + */ + +3: + mfc0 t0, CP0_STATUS + ori t0, 0x1f + xori t0, 0x1f + mtc0 t0, CP0_STATUS + + LONG_L v0, GDB_FR_STATUS(sp) + LONG_L v1, GDB_FR_EPC(sp) + mtc0 v0, 
CP0_STATUS + DMTC0 v1, CP0_EPC + LONG_L v0, GDB_FR_HI(sp) + LONG_L v1, GDB_FR_LO(sp) + mthi v0 + mtlo v1 + LONG_L $31, GDB_FR_REG31(sp) + LONG_L $30, GDB_FR_REG30(sp) + LONG_L $28, GDB_FR_REG28(sp) + LONG_L $27, GDB_FR_REG27(sp) + LONG_L $26, GDB_FR_REG26(sp) + LONG_L $25, GDB_FR_REG25(sp) + LONG_L $24, GDB_FR_REG24(sp) + LONG_L $23, GDB_FR_REG23(sp) + LONG_L $22, GDB_FR_REG22(sp) + LONG_L $21, GDB_FR_REG21(sp) + LONG_L $20, GDB_FR_REG20(sp) + LONG_L $19, GDB_FR_REG19(sp) + LONG_L $18, GDB_FR_REG18(sp) + LONG_L $17, GDB_FR_REG17(sp) + LONG_L $16, GDB_FR_REG16(sp) + LONG_L $15, GDB_FR_REG15(sp) + LONG_L $14, GDB_FR_REG14(sp) + LONG_L $13, GDB_FR_REG13(sp) + LONG_L $12, GDB_FR_REG12(sp) + LONG_L $11, GDB_FR_REG11(sp) + LONG_L $10, GDB_FR_REG10(sp) + LONG_L $9, GDB_FR_REG9(sp) + LONG_L $8, GDB_FR_REG8(sp) + LONG_L $7, GDB_FR_REG7(sp) + LONG_L $6, GDB_FR_REG6(sp) + LONG_L $5, GDB_FR_REG5(sp) + LONG_L $4, GDB_FR_REG4(sp) + LONG_L $3, GDB_FR_REG3(sp) + LONG_L $2, GDB_FR_REG2(sp) + LONG_L $1, GDB_FR_REG1(sp) +#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) + LONG_L k0, GDB_FR_EPC(sp) + LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */ + jr k0 + rfe +#else + LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */ + + .set mips3 + eret + .set mips0 +#endif + .set at + .set reorder + END(trap_low) + +LEAF(kgdb_read_byte) +4: lb t0, (a0) + sb t0, (a1) + li v0, 0 + jr ra + .section __ex_table,"a" + PTR 4b, kgdbfault + .previous + END(kgdb_read_byte) + +LEAF(kgdb_write_byte) +5: sb a0, (a1) + li v0, 0 + jr ra + .section __ex_table,"a" + PTR 5b, kgdbfault + .previous + END(kgdb_write_byte) + + .type kgdbfault@function + .ent kgdbfault + +kgdbfault: li v0, -EFAULT + jr ra + .end kgdbfault diff -Nurb linux-2.6.22-570/arch/mips/kernel/traps.c linux-2.6.22-591/arch/mips/kernel/traps.c --- linux-2.6.22-570/arch/mips/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/mips/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 @@ -10,6 +10,8 @@ * Kevin D. 
Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com * Copyright (C) 2000, 01 MIPS Technologies, Inc. * Copyright (C) 2002, 2003, 2004, 2005 Maciej W. Rozycki + * + * KGDB specific changes - Manish Lachwani (mlachwani@mvista.com) */ #include #include @@ -21,6 +23,7 @@ #include #include #include +#include #include #include @@ -42,6 +45,7 @@ #include #include #include +#include extern asmlinkage void handle_int(void); extern asmlinkage void handle_tlbm(void); @@ -1445,6 +1449,11 @@ extern char except_vec4; unsigned long i; +#if defined(CONFIG_KGDB) + if (kgdb_early_setup) + return; /* Already done */ +#endif + if (cpu_has_veic || cpu_has_vint) ebase = (unsigned long) alloc_bootmem_low_pages (0x200 + VECTORSPACING*64); else diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile linux-2.6.22-591/arch/mips/mips-boards/atlas/Makefile --- linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mips-boards/atlas/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -17,4 +17,3 @@ # obj-y := atlas_int.o atlas_setup.o -obj-$(CONFIG_KGDB) += atlas_gdb.o diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_gdb.c --- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_gdb.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,97 +0,0 @@ -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License (Version 2) as - * published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * This is the interface to the remote debugger stub. - */ -#include -#include -#include - -#define INB(a) inb((unsigned long)a) -#define OUTB(x,a) outb(x,(unsigned long)a) - -/* - * This is the interface to the remote debugger stub - * if the Philips part is used for the debug port, - * called from the platform setup code. - */ -void *saa9730_base = (void *)ATLAS_SAA9730_REG; - -static int saa9730_kgdb_active = 0; - -#define SAA9730_BAUDCLOCK(baud) (((ATLAS_SAA9730_BAUDCLOCK/(baud))/16)-1) - -int saa9730_kgdb_hook(int speed) -{ - int baudclock; - t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR); - - /* - * Clear all interrupts - */ - (void) INB(&kgdb_uart->Lsr); - (void) INB(&kgdb_uart->Msr); - (void) INB(&kgdb_uart->Thr_Rbr); - (void) INB(&kgdb_uart->Iir_Fcr); - - /* - * Now, initialize the UART - */ - /* 8 data bits, one stop bit, no parity */ - OUTB(SAA9730_LCR_DATA8, &kgdb_uart->Lcr); - - baudclock = SAA9730_BAUDCLOCK(speed); - - OUTB((baudclock >> 16) & 0xff, &kgdb_uart->BaudDivMsb); - OUTB( baudclock & 0xff, &kgdb_uart->BaudDivLsb); - - /* Set RTS/DTR active */ - OUTB(SAA9730_MCR_DTR | SAA9730_MCR_RTS, &kgdb_uart->Mcr); - saa9730_kgdb_active = 1; - - return speed; -} - -int saa9730_putDebugChar(char c) -{ - t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR); - - if (!saa9730_kgdb_active) { /* need to init device first */ - return 0; - } - - while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_THRE)) - ; - OUTB(c, &kgdb_uart->Thr_Rbr); - - return 1; -} - -char saa9730_getDebugChar(void) -{ - t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR); - char 
c; - - if (!saa9730_kgdb_active) { /* need to init device first */ - return 0; - } - while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_DR)) - ; - - c = INB(&kgdb_uart->Thr_Rbr); - return(c); -} diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_setup.c --- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -37,10 +37,6 @@ extern void mips_time_init(void); extern unsigned long mips_rtc_get_time(void); -#ifdef CONFIG_KGDB -extern void kgdb_config(void); -#endif - static void __init serial_init(void); const char *get_system_type(void) @@ -58,9 +54,6 @@ serial_init (); -#ifdef CONFIG_KGDB - kgdb_config(); -#endif mips_reboot_setup(); board_time_init = mips_time_init; diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c linux-2.6.22-591/arch/mips/mips-boards/generic/gdb_hook.c --- linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mips-boards/generic/gdb_hook.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,133 +0,0 @@ -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License (Version 2) as - * published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
- * - * This is the interface to the remote debugger stub. - */ -#include -#include -#include -#include - -#include -#include - -static struct serial_state rs_table[] = { - SERIAL_PORT_DFNS /* Defined in serial.h */ -}; - -static struct async_struct kdb_port_info = {0}; - -int (*generic_putDebugChar)(char); -char (*generic_getDebugChar)(void); - -static __inline__ unsigned int serial_in(struct async_struct *info, int offset) -{ - return inb(info->port + offset); -} - -static __inline__ void serial_out(struct async_struct *info, int offset, - int value) -{ - outb(value, info->port+offset); -} - -int rs_kgdb_hook(int tty_no, int speed) { - int t; - struct serial_state *ser = &rs_table[tty_no]; - - kdb_port_info.state = ser; - kdb_port_info.magic = SERIAL_MAGIC; - kdb_port_info.port = ser->port; - kdb_port_info.flags = ser->flags; - - /* - * Clear all interrupts - */ - serial_in(&kdb_port_info, UART_LSR); - serial_in(&kdb_port_info, UART_RX); - serial_in(&kdb_port_info, UART_IIR); - serial_in(&kdb_port_info, UART_MSR); - - /* - * Now, initialize the UART - */ - serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ - if (kdb_port_info.flags & ASYNC_FOURPORT) { - kdb_port_info.MCR = UART_MCR_DTR | UART_MCR_RTS; - t = UART_MCR_DTR | UART_MCR_OUT1; - } else { - kdb_port_info.MCR - = UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2; - t = UART_MCR_DTR | UART_MCR_RTS; - } - - kdb_port_info.MCR = t; /* no interrupts, please */ - serial_out(&kdb_port_info, UART_MCR, kdb_port_info.MCR); - - /* - * and set the speed of the serial port - */ - if (speed == 0) - speed = 9600; - - t = kdb_port_info.state->baud_base / speed; - /* set DLAB */ - serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); - serial_out(&kdb_port_info, UART_DLL, t & 0xff);/* LS of divisor */ - serial_out(&kdb_port_info, UART_DLM, t >> 8); /* MS of divisor */ - /* reset DLAB */ - serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); - - return speed; -} - -int putDebugChar(char c) -{ 
- return generic_putDebugChar(c); -} - -char getDebugChar(void) -{ - return generic_getDebugChar(); -} - -int rs_putDebugChar(char c) -{ - - if (!kdb_port_info.state) { /* need to init device first */ - return 0; - } - - while ((serial_in(&kdb_port_info, UART_LSR) & UART_LSR_THRE) == 0) - ; - - serial_out(&kdb_port_info, UART_TX, c); - - return 1; -} - -char rs_getDebugChar(void) -{ - if (!kdb_port_info.state) { /* need to init device first */ - return 0; - } - - while (!(serial_in(&kdb_port_info, UART_LSR) & 1)) - ; - - return serial_in(&kdb_port_info, UART_RX); -} diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/init.c linux-2.6.22-591/arch/mips/mips-boards/generic/init.c --- linux-2.6.22-570/arch/mips/mips-boards/generic/init.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mips-boards/generic/init.c 2007-12-21 15:36:11.000000000 -0500 @@ -37,15 +37,6 @@ #include -#ifdef CONFIG_KGDB -extern int rs_kgdb_hook(int, int); -extern int rs_putDebugChar(char); -extern char rs_getDebugChar(void); -extern int saa9730_kgdb_hook(int); -extern int saa9730_putDebugChar(char); -extern char saa9730_getDebugChar(void); -#endif - int prom_argc; int *_prom_argv, *_prom_envp; @@ -173,59 +164,6 @@ } #endif -#ifdef CONFIG_KGDB -void __init kgdb_config (void) -{ - extern int (*generic_putDebugChar)(char); - extern char (*generic_getDebugChar)(void); - char *argptr; - int line, speed; - - argptr = prom_getcmdline(); - if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) { - argptr += strlen("kgdb=ttyS"); - if (*argptr != '0' && *argptr != '1') - printk("KGDB: Unknown serial line /dev/ttyS%c, " - "falling back to /dev/ttyS1\n", *argptr); - line = *argptr == '0' ? 
0 : 1; - printk("KGDB: Using serial line /dev/ttyS%d for session\n", line); - - speed = 0; - if (*++argptr == ',') - { - int c; - while ((c = *++argptr) && ('0' <= c && c <= '9')) - speed = speed * 10 + c - '0'; - } -#ifdef CONFIG_MIPS_ATLAS - if (line == 1) { - speed = saa9730_kgdb_hook(speed); - generic_putDebugChar = saa9730_putDebugChar; - generic_getDebugChar = saa9730_getDebugChar; - } - else -#endif - { - speed = rs_kgdb_hook(line, speed); - generic_putDebugChar = rs_putDebugChar; - generic_getDebugChar = rs_getDebugChar; - } - - pr_info("KGDB: Using serial line /dev/ttyS%d at %d for " - "session, please connect your debugger\n", - line ? 1 : 0, speed); - - { - char *s; - for (s = "Please connect GDB to this port\r\n"; *s; ) - generic_putDebugChar (*s++); - } - - /* Breakpoint is invoked after interrupts are initialised */ - } -} -#endif - void __init mips_nmi_setup (void) { void *base; diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c linux-2.6.22-591/arch/mips/mips-boards/malta/malta_setup.c --- linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mips-boards/malta/malta_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -39,10 +39,6 @@ extern void mips_time_init(void); extern unsigned long mips_rtc_get_time(void); -#ifdef CONFIG_KGDB -extern void kgdb_config(void); -#endif - struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY }, { .name = "timer", .start = 0x40, .end = 0x5f, .flags = IORESOURCE_BUSY }, @@ -99,10 +95,6 @@ */ enable_dma(4); -#ifdef CONFIG_KGDB - kgdb_config (); -#endif - if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) { char *argptr; diff -Nurb linux-2.6.22-570/arch/mips/mm/extable.c linux-2.6.22-591/arch/mips/mm/extable.c --- linux-2.6.22-570/arch/mips/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 @@ 
-3,6 +3,7 @@ */ #include #include +#include #include #include @@ -16,6 +17,12 @@ return 1; } +#ifdef CONFIG_KGDB + if (atomic_read(&debugger_active) && kgdb_may_fault) + /* Restore our previous state. */ + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + /* Not reached. */ +#endif return 0; } diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile linux-2.6.22-591/arch/mips/momentum/ocelot_c/Makefile --- linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/momentum/ocelot_c/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -4,5 +4,3 @@ obj-y += cpci-irq.o irq.o platform.o prom.o reset.o \ setup.o uart-irq.o - -obj-$(CONFIG_KGDB) += dbg_io.o diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c linux-2.6.22-591/arch/mips/momentum/ocelot_c/dbg_io.c --- linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/momentum/ocelot_c/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,121 +0,0 @@ - -#include /* For the serial port location and base baud */ - -/* --- CONFIG --- */ - -typedef unsigned char uint8; -typedef unsigned int uint32; - -/* --- END OF CONFIG --- */ - -#define UART16550_BAUD_2400 2400 -#define UART16550_BAUD_4800 4800 -#define UART16550_BAUD_9600 9600 -#define UART16550_BAUD_19200 19200 -#define UART16550_BAUD_38400 38400 -#define UART16550_BAUD_57600 57600 -#define UART16550_BAUD_115200 115200 - -#define UART16550_PARITY_NONE 0 -#define UART16550_PARITY_ODD 0x08 -#define UART16550_PARITY_EVEN 0x18 -#define UART16550_PARITY_MARK 0x28 -#define UART16550_PARITY_SPACE 0x38 - -#define UART16550_DATA_5BIT 0x0 -#define UART16550_DATA_6BIT 0x1 -#define UART16550_DATA_7BIT 0x2 -#define UART16550_DATA_8BIT 0x3 - -#define UART16550_STOP_1BIT 0x0 -#define UART16550_STOP_2BIT 0x4 - -/* ----------------------------------------------------- */ - -/* === CONFIG === */ - -/* [jsun] we use the second serial port for kdb */ 
-#define BASE OCELOT_SERIAL1_BASE -#define MAX_BAUD OCELOT_BASE_BAUD - -/* === END OF CONFIG === */ - -#define REG_OFFSET 4 - -/* register offset */ -#define OFS_RCV_BUFFER 0 -#define OFS_TRANS_HOLD 0 -#define OFS_SEND_BUFFER 0 -#define OFS_INTR_ENABLE (1*REG_OFFSET) -#define OFS_INTR_ID (2*REG_OFFSET) -#define OFS_DATA_FORMAT (3*REG_OFFSET) -#define OFS_LINE_CONTROL (3*REG_OFFSET) -#define OFS_MODEM_CONTROL (4*REG_OFFSET) -#define OFS_RS232_OUTPUT (4*REG_OFFSET) -#define OFS_LINE_STATUS (5*REG_OFFSET) -#define OFS_MODEM_STATUS (6*REG_OFFSET) -#define OFS_RS232_INPUT (6*REG_OFFSET) -#define OFS_SCRATCH_PAD (7*REG_OFFSET) - -#define OFS_DIVISOR_LSB (0*REG_OFFSET) -#define OFS_DIVISOR_MSB (1*REG_OFFSET) - - -/* memory-mapped read/write of the port */ -#define UART16550_READ(y) (*((volatile uint8*)(BASE + y))) -#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z) - -void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) -{ - /* disable interrupts */ - UART16550_WRITE(OFS_INTR_ENABLE, 0); - - /* set up baud rate */ - { - uint32 divisor; - - /* set DIAB bit */ - UART16550_WRITE(OFS_LINE_CONTROL, 0x80); - - /* set divisor */ - divisor = MAX_BAUD / baud; - UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff); - UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8); - - /* clear DIAB bit */ - UART16550_WRITE(OFS_LINE_CONTROL, 0x0); - } - - /* set data format */ - UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop); -} - -static int remoteDebugInitialized = 0; - -uint8 getDebugChar(void) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_38400, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, UART16550_STOP_1BIT); - } - - while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0); - return UART16550_READ(OFS_RCV_BUFFER); -} - - -int putDebugChar(uint8 byte) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(UART16550_BAUD_38400, - UART16550_DATA_8BIT, - UART16550_PARITY_NONE, 
UART16550_STOP_1BIT); - } - - while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0); - UART16550_WRITE(OFS_SEND_BUFFER, byte); - return 1; -} diff -Nurb linux-2.6.22-570/arch/mips/pci/fixup-atlas.c linux-2.6.22-591/arch/mips/pci/fixup-atlas.c --- linux-2.6.22-570/arch/mips/pci/fixup-atlas.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/pci/fixup-atlas.c 2007-12-21 15:36:11.000000000 -0500 @@ -68,24 +68,3 @@ { return 0; } - -#ifdef CONFIG_KGDB -/* - * The PCI scan may have moved the saa9730 I/O address, so reread - * the address here. - * This does mean that it's not possible to debug the PCI bus configuration - * code, but it is better than nothing... - */ - -static void atlas_saa9730_base_fixup (struct pci_dev *pdev) -{ - extern void *saa9730_base; - if (pdev->bus == 0 && PCI_SLOT(pdev->devfn) == 19) - (void) pci_read_config_dword (pdev, 0x14, (u32 *)&saa9730_base); - printk ("saa9730_base = %x\n", saa9730_base); -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PHILIPS, PCI_DEVICE_ID_PHILIPS_SAA9730, - atlas_saa9730_base_fixup); - -#endif diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile linux-2.6.22-591/arch/mips/philips/pnx8550/common/Makefile --- linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -24,4 +24,3 @@ obj-y := setup.o prom.o int.o reset.o time.o proc.o platform.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_KGDB) += gdb_hook.o diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c linux-2.6.22-591/arch/mips/philips/pnx8550/common/gdb_hook.c --- linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/gdb_hook.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,109 +0,0 @@ -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. 
All rights reserved. - * - * ######################################################################## - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License (Version 2) as - * published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * ######################################################################## - * - * This is the interface to the remote debugger stub. - * - */ -#include -#include -#include -#include -#include - -#include -#include - -#include - -static struct serial_state rs_table[IP3106_NR_PORTS] = { -}; -static struct async_struct kdb_port_info = {0}; - -void rs_kgdb_hook(int tty_no) -{ - struct serial_state *ser = &rs_table[tty_no]; - - kdb_port_info.state = ser; - kdb_port_info.magic = SERIAL_MAGIC; - kdb_port_info.port = tty_no; - kdb_port_info.flags = ser->flags; - - /* - * Clear all interrupts - */ - /* Clear all the transmitter FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_TX_RST; - /* Clear all the receiver FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_RX_RST; - /* Clear all interrupts */ - ip3106_iclr(UART_BASE, tty_no) = IP3106_UART_INT_ALLRX | - IP3106_UART_INT_ALLTX; - - /* - * Now, initialize the UART - */ - ip3106_lcr(UART_BASE, tty_no) = IP3106_UART_LCR_8BIT; - ip3106_baud(UART_BASE, tty_no) = 5; // 38400 Baud -} - -int putDebugChar(char c) -{ - /* Wait until FIFO not full */ - while (((ip3106_fifo(UART_BASE, kdb_port_info.port) 
& IP3106_UART_FIFO_TXFIFO) >> 16) >= 16) - ; - /* Send one char */ - ip3106_fifo(UART_BASE, kdb_port_info.port) = c; - - return 1; -} - -char getDebugChar(void) -{ - char ch; - - /* Wait until there is a char in the FIFO */ - while (!((ip3106_fifo(UART_BASE, kdb_port_info.port) & - IP3106_UART_FIFO_RXFIFO) >> 8)) - ; - /* Read one char */ - ch = ip3106_fifo(UART_BASE, kdb_port_info.port) & - IP3106_UART_FIFO_RBRTHR; - /* Advance the RX FIFO read pointer */ - ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_NEXT; - return (ch); -} - -void rs_disable_debug_interrupts(void) -{ - ip3106_ien(UART_BASE, kdb_port_info.port) = 0; /* Disable all interrupts */ -} - -void rs_enable_debug_interrupts(void) -{ - /* Clear all the transmitter FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_TX_RST; - /* Clear all the receiver FIFO counters (pointer and status) */ - ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_RST; - /* Clear all interrupts */ - ip3106_iclr(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX | - IP3106_UART_INT_ALLTX; - ip3106_ien(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX; /* Enable RX interrupts */ -} diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c linux-2.6.22-591/arch/mips/philips/pnx8550/common/setup.c --- linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -145,16 +145,5 @@ ip3106_baud(UART_BASE, pnx8550_console_port) = 5; } -#ifdef CONFIG_KGDB - argptr = prom_getcmdline(); - if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) { - int line; - argptr += strlen("kgdb=ttyS"); - line = *argptr == '0' ? 0 : 1; - rs_kgdb_hook(line); - pr_info("KGDB: Using ttyS%i for session, " - "please connect your debugger\n", line ? 
1 : 0); - } -#endif return; } diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/Makefile --- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -4,5 +4,4 @@ obj-y += irq.o i2c-yosemite.o prom.o py-console.o setup.o -obj-$(CONFIG_KGDB) += dbg_io.o obj-$(CONFIG_SMP) += smp.o diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/dbg_io.c --- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,180 +0,0 @@ -/* - * Copyright 2003 PMC-Sierra - * Author: Manish Lachwani (lachwani@pmc-sierra.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Support for KGDB for the Yosemite board. We make use of single serial - * port to be used for KGDB as well as console. The second serial port - * seems to be having a problem. Single IRQ is allocated for both the - * ports. Hence, the interrupt routing code needs to figure out whether - * the interrupt came from channel A or B. - */ - -#include - -/* - * Baud rate, Parity, Data and Stop bit settings for the - * serial port on the Yosemite. Note that the Early printk - * patch has been added. So, we should be all set to go - */ -#define YOSEMITE_BAUD_2400 2400 -#define YOSEMITE_BAUD_4800 4800 -#define YOSEMITE_BAUD_9600 9600 -#define YOSEMITE_BAUD_19200 19200 -#define YOSEMITE_BAUD_38400 38400 -#define YOSEMITE_BAUD_57600 57600 -#define YOSEMITE_BAUD_115200 115200 - -#define YOSEMITE_PARITY_NONE 0 -#define YOSEMITE_PARITY_ODD 0x08 -#define YOSEMITE_PARITY_EVEN 0x18 -#define YOSEMITE_PARITY_MARK 0x28 -#define YOSEMITE_PARITY_SPACE 0x38 - -#define YOSEMITE_DATA_5BIT 0x0 -#define YOSEMITE_DATA_6BIT 0x1 -#define YOSEMITE_DATA_7BIT 0x2 -#define YOSEMITE_DATA_8BIT 0x3 - -#define YOSEMITE_STOP_1BIT 0x0 -#define YOSEMITE_STOP_2BIT 0x4 - -/* This is crucial */ -#define SERIAL_REG_OFS 0x1 - -#define SERIAL_RCV_BUFFER 0x0 -#define SERIAL_TRANS_HOLD 0x0 -#define SERIAL_SEND_BUFFER 0x0 -#define SERIAL_INTR_ENABLE (1 * SERIAL_REG_OFS) -#define SERIAL_INTR_ID (2 * SERIAL_REG_OFS) -#define SERIAL_DATA_FORMAT (3 * SERIAL_REG_OFS) -#define SERIAL_LINE_CONTROL (3 * SERIAL_REG_OFS) -#define SERIAL_MODEM_CONTROL (4 * SERIAL_REG_OFS) -#define SERIAL_RS232_OUTPUT (4 * SERIAL_REG_OFS) -#define SERIAL_LINE_STATUS (5 * SERIAL_REG_OFS) -#define SERIAL_MODEM_STATUS (6 * SERIAL_REG_OFS) -#define SERIAL_RS232_INPUT (6 * SERIAL_REG_OFS) -#define SERIAL_SCRATCH_PAD (7 * SERIAL_REG_OFS) 
- -#define SERIAL_DIVISOR_LSB (0 * SERIAL_REG_OFS) -#define SERIAL_DIVISOR_MSB (1 * SERIAL_REG_OFS) - -/* - * Functions to READ and WRITE to serial port 0 - */ -#define SERIAL_READ(ofs) (*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE + ofs))) - -#define SERIAL_WRITE(ofs, val) ((*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE + ofs))) = val) - -/* - * Functions to READ and WRITE to serial port 1 - */ -#define SERIAL_READ_1(ofs) (*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE_1 + ofs))) - -#define SERIAL_WRITE_1(ofs, val) ((*((volatile unsigned char*) \ - (TITAN_SERIAL_BASE_1 + ofs))) = val) - -/* - * Second serial port initialization - */ -void init_second_port(void) -{ - /* Disable Interrupts */ - SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0); - SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0x0); - - { - unsigned int divisor; - - SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x80); - divisor = TITAN_SERIAL_BASE_BAUD / YOSEMITE_BAUD_115200; - SERIAL_WRITE_1(SERIAL_DIVISOR_LSB, divisor & 0xff); - - SERIAL_WRITE_1(SERIAL_DIVISOR_MSB, - (divisor & 0xff00) >> 8); - SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0); - } - - SERIAL_WRITE_1(SERIAL_DATA_FORMAT, YOSEMITE_DATA_8BIT | - YOSEMITE_PARITY_NONE | YOSEMITE_STOP_1BIT); - - /* Enable Interrupts */ - SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0xf); -} - -/* Initialize the serial port for KGDB debugging */ -void debugInit(unsigned int baud, unsigned char data, unsigned char parity, - unsigned char stop) -{ - /* Disable Interrupts */ - SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0); - SERIAL_WRITE(SERIAL_INTR_ENABLE, 0x0); - - { - unsigned int divisor; - - SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x80); - - divisor = TITAN_SERIAL_BASE_BAUD / baud; - SERIAL_WRITE(SERIAL_DIVISOR_LSB, divisor & 0xff); - - SERIAL_WRITE(SERIAL_DIVISOR_MSB, (divisor & 0xff00) >> 8); - SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0); - } - - SERIAL_WRITE(SERIAL_DATA_FORMAT, data | parity | stop); -} - -static int remoteDebugInitialized = 0; - -unsigned char getDebugChar(void) -{ - if 
(!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(YOSEMITE_BAUD_115200, - YOSEMITE_DATA_8BIT, - YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT); - } - - while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x1) == 0); - return SERIAL_READ(SERIAL_RCV_BUFFER); -} - -int putDebugChar(unsigned char byte) -{ - if (!remoteDebugInitialized) { - remoteDebugInitialized = 1; - debugInit(YOSEMITE_BAUD_115200, - YOSEMITE_DATA_8BIT, - YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT); - } - - while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x20) == 0); - SERIAL_WRITE(SERIAL_SEND_BUFFER, byte); - - return 1; -} diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/irq.c --- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/irq.c 2007-12-21 15:36:11.000000000 -0500 @@ -137,10 +137,6 @@ } } -#ifdef CONFIG_KGDB -extern void init_second_port(void); -#endif - /* * Initialize the next level interrupt handler */ @@ -152,11 +148,6 @@ rm7k_cpu_irq_init(); rm9k_cpu_irq_init(); -#ifdef CONFIG_KGDB - /* At this point, initialize the second serial port */ - init_second_port(); -#endif - #ifdef CONFIG_GDB_CONSOLE register_gdb_console(); #endif diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c linux-2.6.22-591/arch/mips/sgi-ip22/ip22-setup.c --- linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sgi-ip22/ip22-setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -101,30 +101,6 @@ add_preferred_console("arc", 0, NULL); } -#ifdef CONFIG_KGDB - { - char *kgdb_ttyd = prom_getcmdline(); - - if ((kgdb_ttyd = strstr(kgdb_ttyd, "kgdb=ttyd")) != NULL) { - int line; - kgdb_ttyd += strlen("kgdb=ttyd"); - if (*kgdb_ttyd != '1' && *kgdb_ttyd != '2') - printk(KERN_INFO "KGDB: Uknown serial line /dev/ttyd%c" - ", falling back to /dev/ttyd1\n", *kgdb_ttyd); - line = *kgdb_ttyd == '2' ? 
0 : 1; - printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for " - "session\n", line ? 1 : 2); - rs_kgdb_hook(line); - - printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for " - "session, please connect your debugger\n", line ? 1:2); - - kgdb_enabled = 1; - /* Breakpoints and stuff are in sgi_irq_setup() */ - } - } -#endif - #if defined(CONFIG_VT) && defined(CONFIG_SGI_NEWPORT_CONSOLE) { ULONG *gfxinfo; diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/Makefile linux-2.6.22-591/arch/mips/sgi-ip27/Makefile --- linux-2.6.22-570/arch/mips/sgi-ip27/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sgi-ip27/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -7,5 +7,4 @@ ip27-xtalk.o obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o -obj-$(CONFIG_KGDB) += ip27-dbgio.o obj-$(CONFIG_SMP) += ip27-smp.o diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c linux-2.6.22-591/arch/mips/sgi-ip27/ip27-dbgio.c --- linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sgi-ip27/ip27-dbgio.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,60 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright 2004 Ralf Baechle - */ -#include -#include -#include -#include -#include - -#include -#include -#include - -#define IOC3_CLK (22000000 / 3) -#define IOC3_FLAGS (0) - -static inline struct ioc3_uartregs *console_uart(void) -{ - struct ioc3 *ioc3; - - ioc3 = (struct ioc3 *)KL_CONFIG_CH_CONS_INFO(get_nasid())->memory_base; - - return &ioc3->sregs.uarta; -} - -unsigned char getDebugChar(void) -{ - struct ioc3_uartregs *uart = console_uart(); - - while ((uart->iu_lsr & UART_LSR_DR) == 0); - return uart->iu_rbr; -} - -void putDebugChar(unsigned char c) -{ - struct ioc3_uartregs *uart = console_uart(); - - while ((uart->iu_lsr & UART_LSR_THRE) == 0); - uart->iu_thr = c; -} diff -Nurb linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c linux-2.6.22-591/arch/mips/sibyte/bcm1480/irq.c --- linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sibyte/bcm1480/irq.c 2007-12-21 15:36:11.000000000 -0500 @@ -57,30 +57,6 @@ extern unsigned long ht_eoi_space; #endif -#ifdef CONFIG_KGDB -#include -extern void breakpoint(void); -static int kgdb_irq; -#ifdef CONFIG_GDB_CONSOLE -extern void register_gdb_console(void); -#endif - -/* kgdb is on when configured. 
Pass "nokgdb" kernel arg to turn it off */ -static int kgdb_flag = 1; -static int __init nokgdb(char *str) -{ - kgdb_flag = 0; - return 1; -} -__setup("nokgdb", nokgdb); - -/* Default to UART1 */ -int kgdb_port = 1; -#ifdef CONFIG_SIBYTE_SB1250_DUART -extern char sb1250_duart_present[]; -#endif -#endif - static struct irq_chip bcm1480_irq_type = { .name = "BCM1480-IMR", .ack = ack_bcm1480_irq, @@ -394,62 +370,11 @@ * does its own management of IP7. */ -#ifdef CONFIG_KGDB - imask |= STATUSF_IP6; -#endif /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); -#ifdef CONFIG_KGDB - if (kgdb_flag) { - kgdb_irq = K_BCM1480_INT_UART_0 + kgdb_port; - -#ifdef CONFIG_SIBYTE_SB1250_DUART - sb1250_duart_present[kgdb_port] = 0; -#endif - /* Setup uart 1 settings, mapper */ - /* QQQ FIXME */ - __raw_writeq(M_DUART_IMR_BRK, IO_SPACE_BASE + A_DUART_IMRREG(kgdb_port)); - - bcm1480_steal_irq(kgdb_irq); - __raw_writeq(IMR_IP6_VAL, - IO_SPACE_BASE + A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) + - (kgdb_irq<<3)); - bcm1480_unmask_irq(0, kgdb_irq); - -#ifdef CONFIG_GDB_CONSOLE - register_gdb_console(); -#endif - printk("Waiting for GDB on UART port %d\n", kgdb_port); - set_debug_traps(); - breakpoint(); - } -#endif } -#ifdef CONFIG_KGDB - -#include - -#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) -#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) - -static void bcm1480_kgdb_interrupt(void) -{ - /* - * Clear break-change status (allow some time for the remote - * host to stop the break, since we would see another - * interrupt on the end-of-break too) - */ - kstat.irqs[smp_processor_id()][kgdb_irq]++; - mdelay(500); - duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | - M_DUART_RX_EN | M_DUART_TX_EN); - set_async_breakpoint(&get_irq_regs()->cp0_epc); -} - -#endif /* CONFIG_KGDB */ - extern void bcm1480_timer_interrupt(void); extern void bcm1480_mailbox_interrupt(void); @@ -478,11 
+403,6 @@ bcm1480_mailbox_interrupt(); #endif -#ifdef CONFIG_KGDB - else if (pending & CAUSEF_IP6) - bcm1480_kgdb_interrupt(); /* KGDB (uart 1) */ -#endif - else if (pending & CAUSEF_IP2) { unsigned long long mask_h, mask_l; unsigned long base; diff -Nurb linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c linux-2.6.22-591/arch/mips/sibyte/cfe/setup.c --- linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sibyte/cfe/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -58,10 +58,6 @@ extern unsigned long initrd_start, initrd_end; #endif -#ifdef CONFIG_KGDB -extern int kgdb_port; -#endif - static void ATTRIB_NORET cfe_linux_exit(void *arg) { int warm = *(int *)arg; @@ -242,9 +238,6 @@ int argc = fw_arg0; char **envp = (char **) fw_arg2; int *prom_vec = (int *) fw_arg3; -#ifdef CONFIG_KGDB - char *arg; -#endif _machine_restart = cfe_linux_restart; _machine_halt = cfe_linux_halt; @@ -308,13 +301,6 @@ } } -#ifdef CONFIG_KGDB - if ((arg = strstr(arcs_cmdline,"kgdb=duart")) != NULL) - kgdb_port = (arg[10] == '0') ? 
0 : 1; - else - kgdb_port = 1; -#endif - #ifdef CONFIG_BLK_DEV_INITRD { char *ptr; diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile linux-2.6.22-591/arch/mips/sibyte/sb1250/Makefile --- linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sibyte/sb1250/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -3,3 +3,4 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SIBYTE_STANDALONE) += prom.o obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o +obj-$(CONFIG_KGDB_SIBYTE) += kgdb_sibyte.o diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c linux-2.6.22-591/arch/mips/sibyte/sb1250/irq.c --- linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sibyte/sb1250/irq.c 2007-12-21 15:36:11.000000000 -0500 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -56,16 +57,6 @@ extern unsigned long ldt_eoi_space; #endif -#ifdef CONFIG_KGDB -static int kgdb_irq; - -/* Default to UART1 */ -int kgdb_port = 1; -#ifdef CONFIG_SIBYTE_SB1250_DUART -extern char sb1250_duart_present[]; -#endif -#endif - static struct irq_chip sb1250_irq_type = { .name = "SB1250-IMR", .ack = ack_sb1250_irq, @@ -304,6 +295,11 @@ unsigned int imask = STATUSF_IP4 | STATUSF_IP3 | STATUSF_IP2 | STATUSF_IP1 | STATUSF_IP0; +#ifdef CONFIG_KGDB + if (kgdb_early_setup) + return; +#endif + /* Default everything to IP2 */ for (i = 0; i < SB1250_NR_IRQS; i++) { /* was I0 */ __raw_writeq(IMR_IP2_VAL, @@ -349,58 +345,16 @@ * does its own management of IP7. 
*/ -#ifdef CONFIG_KGDB +#ifdef CONFIG_KGDB_SIBYTE imask |= STATUSF_IP6; #endif /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); - -#ifdef CONFIG_KGDB - if (kgdb_flag) { - kgdb_irq = K_INT_UART_0 + kgdb_port; - -#ifdef CONFIG_SIBYTE_SB1250_DUART - sb1250_duart_present[kgdb_port] = 0; -#endif - /* Setup uart 1 settings, mapper */ - __raw_writeq(M_DUART_IMR_BRK, - IOADDR(A_DUART_IMRREG(kgdb_port))); - - sb1250_steal_irq(kgdb_irq); - __raw_writeq(IMR_IP6_VAL, - IOADDR(A_IMR_REGISTER(0, - R_IMR_INTERRUPT_MAP_BASE) + - (kgdb_irq << 3))); - sb1250_unmask_irq(0, kgdb_irq); - } -#endif } -#ifdef CONFIG_KGDB - -#include - -#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) -#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) - -static void sb1250_kgdb_interrupt(void) -{ - /* - * Clear break-change status (allow some time for the remote - * host to stop the break, since we would see another - * interrupt on the end-of-break too) - */ - kstat_this_cpu.irqs[kgdb_irq]++; - mdelay(500); - duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | - M_DUART_RX_EN | M_DUART_TX_EN); - set_async_breakpoint(&get_irq_regs()->cp0_epc); -} - -#endif /* CONFIG_KGDB */ - extern void sb1250_timer_interrupt(void); extern void sb1250_mailbox_interrupt(void); +extern void sb1250_kgdb_interrupt(void); asmlinkage void plat_irq_dispatch(void) { @@ -437,7 +391,7 @@ sb1250_mailbox_interrupt(); #endif -#ifdef CONFIG_KGDB +#ifdef CONFIG_KGDB_SIBYTE else if (pending & CAUSEF_IP6) /* KGDB (uart 1) */ sb1250_kgdb_interrupt(); #endif diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c linux-2.6.22-591/arch/mips/sibyte/sb1250/kgdb_sibyte.c --- linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/mips/sibyte/sb1250/kgdb_sibyte.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,144 @@ +/* + * arch/mips/sibyte/sb1250/kgdb_sibyte.c + * + * Author: Manish 
Lachwani, mlachwani@mvista.com or manish@koffee-break.com + * + * 2004 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +/* + * Support for KGDB on the Broadcom Sibyte. The SWARM board + * for example does not have a 8250/16550 compatible serial + * port. Hence, we need to have a driver for the serial + * ports to handle KGDB. This board needs nothing in addition + * to what is normally provided by the gdb portion of the stub. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int kgdb_port = 1; +static int kgdb_irq; + +extern char sb1250_duart_present[]; +extern int sb1250_steal_irq(int irq); + +/* Forward declarations. */ +static void kgdbsibyte_init_duart(void); +static int kgdb_init_io(void); + +#define IMR_IP6_VAL K_INT_MAP_I4 +#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) + +static void kgdbsibyte_write_char(u8 c) +{ + while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0) ; + duart_out(R_DUART_TX_HOLD, c); +} + +static int kgdbsibyte_read_char(void) +{ + int ret_char; + unsigned int status; + + do { + status = duart_in(R_DUART_STATUS); + } while ((status & M_DUART_RX_RDY) == 0); + + /* + * Check for framing error + */ + if (status & M_DUART_FRM_ERR) { + kgdbsibyte_init_duart(); + kgdbsibyte_write_char('-'); + return '-'; + } + + ret_char = duart_in(R_DUART_RX_HOLD); + + return ret_char; +} + +void sb1250_kgdb_interrupt(void) +{ + int kgdb_irq = K_INT_UART_0 + kgdb_port; + + /* + * Clear break-change status (allow some time for the remote + * host to stop the break, since we would see another + * interrupt on the end-of-break too) + */ + kstat_this_cpu.irqs[kgdb_irq]++; + mdelay(500); + duart_out(R_DUART_CMD, 
V_DUART_MISC_CMD_RESET_BREAK_INT | + M_DUART_RX_EN | M_DUART_TX_EN); + breakpoint(); + +} + +/* + * We use port #1 and we set it for 115200 BAUD, 8n1. + */ +static void kgdbsibyte_init_duart(void) +{ + /* Set 8n1. */ + duart_out(R_DUART_MODE_REG_1, + V_DUART_BITS_PER_CHAR_8 | V_DUART_PARITY_MODE_NONE); + duart_out(R_DUART_MODE_REG_2, M_DUART_STOP_BIT_LEN_1); + /* Set baud rate of 115200. */ + duart_out(R_DUART_CLK_SEL, V_DUART_BAUD_RATE(115200)); + /* Enable rx and tx */ + duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN); +} + +static int kgdb_init_io(void) +{ +#ifdef CONFIG_SIBYTE_SB1250_DUART + sb1250_duart_present[kgdb_port] = 0; +#endif + + kgdbsibyte_init_duart(); + + return 0; +} + +/* + * Hookup our IRQ line. We will already have been initialized at + * this point. + */ +static void __init kgdbsibyte_hookup_irq(void) +{ + /* Steal the IRQ. */ + kgdb_irq = K_INT_UART_0 + kgdb_port; + + /* Setup uart 1 settings, mapper */ + __raw_writeq(M_DUART_IMR_BRK, IOADDR(A_DUART_IMRREG(kgdb_port))); + + sb1250_steal_irq(kgdb_irq); + + __raw_writeq(IMR_IP6_VAL, + IOADDR(A_IMR_REGISTER(0, R_IMR_INTERRUPT_MAP_BASE) + + (kgdb_irq << 3))); + + sb1250_unmask_irq(0, kgdb_irq); +} + +struct kgdb_io kgdb_io_ops = { + .read_char = kgdbsibyte_read_char, + .write_char = kgdbsibyte_write_char, + .init = kgdb_init_io, + .late_init = kgdbsibyte_hookup_irq, +}; diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile linux-2.6.22-591/arch/mips/sibyte/swarm/Makefile --- linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sibyte/swarm/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -1,3 +1 @@ lib-y = setup.o rtc_xicor1241.o rtc_m41t81.o - -lib-$(CONFIG_KGDB) += dbg_io.o diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c linux-2.6.22-591/arch/mips/sibyte/swarm/dbg_io.c --- linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/sibyte/swarm/dbg_io.c
1969-12-31 19:00:00.000000000 -0500 @@ -1,76 +0,0 @@ -/* - * kgdb debug routines for SiByte boards. - * - * Copyright (C) 2001 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -/* -------------------- BEGINNING OF CONFIG --------------------- */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * We use the second serial port for kgdb traffic. - * 115200, 8, N, 1. - */ - -#define BAUD_RATE 115200 -#define CLK_DIVISOR V_DUART_BAUD_RATE(BAUD_RATE) -#define DATA_BITS V_DUART_BITS_PER_CHAR_8 /* or 7 */ -#define PARITY V_DUART_PARITY_MODE_NONE /* or even */ -#define STOP_BITS M_DUART_STOP_BIT_LEN_1 /* or 2 */ - -static int duart_initialized = 0; /* 0: need to be init'ed by kgdb */ - -/* -------------------- END OF CONFIG --------------------- */ -extern int kgdb_port; - -#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) -#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) - -void putDebugChar(unsigned char c); -unsigned char getDebugChar(void); -static void -duart_init(int clk_divisor, int data, int parity, int stop) -{ - duart_out(R_DUART_MODE_REG_1, data | parity); - duart_out(R_DUART_MODE_REG_2, stop); - duart_out(R_DUART_CLK_SEL, clk_divisor); - - duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN); /* enable rx and tx */ -} - -void -putDebugChar(unsigned char c) -{ - if (!duart_initialized) { - duart_initialized = 1; - duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS); - } - while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0); - duart_out(R_DUART_TX_HOLD, c); -} - -unsigned char -getDebugChar(void) -{ - if (!duart_initialized) { - duart_initialized = 1; - 
duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS); - } - while ((duart_in(R_DUART_STATUS) & M_DUART_RX_RDY) == 0) ; - return duart_in(R_DUART_RX_HOLD); -} - diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/Makefile linux-2.6.22-591/arch/mips/tx4927/common/Makefile --- linux-2.6.22-570/arch/mips/tx4927/common/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/tx4927/common/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -9,4 +9,3 @@ obj-y += tx4927_prom.o tx4927_setup.o tx4927_irq.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o -obj-$(CONFIG_KGDB) += tx4927_dbgio.o diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c linux-2.6.22-591/arch/mips/tx4927/common/tx4927_dbgio.c --- linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/tx4927/common/tx4927_dbgio.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,47 +0,0 @@ -/* - * linux/arch/mips/tx4927/common/tx4927_dbgio.c - * - * kgdb interface for gdb - * - * Author: MontaVista Software, Inc. - * source@mvista.com - * - * Copyright 2001-2002 MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include - -u8 getDebugChar(void) -{ - extern u8 txx9_sio_kdbg_rd(void); - return (txx9_sio_kdbg_rd()); -} - - -int putDebugChar(u8 byte) -{ - extern int txx9_sio_kdbg_wr( u8 ch ); - return (txx9_sio_kdbg_wr(byte)); -} diff -Nurb linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c linux-2.6.22-591/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c --- linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -76,7 +76,7 @@ #include #include #endif -#ifdef CONFIG_SERIAL_TXX9 +#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) #include #include #include @@ -973,9 +973,10 @@ #endif /* CONFIG_PCI */ -#ifdef CONFIG_SERIAL_TXX9 +#if defined (CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) { extern int early_serial_txx9_setup(struct uart_port *port); + extern int txx9_kgdb_add_port(int n, struct uart_port *port); int i; struct uart_port req; for(i = 0; i < 2; i++) { @@ -987,7 +988,12 @@ req.irq = 32 + i; req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; req.uartclk = 50000000; +#ifdef CONFIG_SERIAL_TXX9 
early_serial_txx9_setup(&req); +#endif +#ifdef CONFIG_KGDB_TXX9 + txx9_kgdb_add_port(i, &req); +#endif } } #ifdef CONFIG_SERIAL_TXX9_CONSOLE @@ -996,7 +1002,7 @@ strcat(argptr, " console=ttyS0,38400"); } #endif -#endif +#endif /* defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) */ #ifdef CONFIG_ROOT_NFS argptr = prom_getcmdline(); diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/Makefile linux-2.6.22-591/arch/mips/tx4938/common/Makefile --- linux-2.6.22-570/arch/mips/tx4938/common/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/tx4938/common/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -7,5 +7,4 @@ # obj-y += prom.o setup.o irq.o rtc_rx5c348.o -obj-$(CONFIG_KGDB) += dbgio.o diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c linux-2.6.22-591/arch/mips/tx4938/common/dbgio.c --- linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/tx4938/common/dbgio.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,50 +0,0 @@ -/* - * linux/arch/mips/tx4938/common/dbgio.c - * - * kgdb interface for gdb - * - * Author: MontaVista Software, Inc. - * source@mvista.com - * - * Copyright 2005 MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Support for TX4938 in 2.6 - Hiroshi DOYU - */ - -#include -#include -#include - -extern u8 txx9_sio_kdbg_rd(void); -extern int txx9_sio_kdbg_wr( u8 ch ); - -u8 getDebugChar(void) -{ - return (txx9_sio_kdbg_rd()); -} - -int putDebugChar(u8 byte) -{ - return (txx9_sio_kdbg_wr(byte)); -} - diff -Nurb linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c linux-2.6.22-591/arch/mips/tx4938/toshiba_rbtx4938/setup.c --- linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/mips/tx4938/toshiba_rbtx4938/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -30,7 +30,7 @@ #include #include #include -#ifdef CONFIG_SERIAL_TXX9 +#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) #include #include #include @@ -924,9 +924,10 @@ set_io_port_base(RBTX4938_ETHER_BASE); #endif -#ifdef CONFIG_SERIAL_TXX9 +#if defined (CONFIG_SERIAL_TXX9) || defined (CONFIG_KGDB_TXX9) { extern int early_serial_txx9_setup(struct uart_port *port); + extern int txx9_kgdb_add_port(int n, struct uart_port *port); int i; struct uart_port req; for(i = 0; i < 2; i++) { @@ -938,7 +939,12 @@ req.irq = 32 + i; req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; req.uartclk = 50000000; +#ifdef CONFIG_SERIAL_TXX9 
early_serial_txx9_setup(&req); +#endif +#ifdef CONFIG_KGDB_TXX9 + txx9_kgdb_add_port(i, &req); +#endif } } #ifdef CONFIG_SERIAL_TXX9_CONSOLE diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig linux-2.6.22-591/arch/powerpc/Kconfig --- linux-2.6.22-570/arch/powerpc/Kconfig 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -4,12 +4,7 @@ mainmenu "Linux/PowerPC Kernel Configuration" -config PPC64 - bool "64-bit kernel" - default n - help - This option selects whether a 32-bit or a 64-bit kernel - will be built. +source "arch/powerpc/platforms/Kconfig.cputype" config PPC_PM_NEEDS_RTC_LIB bool @@ -132,123 +127,6 @@ depends on PPC64 && (BROKEN || (PPC_PMAC64 && EXPERIMENTAL)) default y -menu "Processor support" -choice - prompt "Processor Type" - depends on PPC32 - default 6xx - -config CLASSIC32 - bool "52xx/6xx/7xx/74xx" - select PPC_FPU - select 6xx - help - There are four families of PowerPC chips supported. The more common - types (601, 603, 604, 740, 750, 7400), the Motorola embedded - versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC - embedded versions (403 and 405) and the high end 64 bit Power - processors (POWER 3, POWER4, and IBM PPC970 also known as G5). - - This option is the catch-all for 6xx types, including some of the - embedded versions. Unless there is see an option for the specific - chip family you are using, you want this option. - - You do not want this if you are building a kernel for a 64 bit - IBM RS/6000 or an Apple G5, choose 6xx. - - If unsure, select this option - - Note that the kernel runs in 32-bit mode even on 64-bit chips. 
- -config PPC_82xx - bool "Freescale 82xx" - select 6xx - select PPC_FPU - -config PPC_83xx - bool "Freescale 83xx" - select 6xx - select FSL_SOC - select 83xx - select PPC_FPU - select WANT_DEVICE_TREE - -config PPC_85xx - bool "Freescale 85xx" - select E500 - select FSL_SOC - select 85xx - select WANT_DEVICE_TREE - -config PPC_86xx - bool "Freescale 86xx" - select 6xx - select FSL_SOC - select FSL_PCIE - select PPC_FPU - select ALTIVEC - help - The Freescale E600 SoCs have 74xx cores. - -config PPC_8xx - bool "Freescale 8xx" - select FSL_SOC - select 8xx - -config 40x - bool "AMCC 40x" - select PPC_DCR_NATIVE - -config 44x - bool "AMCC 44x" - select PPC_DCR_NATIVE - select WANT_DEVICE_TREE - -config E200 - bool "Freescale e200" - -endchoice - -config POWER4_ONLY - bool "Optimize for POWER4" - depends on PPC64 - default n - ---help--- - Cause the compiler to optimize for POWER4/POWER5/PPC970 processors. - The resulting binary will not work on POWER3 or RS64 processors - when compiled with binutils 2.15 or later. 
- -config POWER3 - bool - depends on PPC64 - default y if !POWER4_ONLY - -config POWER4 - depends on PPC64 - def_bool y - -config 6xx - bool - -# this is temp to handle compat with arch=ppc -config 8xx - bool - -# this is temp to handle compat with arch=ppc -config 83xx - bool - -# this is temp to handle compat with arch=ppc -config 85xx - bool - -config E500 - bool - -config PPC_FPU - bool - default y if PPC64 - config PPC_DCR_NATIVE bool default n @@ -267,134 +145,6 @@ depends on PPC64 # not supported on 32 bits yet default n -config 4xx - bool - depends on 40x || 44x - default y - -config BOOKE - bool - depends on E200 || E500 || 44x - default y - -config FSL_BOOKE - bool - depends on E200 || E500 - default y - -config PTE_64BIT - bool - depends on 44x || E500 - default y if 44x - default y if E500 && PHYS_64BIT - -config PHYS_64BIT - bool 'Large physical address support' if E500 - depends on 44x || E500 - select RESOURCES_64BIT - default y if 44x - ---help--- - This option enables kernel support for larger than 32-bit physical - addresses. This features is not be available on all e500 cores. - - If in doubt, say N here. - -config ALTIVEC - bool "AltiVec Support" - depends on CLASSIC32 || POWER4 - ---help--- - This option enables kernel support for the Altivec extensions to the - PowerPC processor. The kernel currently supports saving and restoring - altivec registers, and turning on the 'altivec enable' bit so user - processes can execute altivec instructions. - - This option is only usefully if you have a processor that supports - altivec (G4, otherwise known as 74xx series), but does not have - any affect on a non-altivec cpu (it does, however add code to the - kernel). - - If in doubt, say Y here. - -config SPE - bool "SPE Support" - depends on E200 || E500 - default y - ---help--- - This option enables kernel support for the Signal Processing - Extensions (SPE) to the PowerPC processor. 
The kernel currently - supports saving and restoring SPE registers, and turning on the - 'spe enable' bit so user processes can execute SPE instructions. - - This option is only useful if you have a processor that supports - SPE (e500, otherwise known as 85xx series), but does not have any - effect on a non-spe cpu (it does, however add code to the kernel). - - If in doubt, say Y here. - -config PPC_STD_MMU - bool - depends on 6xx || POWER3 || POWER4 || PPC64 - default y - -config PPC_STD_MMU_32 - def_bool y - depends on PPC_STD_MMU && PPC32 - -config PPC_MM_SLICES - bool - default y if HUGETLB_PAGE - default n - -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - depends on PPC64 - default y - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. This also enables accounting of - stolen time on logically-partitioned systems running on - IBM POWER5-based machines. - - If in doubt, say Y here. - -config SMP - depends on PPC_STD_MMU - bool "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, say N. If you have a system with more - than one CPU, say Y. Note that the kernel does not currently - support SMP machines with 603/603e/603ev or PPC750 ("G3") processors - since they have inadequate hardware support for multiprocessor - operation. - - If you say N here, the kernel will run on single and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on single-processor machines. - On a single-processor machine, the kernel will run faster if you say - N here. - - If you don't know what to do here, say N. 
- -config NR_CPUS - int "Maximum number of CPUs (2-128)" - range 2 128 - depends on SMP - default "32" if PPC64 - default "4" - -config NOT_COHERENT_CACHE - bool - depends on 4xx || 8xx || E200 - default y - -config CONFIG_CHECK_CACHE_COHERENCY - bool -endmenu - source "init/Kconfig" source "arch/powerpc/platforms/Kconfig" @@ -686,9 +436,9 @@ bool "PCI support" if 40x || CPM2 || PPC_83xx || PPC_85xx || PPC_86xx \ || PPC_MPC52xx || (EMBEDDED && (PPC_PSERIES || PPC_ISERIES)) \ || MPC7448HPC2 || PPC_PS3 || PPC_HOLLY - default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx \ + default y if !40x && !CPM2 && !8xx && !PPC_83xx \ && !PPC_85xx && !PPC_86xx - default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS + default PCI_PERMEDIA if !4xx && !CPM2 && !8xx default PCI_QSPAN if !4xx && !CPM2 && 8xx select ARCH_SUPPORTS_MSI help diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig.debug linux-2.6.22-591/arch/powerpc/Kconfig.debug --- linux-2.6.22-570/arch/powerpc/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 @@ -41,52 +41,9 @@ This option will add a small amount of overhead to all hypervisor calls. -config DEBUGGER - bool "Enable debugger hooks" - depends on DEBUG_KERNEL - help - Include in-kernel hooks for kernel debuggers. Unless you are - intending to debug the kernel, say N here. - -config KGDB - bool "Include kgdb kernel debugger" - depends on DEBUGGER && (BROKEN || PPC_GEN550 || 4xx) - select DEBUG_INFO - help - Include in-kernel hooks for kgdb, the Linux kernel source level - debugger. See for more information. - Unless you are intending to debug the kernel, say N here. 
- -choice - prompt "Serial Port" - depends on KGDB - default KGDB_TTYS1 - -config KGDB_TTYS0 - bool "ttyS0" - -config KGDB_TTYS1 - bool "ttyS1" - -config KGDB_TTYS2 - bool "ttyS2" - -config KGDB_TTYS3 - bool "ttyS3" - -endchoice - -config KGDB_CONSOLE - bool "Enable serial console thru kgdb port" - depends on KGDB && 8xx || CPM2 - help - If you enable this, all serial console messages will be sent - over the gdb stub. - If unsure, say N. - config XMON bool "Include xmon kernel debugger" - depends on DEBUGGER + depends on DEBUG_KERNEL help Include in-kernel hooks for the xmon kernel monitor/debugger. Unless you are intending to debug the kernel, say N here. @@ -116,6 +73,11 @@ to say Y here, unless you're building for a memory-constrained system. +config DEBUGGER + bool + depends on KGDB || XMON + default y + config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" depends on PPC64 diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.c linux-2.6.22-591/arch/powerpc/boot/44x.c --- linux-2.6.22-570/arch/powerpc/boot/44x.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/44x.c 2007-12-21 15:36:11.000000000 -0500 @@ -38,3 +38,48 @@ dt_fixup_memory(0, memsize); } + +#define SPRN_DBCR0 0x134 +#define DBCR0_RST_SYSTEM 0x30000000 + +void ibm44x_dbcr_reset(void) +{ + unsigned long tmp; + + asm volatile ( + "mfspr %0,%1\n" + "oris %0,%0,%2@h\n" + "mtspr %1,%0" + : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM) + ); + +} + +/* Read 4xx EBC bus bridge registers to get mappings of the peripheral + * banks into the OPB address space */ +void ibm4xx_fixup_ebc_ranges(const char *ebc) +{ + void *devp; + u32 bxcr; + u32 ranges[EBC_NUM_BANKS*4]; + u32 *p = ranges; + int i; + + for (i = 0; i < EBC_NUM_BANKS; i++) { + mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(i)); + bxcr = mfdcr(DCRN_EBC0_CFGDATA); + + if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF) { + *p++ = i; + *p++ = 0; + *p++ = bxcr & EBC_BXCR_BAS; + *p++ = EBC_BXCR_BANK_SIZE(bxcr); + } + } + 
+ devp = finddevice(ebc); + if (! devp) + fatal("Couldn't locate EBC node %s\n\r", ebc); + + setprop(devp, "ranges", ranges, (p - ranges) * sizeof(u32)); +} diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.h linux-2.6.22-591/arch/powerpc/boot/44x.h --- linux-2.6.22-570/arch/powerpc/boot/44x.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/44x.h 2007-12-21 15:36:11.000000000 -0500 @@ -11,6 +11,9 @@ #define _PPC_BOOT_44X_H_ void ibm44x_fixup_memsize(void); +void ibm4xx_fixup_ebc_ranges(const char *ebc); + +void ibm44x_dbcr_reset(void); void ebony_init(void *mac0, void *mac1); #endif /* _PPC_BOOT_44X_H_ */ diff -Nurb linux-2.6.22-570/arch/powerpc/boot/Makefile linux-2.6.22-591/arch/powerpc/boot/Makefile --- linux-2.6.22-570/arch/powerpc/boot/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -43,8 +43,8 @@ src-wlib := string.S crt0.S stdio.c main.c flatdevtree.c flatdevtree_misc.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ - gunzip_util.c elf_util.c $(zlib) devtree.c \ - 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c + gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \ + 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c src-plat := of.c cuboot-83xx.c cuboot-85xx.c holly.c \ cuboot-ebony.c treeboot-ebony.c prpmc2800.c src-boot := $(src-wlib) $(src-plat) empty.c diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c linux-2.6.22-591/arch/powerpc/boot/cuboot-83xx.c --- linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/cuboot-83xx.c 2007-12-21 15:36:11.000000000 -0500 @@ -12,12 +12,12 @@ #include "ops.h" #include "stdio.h" +#include "cuboot.h" #define TARGET_83xx #include "ppcboot.h" static bd_t bd; -extern char _end[]; extern char _dtb_start[], _dtb_end[]; static void platform_fixups(void) @@ -52,16 +52,7 @@ void platform_init(unsigned long r3, unsigned long 
r4, unsigned long r5, unsigned long r6, unsigned long r7) { - unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize; - unsigned long avail_ram = end_of_ram - (unsigned long)_end; - - memcpy(&bd, (bd_t *)r3, sizeof(bd)); - loader_info.initrd_addr = r4; - loader_info.initrd_size = r4 ? r5 - r4 : 0; - loader_info.cmdline = (char *)r6; - loader_info.cmdline_len = r7 - r6; - - simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64); + CUBOOT_INIT(); ft_init(_dtb_start, _dtb_end - _dtb_start, 32); serial_console_init(); platform_ops.fixups = platform_fixups; diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c linux-2.6.22-591/arch/powerpc/boot/cuboot-85xx.c --- linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/cuboot-85xx.c 2007-12-21 15:36:11.000000000 -0500 @@ -12,12 +12,12 @@ #include "ops.h" #include "stdio.h" +#include "cuboot.h" #define TARGET_85xx #include "ppcboot.h" static bd_t bd; -extern char _end[]; extern char _dtb_start[], _dtb_end[]; static void platform_fixups(void) @@ -53,16 +53,7 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { - unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize; - unsigned long avail_ram = end_of_ram - (unsigned long)_end; - - memcpy(&bd, (bd_t *)r3, sizeof(bd)); - loader_info.initrd_addr = r4; - loader_info.initrd_size = r4 ? 
r5 - r4 : 0; - loader_info.cmdline = (char *)r6; - loader_info.cmdline_len = r7 - r6; - - simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64); + CUBOOT_INIT(); ft_init(_dtb_start, _dtb_end - _dtb_start, 32); serial_console_init(); platform_ops.fixups = platform_fixups; diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c linux-2.6.22-591/arch/powerpc/boot/cuboot-ebony.c --- linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/cuboot-ebony.c 2007-12-21 15:36:11.000000000 -0500 @@ -15,28 +15,16 @@ #include "ops.h" #include "stdio.h" #include "44x.h" +#include "cuboot.h" #define TARGET_44x #include "ppcboot.h" static bd_t bd; -extern char _end[]; - -BSS_STACK(4096); void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { - unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize; - unsigned long avail_ram = end_of_ram - (unsigned long)_end; - - memcpy(&bd, (bd_t *)r3, sizeof(bd)); - loader_info.initrd_addr = r4; - loader_info.initrd_size = r4 ? r5 : 0; - loader_info.cmdline = (char *)r6; - loader_info.cmdline_len = r7 - r6; - - simple_alloc_init(_end, avail_ram, 32, 64); - + CUBOOT_INIT(); ebony_init(&bd.bi_enetaddr, &bd.bi_enet1addr); } diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.c linux-2.6.22-591/arch/powerpc/boot/cuboot.c --- linux-2.6.22-570/arch/powerpc/boot/cuboot.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/boot/cuboot.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,35 @@ +/* + * Compatibility for old (not device tree aware) U-Boot versions + * + * Author: Scott Wood + * Consolidated using macros by David Gibson + * + * Copyright 2007 David Gibson, IBM Corporation. + * Copyright (c) 2007 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include "ops.h" +#include "stdio.h" + +#include "ppcboot.h" + +extern char _end[]; +extern char _dtb_start[], _dtb_end[]; + +void cuboot_init(unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, + unsigned long end_of_ram) +{ + unsigned long avail_ram = end_of_ram - (unsigned long)_end; + + loader_info.initrd_addr = r4; + loader_info.initrd_size = r4 ? r5 - r4 : 0; + loader_info.cmdline = (char *)r6; + loader_info.cmdline_len = r7 - r6; + + simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64); +} diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.h linux-2.6.22-591/arch/powerpc/boot/cuboot.h --- linux-2.6.22-570/arch/powerpc/boot/cuboot.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/boot/cuboot.h 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,14 @@ +#ifndef _PPC_BOOT_CUBOOT_H_ +#define _PPC_BOOT_CUBOOT_H_ + +void cuboot_init(unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, + unsigned long end_of_ram); + +#define CUBOOT_INIT() \ + do { \ + memcpy(&bd, (bd_t *)r3, sizeof(bd)); \ + cuboot_init(r4, r5, r6, r7, bd.bi_memstart + bd.bi_memsize); \ + } while (0) + +#endif /* _PPC_BOOT_CUBOOT_H_ */ diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dcr.h linux-2.6.22-591/arch/powerpc/boot/dcr.h --- linux-2.6.22-570/arch/powerpc/boot/dcr.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/dcr.h 2007-12-21 15:36:11.000000000 -0500 @@ -26,6 +26,43 @@ #define SDRAM_CONFIG_BANK_SIZE(reg) \ (0x00400000 << ((reg & SDRAM_CONFIG_SIZE_MASK) >> 17)) +/* 440GP External Bus Controller (EBC) */ +#define DCRN_EBC0_CFGADDR 0x012 +#define DCRN_EBC0_CFGDATA 0x013 +#define EBC_NUM_BANKS 8 +#define EBC_B0CR 0x00 +#define EBC_B1CR 0x01 +#define EBC_B2CR 0x02 +#define EBC_B3CR 0x03 +#define EBC_B4CR 
0x04 +#define EBC_B5CR 0x05 +#define EBC_B6CR 0x06 +#define EBC_B7CR 0x07 +#define EBC_BXCR(n) (n) +#define EBC_BXCR_BAS 0xfff00000 +#define EBC_BXCR_BS 0x000e0000 +#define EBC_BXCR_BANK_SIZE(reg) \ + (0x100000 << (((reg) & EBC_BXCR_BS) >> 17)) +#define EBC_BXCR_BU 0x00018000 +#define EBC_BXCR_BU_OFF 0x00000000 +#define EBC_BXCR_BU_RO 0x00008000 +#define EBC_BXCR_BU_WO 0x00010000 +#define EBC_BXCR_BU_RW 0x00018000 +#define EBC_BXCR_BW 0x00006000 +#define EBC_B0AP 0x10 +#define EBC_B1AP 0x11 +#define EBC_B2AP 0x12 +#define EBC_B3AP 0x13 +#define EBC_B4AP 0x14 +#define EBC_B5AP 0x15 +#define EBC_B6AP 0x16 +#define EBC_B7AP 0x17 +#define EBC_BXAP(n) (0x10+(n)) +#define EBC_BEAR 0x20 +#define EBC_BESR 0x21 +#define EBC_CFG 0x23 +#define EBC_CID 0x24 + /* 440GP Clock, PM, chip control */ #define DCRN_CPC0_SR 0x0b0 #define DCRN_CPC0_ER 0x0b1 diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts linux-2.6.22-591/arch/powerpc/boot/dts/ebony.dts --- linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/dts/ebony.dts 2007-12-21 15:36:11.000000000 -0500 @@ -135,11 +135,9 @@ #address-cells = <2>; #size-cells = <1>; clock-frequency = <0>; // Filled in by zImage - ranges = <0 00000000 fff00000 100000 - 1 00000000 48000000 100000 - 2 00000000 ff800000 400000 - 3 00000000 48200000 100000 - 7 00000000 48300000 100000>; + // ranges property is supplied by zImage + // based on firmware's configuration of the + // EBC bridge interrupts = <5 4>; interrupt-parent = <&UIC1>; diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts linux-2.6.22-591/arch/powerpc/boot/dts/holly.dts --- linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/dts/holly.dts 2007-12-21 15:36:11.000000000 -0500 @@ -46,7 +46,7 @@ tsi109@c0000000 { device_type = "tsi-bridge"; - compatible = "tsi-bridge"; + compatible = "tsi109-bridge", "tsi108-bridge"; #address-cells = 
<1>; #size-cells = <1>; ranges = <00000000 c0000000 00010000>; @@ -54,52 +54,55 @@ i2c@7000 { device_type = "i2c"; - compatible = "tsi-i2c"; - interrupt-parent = < &/tsi109@c0000000/pic@7400 >; + compatible = "tsi109-i2c", "tsi108-i2c"; + interrupt-parent = <&MPIC>; interrupts = ; reg = <7000 400>; }; - mdio@6000 { + MDIO: mdio@6000 { device_type = "mdio"; - compatible = "tsi-ethernet"; - - PHY1: ethernet-phy@6000 { - device_type = "ethernet-phy"; - compatible = "bcm54xx"; + compatible = "tsi109-mdio", "tsi108-mdio"; reg = <6000 50>; - phy-id = <1>; + #address-cells = <1>; + #size-cells = <0>; + + PHY1: ethernet-phy@1 { + compatible = "bcm5461a"; + reg = <1>; + txc-rxc-delay-disable; }; - PHY2: ethernet-phy@6400 { - device_type = "ethernet-phy"; - compatible = "bcm54xx"; - reg = <6000 50>; - phy-id = <2>; + PHY2: ethernet-phy@2 { + compatible = "bcm5461a"; + reg = <2>; + txc-rxc-delay-disable; }; }; ethernet@6200 { device_type = "network"; - compatible = "tsi-ethernet"; + compatible = "tsi109-ethernet", "tsi108-ethernet"; #address-cells = <1>; #size-cells = <0>; reg = <6000 200>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = < &/tsi109@c0000000/pic@7400 >; + interrupt-parent = <&MPIC>; interrupts = <10 2>; + mdio-handle = <&MDIO>; phy-handle = <&PHY1>; }; ethernet@6600 { device_type = "network"; - compatible = "tsi-ethernet"; + compatible = "tsi109-ethernet", "tsi108-ethernet"; #address-cells = <1>; #size-cells = <0>; reg = <6400 200>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupt-parent = < &/tsi109@c0000000/pic@7400 >; + interrupt-parent = <&MPIC>; interrupts = <11 2>; + mdio-handle = <&MDIO>; phy-handle = <&PHY2>; }; @@ -110,7 +113,7 @@ virtual-reg = ; clock-frequency = <3F9C6000>; current-speed = <1c200>; - interrupt-parent = < &/tsi109@c0000000/pic@7400 >; + interrupt-parent = <&MPIC>; interrupts = ; }; @@ -121,7 +124,7 @@ virtual-reg = ; clock-frequency = <3F9C6000>; current-speed = <1c200>; - interrupt-parent = < 
&/tsi109@c0000000/pic@7400 >; + interrupt-parent = <&MPIC>; interrupts = ; }; @@ -136,7 +139,7 @@ pci@1000 { device_type = "pci"; - compatible = "tsi109"; + compatible = "tsi109-pci", "tsi108-pci"; #interrupt-cells = <1>; #size-cells = <2>; #address-cells = <3>; @@ -150,7 +153,7 @@ ranges = <02000000 0 40000000 40000000 0 10000000 01000000 0 00000000 7e000000 0 00010000>; clock-frequency = <7f28154>; - interrupt-parent = < &/tsi109@c0000000/pic@7400 >; + interrupt-parent = <&MPIC>; interrupts = <17 2>; interrupt-map-mask = ; /*----------------------------------------------------+ @@ -186,13 +189,12 @@ #address-cells = <0>; #interrupt-cells = <2>; interrupts = <17 2>; - interrupt-parent = < &/tsi109@c0000000/pic@7400 >; + interrupt-parent = <&MPIC>; }; }; }; chosen { linux,stdout-path = "/tsi109@c0000000/serial@7808"; - bootargs = "console=ttyS0,115200"; }; }; diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts linux-2.6.22-591/arch/powerpc/boot/dts/mpc7448hpc2.dts --- linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/dts/mpc7448hpc2.dts 2007-12-21 15:36:11.000000000 -0500 @@ -45,7 +45,7 @@ #address-cells = <1>; #size-cells = <1>; #interrupt-cells = <2>; - device_type = "tsi-bridge"; + device_type = "tsi108-bridge"; ranges = <00000000 c0000000 00010000>; reg = ; bus-frequency = <0>; @@ -55,27 +55,26 @@ interrupts = ; reg = <7000 400>; device_type = "i2c"; - compatible = "tsi-i2c"; + compatible = "tsi108-i2c"; }; - mdio@6000 { + MDIO: mdio@6000 { device_type = "mdio"; - compatible = "tsi-ethernet"; + compatible = "tsi108-mdio"; + reg = <6000 50>; + #address-cells = <1>; + #size-cells = <0>; - phy8: ethernet-phy@6000 { + phy8: ethernet-phy@8 { interrupt-parent = <&mpic>; interrupts = <2 1>; - reg = <6000 50>; - phy-id = <8>; - device_type = "ethernet-phy"; + reg = <8>; }; - phy9: ethernet-phy@6400 { + phy9: ethernet-phy@9 { interrupt-parent = <&mpic>; interrupts = <2 1>; - 
reg = <6000 50>; - phy-id = <9>; - device_type = "ethernet-phy"; + reg = <9>; }; }; @@ -83,12 +82,12 @@ ethernet@6200 { #size-cells = <0>; device_type = "network"; - model = "TSI-ETH"; - compatible = "tsi-ethernet"; + compatible = "tsi108-ethernet"; reg = <6000 200>; address = [ 00 06 D2 00 00 01 ]; interrupts = <10 2>; interrupt-parent = <&mpic>; + mdio-handle = <&MDIO>; phy-handle = <&phy8>; }; @@ -96,12 +95,12 @@ #address-cells = <1>; #size-cells = <0>; device_type = "network"; - model = "TSI-ETH"; - compatible = "tsi-ethernet"; + compatible = "tsi108-ethernet"; reg = <6400 200>; address = [ 00 06 D2 00 00 02 ]; interrupts = <11 2>; interrupt-parent = <&mpic>; + mdio-handle = <&MDIO>; phy-handle = <&phy9>; }; @@ -135,7 +134,7 @@ big-endian; }; pci@1000 { - compatible = "tsi10x"; + compatible = "tsi108-pci"; device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ebony.c linux-2.6.22-591/arch/powerpc/boot/ebony.c --- linux-2.6.22-570/arch/powerpc/boot/ebony.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/boot/ebony.c 2007-12-21 15:36:11.000000000 -0500 @@ -100,28 +100,13 @@ ibm440gp_fixup_clocks(sysclk, 6 * 1843200); ibm44x_fixup_memsize(); dt_fixup_mac_addresses(ebony_mac0, ebony_mac1); -} - -#define SPRN_DBCR0 0x134 -#define DBCR0_RST_SYSTEM 0x30000000 - -static void ebony_exit(void) -{ - unsigned long tmp; - - asm volatile ( - "mfspr %0,%1\n" - "oris %0,%0,%2@h\n" - "mtspr %1,%0" - : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM) - ); - + ibm4xx_fixup_ebc_ranges("/plb/opb/ebc"); } void ebony_init(void *mac0, void *mac1) { platform_ops.fixups = ebony_fixups; - platform_ops.exit = ebony_exit; + platform_ops.exit = ibm44x_dbcr_reset; ebony_mac0 = mac0; ebony_mac1 = mac1; ft_init(_dtb_start, _dtb_end - _dtb_start, 32); diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.c linux-2.6.22-591/arch/powerpc/boot/of.c --- linux-2.6.22-570/arch/powerpc/boot/of.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/arch/powerpc/boot/of.c 2007-12-21 15:36:11.000000000 -0500 @@ -15,8 +15,7 @@ #include "page.h" #include "ops.h" -typedef void *ihandle; -typedef void *phandle; +#include "of.h" extern char _end[]; @@ -25,154 +24,10 @@ #define RAM_END (512<<20) /* Fixme: use OF */ #define ONE_MB 0x100000 -int (*prom) (void *); static unsigned long claim_base; -static int call_prom(const char *service, int nargs, int nret, ...) -{ - int i; - struct prom_args { - const char *service; - int nargs; - int nret; - unsigned int args[12]; - } args; - va_list list; - - args.service = service; - args.nargs = nargs; - args.nret = nret; - - va_start(list, nret); - for (i = 0; i < nargs; i++) - args.args[i] = va_arg(list, unsigned int); - va_end(list); - - for (i = 0; i < nret; i++) - args.args[nargs+i] = 0; - - if (prom(&args) < 0) - return -1; - - return (nret > 0)? args.args[nargs]: 0; -} - -static int call_prom_ret(const char *service, int nargs, int nret, - unsigned int *rets, ...) -{ - int i; - struct prom_args { - const char *service; - int nargs; - int nret; - unsigned int args[12]; - } args; - va_list list; - - args.service = service; - args.nargs = nargs; - args.nret = nret; - - va_start(list, rets); - for (i = 0; i < nargs; i++) - args.args[i] = va_arg(list, unsigned int); - va_end(list); - - for (i = 0; i < nret; i++) - args.args[nargs+i] = 0; - - if (prom(&args) < 0) - return -1; - - if (rets != (void *) 0) - for (i = 1; i < nret; ++i) - rets[i-1] = args.args[nargs+i]; - - return (nret > 0)? args.args[nargs]: 0; -} - -/* - * Older OF's require that when claiming a specific range of addresses, - * we claim the physical space in the /memory node and the virtual - * space in the chosen mmu node, and then do a map operation to - * map virtual to physical. 
- */ -static int need_map = -1; -static ihandle chosen_mmu; -static phandle memory; - -/* returns true if s2 is a prefix of s1 */ -static int string_match(const char *s1, const char *s2) -{ - for (; *s2; ++s2) - if (*s1++ != *s2) - return 0; - return 1; -} - -static int check_of_version(void) -{ - phandle oprom, chosen; - char version[64]; - - oprom = finddevice("/openprom"); - if (oprom == (phandle) -1) - return 0; - if (getprop(oprom, "model", version, sizeof(version)) <= 0) - return 0; - version[sizeof(version)-1] = 0; - printf("OF version = '%s'\r\n", version); - if (!string_match(version, "Open Firmware, 1.") - && !string_match(version, "FirmWorks,3.")) - return 0; - chosen = finddevice("/chosen"); - if (chosen == (phandle) -1) { - chosen = finddevice("/chosen@0"); - if (chosen == (phandle) -1) { - printf("no chosen\n"); - return 0; - } - } - if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) { - printf("no mmu\n"); - return 0; - } - memory = (ihandle) call_prom("open", 1, 1, "/memory"); - if (memory == (ihandle) -1) { - memory = (ihandle) call_prom("open", 1, 1, "/memory@0"); - if (memory == (ihandle) -1) { - printf("no memory node\n"); - return 0; - } - } - printf("old OF detected\r\n"); - return 1; -} - -static void *claim(unsigned long virt, unsigned long size, unsigned long align) -{ - int ret; - unsigned int result; - - if (need_map < 0) - need_map = check_of_version(); - if (align || !need_map) - return (void *) call_prom("claim", 3, 1, virt, size, align); - - ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory, - align, size, virt); - if (ret != 0 || result == -1) - return (void *) -1; - ret = call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu, - align, size, virt); - /* 0x12 == coherent + read/write */ - ret = call_prom("call-method", 6, 1, "map", chosen_mmu, - 0x12, size, virt, virt); - return (void *) virt; -} - static void *of_try_claim(unsigned long size) { unsigned long addr = 0; @@ -184,7 +39,7 @@ #ifdef 
DEBUG printf(" trying: 0x%08lx\n\r", claim_base); #endif - addr = (unsigned long)claim(claim_base, size, 0); + addr = (unsigned long)of_claim(claim_base, size, 0); if ((void *)addr != (void *)-1) break; } @@ -218,52 +73,24 @@ return p; } -static void of_exit(void) -{ - call_prom("exit", 0, 0); -} - /* * OF device tree routines */ static void *of_finddevice(const char *name) { - return (phandle) call_prom("finddevice", 1, 1, name); + return (phandle) of_call_prom("finddevice", 1, 1, name); } static int of_getprop(const void *phandle, const char *name, void *buf, const int buflen) { - return call_prom("getprop", 4, 1, phandle, name, buf, buflen); + return of_call_prom("getprop", 4, 1, phandle, name, buf, buflen); } static int of_setprop(const void *phandle, const char *name, const void *buf, const int buflen) { - return call_prom("setprop", 4, 1, phandle, name, buf, buflen); -} - -/* - * OF console routines - */ -static void *of_stdout_handle; - -static int of_console_open(void) -{ - void *devp; - - if (((devp = finddevice("/chosen")) != NULL) - && (getprop(devp, "stdout", &of_stdout_handle, - sizeof(of_stdout_handle)) - == sizeof(of_stdout_handle))) - return 0; - - return -1; -} - -static void of_console_write(char *buf, int len) -{ - call_prom("write", 3, 1, of_stdout_handle, buf, len); + return of_call_prom("setprop", 4, 1, phandle, name, buf, buflen); } void platform_init(unsigned long a1, unsigned long a2, void *promptr) @@ -277,10 +104,9 @@ dt_ops.getprop = of_getprop; dt_ops.setprop = of_setprop; - console_ops.open = of_console_open; - console_ops.write = of_console_write; + of_console_init(); - prom = (int (*)(void *))promptr; + of_init(promptr); loader_info.promptr = promptr; if (a1 && a2 && a2 != 0xdeadbeef) { loader_info.initrd_addr = a1; diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.h linux-2.6.22-591/arch/powerpc/boot/of.h --- linux-2.6.22-570/arch/powerpc/boot/of.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/boot/of.h 
2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,15 @@ +#ifndef _PPC_BOOT_OF_H_ +#define _PPC_BOOT_OF_H_ + +typedef void *phandle; +typedef void *ihandle; + +void of_init(void *promptr); +int of_call_prom(const char *service, int nargs, int nret, ...); +void *of_claim(unsigned long virt, unsigned long size, unsigned long align); +void of_exit(void); + +/* Console functions */ +void of_console_init(void); + +#endif /* _PPC_BOOT_OF_H_ */ diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ofconsole.c linux-2.6.22-591/arch/powerpc/boot/ofconsole.c --- linux-2.6.22-570/arch/powerpc/boot/ofconsole.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/boot/ofconsole.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,45 @@ +/* + * OF console routines + * + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include "types.h" +#include "elf.h" +#include "string.h" +#include "stdio.h" +#include "page.h" +#include "ops.h" + +#include "of.h" + +static void *of_stdout_handle; + +static int of_console_open(void) +{ + void *devp; + + if (((devp = finddevice("/chosen")) != NULL) + && (getprop(devp, "stdout", &of_stdout_handle, + sizeof(of_stdout_handle)) + == sizeof(of_stdout_handle))) + return 0; + + return -1; +} + +static void of_console_write(char *buf, int len) +{ + of_call_prom("write", 3, 1, of_stdout_handle, buf, len); +} + +void of_console_init(void) +{ + console_ops.open = of_console_open; + console_ops.write = of_console_write; +} diff -Nurb linux-2.6.22-570/arch/powerpc/boot/oflib.c linux-2.6.22-591/arch/powerpc/boot/oflib.c --- linux-2.6.22-570/arch/powerpc/boot/oflib.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/boot/oflib.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,172 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include "types.h" +#include "elf.h" +#include "string.h" +#include "stdio.h" +#include "page.h" +#include "ops.h" + +#include "of.h" + +static int (*prom) (void *); + +void of_init(void *promptr) +{ + prom = (int (*)(void *))promptr; +} + +int of_call_prom(const char *service, int nargs, int nret, ...) +{ + int i; + struct prom_args { + const char *service; + int nargs; + int nret; + unsigned int args[12]; + } args; + va_list list; + + args.service = service; + args.nargs = nargs; + args.nret = nret; + + va_start(list, nret); + for (i = 0; i < nargs; i++) + args.args[i] = va_arg(list, unsigned int); + va_end(list); + + for (i = 0; i < nret; i++) + args.args[nargs+i] = 0; + + if (prom(&args) < 0) + return -1; + + return (nret > 0)? 
args.args[nargs]: 0; +} + +static int of_call_prom_ret(const char *service, int nargs, int nret, + unsigned int *rets, ...) +{ + int i; + struct prom_args { + const char *service; + int nargs; + int nret; + unsigned int args[12]; + } args; + va_list list; + + args.service = service; + args.nargs = nargs; + args.nret = nret; + + va_start(list, rets); + for (i = 0; i < nargs; i++) + args.args[i] = va_arg(list, unsigned int); + va_end(list); + + for (i = 0; i < nret; i++) + args.args[nargs+i] = 0; + + if (prom(&args) < 0) + return -1; + + if (rets != (void *) 0) + for (i = 1; i < nret; ++i) + rets[i-1] = args.args[nargs+i]; + + return (nret > 0)? args.args[nargs]: 0; +} + +/* returns true if s2 is a prefix of s1 */ +static int string_match(const char *s1, const char *s2) +{ + for (; *s2; ++s2) + if (*s1++ != *s2) + return 0; + return 1; +} + +/* + * Older OF's require that when claiming a specific range of addresses, + * we claim the physical space in the /memory node and the virtual + * space in the chosen mmu node, and then do a map operation to + * map virtual to physical. 
+ */ +static int need_map = -1; +static ihandle chosen_mmu; +static phandle memory; + +static int check_of_version(void) +{ + phandle oprom, chosen; + char version[64]; + + oprom = finddevice("/openprom"); + if (oprom == (phandle) -1) + return 0; + if (getprop(oprom, "model", version, sizeof(version)) <= 0) + return 0; + version[sizeof(version)-1] = 0; + printf("OF version = '%s'\r\n", version); + if (!string_match(version, "Open Firmware, 1.") + && !string_match(version, "FirmWorks,3.")) + return 0; + chosen = finddevice("/chosen"); + if (chosen == (phandle) -1) { + chosen = finddevice("/chosen@0"); + if (chosen == (phandle) -1) { + printf("no chosen\n"); + return 0; + } + } + if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) { + printf("no mmu\n"); + return 0; + } + memory = (ihandle) of_call_prom("open", 1, 1, "/memory"); + if (memory == (ihandle) -1) { + memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0"); + if (memory == (ihandle) -1) { + printf("no memory node\n"); + return 0; + } + } + printf("old OF detected\r\n"); + return 1; +} + +void *of_claim(unsigned long virt, unsigned long size, unsigned long align) +{ + int ret; + unsigned int result; + + if (need_map < 0) + need_map = check_of_version(); + if (align || !need_map) + return (void *) of_call_prom("claim", 3, 1, virt, size, align); + + ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory, + align, size, virt); + if (ret != 0 || result == -1) + return (void *) -1; + ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu, + align, size, virt); + /* 0x12 == coherent + read/write */ + ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu, + 0x12, size, virt, virt); + return (void *) virt; +} + +void of_exit(void) +{ + of_call_prom("exit", 0, 0); +} diff -Nurb linux-2.6.22-570/arch/powerpc/configs/holly_defconfig linux-2.6.22-591/arch/powerpc/configs/holly_defconfig --- linux-2.6.22-570/arch/powerpc/configs/holly_defconfig 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/configs/holly_defconfig 2007-12-21 15:36:11.000000000 -0500 @@ -190,7 +190,8 @@ # CONFIG_RESOURCES_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 CONFIG_PROC_DEVICETREE=y -# CONFIG_CMDLINE_BOOL is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_PM is not set # CONFIG_SECCOMP is not set # CONFIG_WANT_DEVICE_TREE is not set diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/Makefile linux-2.6.22-591/arch/powerpc/kernel/Makefile --- linux-2.6.22-570/arch/powerpc/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -12,7 +12,8 @@ obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ - init_task.o process.o systbl.o idle.o + init_task.o process.o systbl.o idle.o \ + signal.o obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o \ @@ -62,10 +63,16 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp32.o +else +obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp64.o +endif + module-$(CONFIG_PPC64) += module_64.o obj-$(CONFIG_MODULES) += $(module-y) -pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o +pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o isa-bridge.o pci32-$(CONFIG_PPC32) := pci_32.o obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y) obj-$(CONFIG_PCI_MSI) += msi.o diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/head_32.S linux-2.6.22-591/arch/powerpc/kernel/head_32.S --- linux-2.6.22-570/arch/powerpc/kernel/head_32.S 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/head_32.S 2007-12-21 15:36:11.000000000 -0500 @@ -9,7 +9,6 @@ * rewritten by Paul Mackerras. * Copyright (C) 1996 Paul Mackerras. * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). 
- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * This file contains the low-level support and setup for the * PowerPC platform, including trap and interrupt dispatch. @@ -32,10 +31,6 @@ #include #include -#ifdef CONFIG_APUS -#include -#endif - /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ @@ -92,11 +87,6 @@ * r4: virtual address of boot_infos_t * r5: 0 * - * APUS - * r3: 'APUS' - * r4: physical address of memory base - * Linux/m68k style BootInfo structure at &_end. - * * PREP * This is jumped to on prep systems right after the kernel is relocated * to its proper place in memory by the boot loader. The expected layout @@ -150,14 +140,6 @@ */ bl early_init -#ifdef CONFIG_APUS -/* On APUS the __va/__pa constants need to be set to the correct - * values before continuing. - */ - mr r4,r30 - bl fix_mem_constants -#endif /* CONFIG_APUS */ - /* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains * the physical address we are running at, returned by early_init() */ @@ -167,7 +149,7 @@ bl flush_tlbs bl initial_bats -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +#if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -183,7 +165,6 @@ #endif /* CONFIG_6xx */ -#ifndef CONFIG_APUS /* * We need to run with _start at physical address 0. * On CHRP, we are loaded at 0x10000 since OF on CHRP uses @@ -196,7 +177,6 @@ addis r4,r3,KERNELBASE@h /* current address of _start */ cmpwi 0,r4,0 /* are we already running at 0? */ bne relocate_kernel -#endif /* CONFIG_APUS */ /* * we now have the 1st 16M of ram mapped with the bats. * prep needs the mmu to be turned on here, but pmac already has it on. @@ -881,85 +861,6 @@ addi r6,r6,4 blr -#ifdef CONFIG_APUS -/* - * On APUS the physical base address of the kernel is not known at compile - * time, which means the __pa/__va constants used are incorrect. 
In the - * __init section is recorded the virtual addresses of instructions using - * these constants, so all that has to be done is fix these before - * continuing the kernel boot. - * - * r4 = The physical address of the kernel base. - */ -fix_mem_constants: - mr r10,r4 - addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ - neg r11,r10 /* phys_to_virt constant */ - - lis r12,__vtop_table_begin@h - ori r12,r12,__vtop_table_begin@l - add r12,r12,r10 /* table begin phys address */ - lis r13,__vtop_table_end@h - ori r13,r13,__vtop_table_end@l - add r13,r13,r10 /* table end phys address */ - subi r12,r12,4 - subi r13,r13,4 -1: lwzu r14,4(r12) /* virt address of instruction */ - add r14,r14,r10 /* phys address of instruction */ - lwz r15,0(r14) /* instruction, now insert top */ - rlwimi r15,r10,16,16,31 /* half of vp const in low half */ - stw r15,0(r14) /* of instruction and restore. */ - dcbst r0,r14 /* write it to memory */ - sync - icbi r0,r14 /* flush the icache line */ - cmpw r12,r13 - bne 1b - sync /* additional sync needed on g4 */ - isync - -/* - * Map the memory where the exception handlers will - * be copied to when hash constants have been patched. - */ -#ifdef CONFIG_APUS_FAST_EXCEPT - lis r8,0xfff0 -#else - lis r8,0 -#endif - ori r8,r8,0x2 /* 128KB, supervisor */ - mtspr SPRN_DBAT3U,r8 - mtspr SPRN_DBAT3L,r8 - - lis r12,__ptov_table_begin@h - ori r12,r12,__ptov_table_begin@l - add r12,r12,r10 /* table begin phys address */ - lis r13,__ptov_table_end@h - ori r13,r13,__ptov_table_end@l - add r13,r13,r10 /* table end phys address */ - subi r12,r12,4 - subi r13,r13,4 -1: lwzu r14,4(r12) /* virt address of instruction */ - add r14,r14,r10 /* phys address of instruction */ - lwz r15,0(r14) /* instruction, now insert top */ - rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ - stw r15,0(r14) /* of instruction and restore. 
*/ - dcbst r0,r14 /* write it to memory */ - sync - icbi r0,r14 /* flush the icache line */ - cmpw r12,r13 - bne 1b - - sync /* additional sync needed on g4 */ - isync /* No speculative loading until now */ - blr - -/*********************************************************************** - * Please note that on APUS the exception handlers are located at the - * physical address 0xfff0000. For this reason, the exception handlers - * cannot use relative branches to access the code below. - ***********************************************************************/ -#endif /* CONFIG_APUS */ - #ifdef CONFIG_SMP #ifdef CONFIG_GEMINI .globl __secondary_start_gemini @@ -1135,19 +1036,6 @@ bl __save_cpu_setup bl MMU_init -#ifdef CONFIG_APUS - /* Copy exception code to exception vector base on APUS. */ - lis r4,KERNELBASE@h -#ifdef CONFIG_APUS_FAST_EXCEPT - lis r3,0xfff0 /* Copy to 0xfff00000 */ -#else - lis r3,0 /* Copy to 0x00000000 */ -#endif - li r5,0x4000 /* # bytes of memory to copy */ - li r6,0 - bl copy_and_flush /* copy the first 0x4000 bytes */ -#endif /* CONFIG_APUS */ - /* * Go back to running unmapped so we can load up new values * for SDR1 (hash table pointer) and the segment registers @@ -1324,11 +1212,7 @@ #else ori r8,r8,2 /* R/W access */ #endif /* CONFIG_SMP */ -#ifdef CONFIG_APUS - ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ -#else ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ -#endif /* CONFIG_APUS */ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ @@ -1338,7 +1222,7 @@ blr -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +#ifdef CONFIG_BOOTX_TEXT setup_disp_bat: /* * setup the display bat prepared for us in prom.c @@ -1362,7 +1246,7 @@ 1: mtspr SPRN_IBAT3L,r8 mtspr SPRN_IBAT3U,r11 blr -#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ +#endif /* CONFIG_BOOTX_TEXT */ #ifdef CONFIG_8260 /* Jump into the system reset for the rom. 
diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/irq.c linux-2.6.22-591/arch/powerpc/kernel/irq.c --- linux-2.6.22-570/arch/powerpc/kernel/irq.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/irq.c 2007-12-21 15:36:11.000000000 -0500 @@ -7,7 +7,6 @@ * Copyright (C) 1996-2001 Cort Dougan * Adapted for Power Macintosh by Paul Mackerras * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c linux-2.6.22-591/arch/powerpc/kernel/isa-bridge.c --- linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/isa-bridge.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,271 @@ +/* + * Routines for tracking a legacy ISA bridge + * + * Copyrigh 2007 Benjamin Herrenschmidt , IBM Corp. + * + * Some bits and pieces moved over from pci_64.c + * + * Copyrigh 2003 Anton Blanchard , IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +unsigned long isa_io_base; /* NULL if no ISA bus */ +EXPORT_SYMBOL(isa_io_base); + +/* Cached ISA bridge dev. 
*/ +static struct device_node *isa_bridge_devnode; +struct pci_dev *isa_bridge_pcidev; +EXPORT_SYMBOL_GPL(isa_bridge_pcidev); + +#define ISA_SPACE_MASK 0x1 +#define ISA_SPACE_IO 0x1 + +static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys) +{ + /* We should get some saner parsing here and remove these structs */ + struct pci_address { + u32 a_hi; + u32 a_mid; + u32 a_lo; + }; + + struct isa_address { + u32 a_hi; + u32 a_lo; + }; + + struct isa_range { + struct isa_address isa_addr; + struct pci_address pci_addr; + unsigned int size; + }; + + const struct isa_range *range; + unsigned long pci_addr; + unsigned int isa_addr; + unsigned int size; + int rlen = 0; + + range = of_get_property(isa_node, "ranges", &rlen); + if (range == NULL || (rlen < sizeof(struct isa_range))) + goto inval_range; + + /* From "ISA Binding to 1275" + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 1: an ISA address (bit 0 of cell 0 set == IO space) + * cells 2 - 4: a PCI address + * (size depending on dev->n_addr_cells) + * cell 5: the size of the range + */ + if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) { + range++; + rlen -= sizeof(struct isa_range); + if (rlen < sizeof(struct isa_range)) + goto inval_range; + } + if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) + goto inval_range; + + isa_addr = range->isa_addr.a_lo; + pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | + range->pci_addr.a_lo; + + /* Assume these are both zero. Note: We could fix that and + * do a proper parsing instead ...
oh well, that will do for + * now as nobody uses fancy mappings for ISA bridges + */ + if ((pci_addr != 0) || (isa_addr != 0)) { + printk(KERN_ERR "unexpected isa to pci mapping: %s\n", + __FUNCTION__); + return; + } + + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(range->size); + if (size > 0x10000) + size = 0x10000; + + pr_debug("ISA bridge: mapping 0x%x bytes of IO space\n", + size); + + __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, + size, _PAGE_NO_CACHE|_PAGE_GUARDED); + return; + +inval_range: + printk(KERN_ERR "no ISA IO ranges or unexpected isa range," + "mapping 64k\n"); + __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, + 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED); +} + + +/** + * isa_bridge_find_early - Find and map the ISA IO space early before + * main PCI discovery. This is optionally called by + * the arch code when adding PCI PHBs to get early + * access to ISA IO ports + */ +void __init isa_bridge_find_early(struct pci_controller *hose) +{ + struct device_node *np, *parent = NULL, *tmp; + + /* If we already have an ISA bridge, bail off */ + if (isa_bridge_devnode != NULL) + return; + + /* For each "isa" node in the system. Note : we do a search by + * type and not by name. It might be better to do by name but that's + * what the code used to do and I don't want to break too much at + * once.
We can look into changing that separately + */ + for_each_node_by_type(np, "isa") { + /* Look for our hose being a parent */ + for (parent = of_get_parent(np); parent;) { + if (parent == hose->arch_data) { + of_node_put(parent); + break; + } + tmp = parent; + parent = of_get_parent(parent); + of_node_put(tmp); + } + if (parent != NULL) + break; + } + if (np == NULL) + return; + isa_bridge_devnode = np; + + /* Now parse the "ranges" property and setup the ISA mapping */ + pci_process_ISA_OF_ranges(np, hose->io_base_phys); + + /* Set the global ISA io base to indicate we have an ISA bridge */ + isa_io_base = ISA_IO_BASE; + + pr_debug("ISA bridge (early) is %s\n", np->full_name); +} + +/** + * isa_bridge_find_late - Find and map the ISA IO space upon discovery of + * a new ISA bridge + */ +static void __devinit isa_bridge_find_late(struct pci_dev *pdev, + struct device_node *devnode) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + + /* Store ISA device node and PCI device */ + isa_bridge_devnode = of_node_get(devnode); + isa_bridge_pcidev = pdev; + + /* Now parse the "ranges" property and setup the ISA mapping */ + pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); + + /* Set the global ISA io base to indicate we have an ISA bridge */ + isa_io_base = ISA_IO_BASE; + + pr_debug("ISA bridge (late) is %s on %s\n", + devnode->full_name, pci_name(pdev)); +} + +/** + * isa_bridge_remove - Remove/unmap an ISA bridge + */ +static void isa_bridge_remove(void) +{ + pr_debug("ISA bridge removed !\n"); + + /* Clear the global ISA io base to indicate that we have no more + * ISA bridge. Note that drivers don't quite handle that, though + * we should probably do something about it. But do we ever really + * have ISA bridges being removed on machines using legacy devices ? 
+ */ + isa_io_base = ISA_IO_BASE; + + /* Clear references to the bridge */ + of_node_put(isa_bridge_devnode); + isa_bridge_devnode = NULL; + isa_bridge_pcidev = NULL; + + /* Unmap the ISA area */ + __iounmap_at((void *)ISA_IO_BASE, 0x10000); +} + +/** + * isa_bridge_notify - Get notified of PCI devices addition/removal + */ +static int __devinit isa_bridge_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct device_node *devnode = pci_device_to_OF_node(pdev); + + switch(action) { + case BUS_NOTIFY_ADD_DEVICE: + /* Check if we have an early ISA device, without PCI dev */ + if (isa_bridge_devnode && isa_bridge_devnode == devnode && + !isa_bridge_pcidev) { + pr_debug("ISA bridge PCI attached: %s\n", + pci_name(pdev)); + isa_bridge_pcidev = pdev; + } + + /* Check if we have no ISA device, and this happens to be one, + * register it as such if it has an OF device + */ + if (!isa_bridge_devnode && devnode && devnode->type && + !strcmp(devnode->type, "isa")) + isa_bridge_find_late(pdev, devnode); + + return 0; + case BUS_NOTIFY_DEL_DEVICE: + /* Check if this our existing ISA device */ + if (pdev == isa_bridge_pcidev || + (devnode && devnode == isa_bridge_devnode)) + isa_bridge_remove(); + return 0; + } + return 0; +} + +static struct notifier_block isa_bridge_notifier = { + .notifier_call = isa_bridge_notify +}; + +/** + * isa_bridge_init - register to be notified of ISA bridge addition/removal + * + */ +static int __init isa_bridge_init(void) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + return 0; + bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); + return 0; +} +arch_initcall(isa_bridge_init); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb.c linux-2.6.22-591/arch/powerpc/kernel/kgdb.c --- linux-2.6.22-570/arch/powerpc/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/kgdb.c 2007-12-21 15:36:11.000000000 
-0500 @@ -0,0 +1,499 @@ +/* + * arch/powerpc/kernel/kgdb.c + * + * PowerPC backend to the KGDB stub. + * + * Maintainer: Tom Rini + * + * Copied from arch/ppc/kernel/kgdb.c, updated for ppc64 + * + * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) + * Copyright (C) 2003 Timesys Corporation. + * Copyright (C) 2004-2006 MontaVista Software, Inc. + * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) + * PPC32 support restored by Vitaly Wool and + * Sergei Shtylyov + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This table contains the mapping between PowerPC hardware trap types, and + * signals, which are primarily what GDB understands. GDB and the kernel + * don't always agree on values, so we use constants taken from gdb-6.2. + */ +static struct hard_trap_info +{ + unsigned int tt; /* Trap type code for powerpc */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ + { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ + { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ + { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ + { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ + { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ + { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ + { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ +#if defined(CONFIG_FSL_BOOKE) + { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data 
*/ + { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ + { 0x2060, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ + { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ +#else + { 0x1000, 0x0e /* SIGALRM */ }, /* programmable interval timer */ + { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ + { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ +#endif +#else + { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ +#if defined(CONFIG_8xx) + { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ +#else + { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ + { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ +#if defined(CONFIG_PPC64) + { 0x1200, 0x04 /* SIGILL */ }, /* system error */ + { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */ + { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */ + { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1800, 0x04 /* SIGILL */ }, /* thermal */ +#else + { 0x1400, 0x02 /* SIGINT */ }, /* SMI */ + { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1700, 0x04 /* SIGILL */ }, /* TAU */ + { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */ +#endif +#endif +#endif + { 0x0000, 0x00 } /* Must be last */ +}; + +extern atomic_t cpu_doing_single_step; + +static int computeSignal(unsigned int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +static int kgdb_call_nmi_hook(struct pt_regs *regs) +{ + kgdb_nmihook(smp_processor_id(), regs); + return 0; +} + +#ifdef CONFIG_SMP +void kgdb_roundup_cpus(unsigned long flags) +{ +
smp_send_debugger_break(MSG_ALL_BUT_SELF); +} +#endif + +/* KGDB functions to use existing PowerPC64 hooks. */ +static int kgdb_debugger(struct pt_regs *regs) +{ + return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); +} + +static int kgdb_breakpoint(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) + regs->nip += 4; + + return 1; +} + +static int kgdb_singlestep(struct pt_regs *regs) +{ + struct thread_info *thread_info, *exception_thread_info; + + if (user_mode(regs)) + return 0; + + /* + * On Book E and perhaps other processsors, singlestep is handled on + * the critical exception stack. This causes current_thread_info() + * to fail, since it it locates the thread_info by masking off + * the low bits of the current stack pointer. We work around + * this issue by copying the thread_info from the kernel stack + * before calling kgdb_handle_exception, and copying it back + * afterwards. On most processors the copy is avoided since + * exception_thread_info == thread_info. 
+ */ + thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); + exception_thread_info = current_thread_info(); + + if (thread_info != exception_thread_info) + memcpy(exception_thread_info, thread_info, sizeof *thread_info); + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (thread_info != exception_thread_info) + memcpy(thread_info, exception_thread_info, sizeof *thread_info); + + return 1; +} + +int kgdb_iabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); + return 1; +} + +int kgdb_dabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); + return 1; +} + +#define PACK64(ptr,src) do { *(ptr++) = (src); } while(0) + +#define PACK32(ptr,src) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + *(ptr32++) = (src); \ + ptr = (unsigned long *)ptr32; \ + } while(0) + + +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + for (reg = 0; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, current->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + +#if 0 + Following are in struct thread_struct, not struct pt_regs, + ignoring for now since kernel does not use them. Would it + make sense to get them from the thread that kgdb is set to? + + If this code is enabled, update the definition of NUMREGBYTES to + include the vector registers and vector state registers. 
+ + PACK32(ptr, current->thread->fpscr); + + /* vr registers not used by kernel, leave zero */ + ptr += 32 * 16 / sizeof(long); + +#ifdef CONFIG_ALTIVEC + PACK32(ptr, current->thread->vscr); + PACK32(ptr, current->thread->vrsave); +#else + ptr += 2 * 4 / sizeof(long); +#endif +#else +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + /* u64 acc */ + PACK32(ptr, current->thread.acc >> 32); + PACK32(ptr, current->thread.acc & 0xffffffff); + PACK64(ptr, current->thread.spefscr); +#else + ptr += 2 + 1; +#endif +#else + /* fpscr not used by kernel, leave zero */ + PACK32(ptr, 0); +#endif +#endif + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + + STACK_FRAME_OVERHEAD); + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + /* Regs GPR0-2 */ + for (reg = 0; reg < 3; reg++) + PACK64(ptr, regs->gpr[reg]); + + /* Regs GPR3-13 are caller saved, not in regs->gpr[] */ + ptr += 11; + + /* Regs GPR14-31 */ + for (reg = 14; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, p->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + +#if 0 + Following are in struct thread_struct, not struct pt_regs, + ignoring for now since kernel does not use them. Would it + make sense to get them from the thread that kgdb is set to? + + If this code is enabled, update the definition of NUMREGBYTES to + include the vector registers and vector state registers. 
+ + PACK32(ptr, p->thread->fpscr); + + /* vr registers not used by kernel, leave zero */ + ptr += 32 * 16 / sizeof(long); + +#ifdef CONFIG_ALTIVEC + PACK32(ptr, p->thread->vscr); + PACK32(ptr, p->thread->vrsave); +#else + ptr += 2 * 4 / sizeof(long); +#endif +#else +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + /* u64 acc */ + PACK32(ptr, p->thread.acc >> 32); + PACK32(ptr, p->thread.acc & 0xffffffff); + PACK64(ptr, p->thread.spefscr); +#else + ptr += 2 + 1; +#endif +#else + /* fpscr not used by kernel, leave zero */ + PACK32(ptr, 0); +#endif +#endif + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +#define UNPACK64(dest,ptr) do { dest = *(ptr++); } while(0) + +#define UNPACK32(dest,ptr) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + dest = *(ptr32++); \ + ptr = (unsigned long *)ptr32; \ + } while(0) + +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; +#ifdef CONFIG_SPE + union { + u32 v32[2]; + u64 v64; + } acc; +#endif + + for (reg = 0; reg < 32; reg++) + UNPACK64(regs->gpr[reg], ptr); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + UNPACK64(current->thread.evr[reg], ptr); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, skip 32 * 8 bytes (ptr is long *) */ + ptr += 32 * 8 / sizeof(long); +#endif + + UNPACK64(regs->nip, ptr); + UNPACK64(regs->msr, ptr); + UNPACK32(regs->ccr, ptr); + UNPACK64(regs->link, ptr); + UNPACK64(regs->ctr, ptr); + UNPACK32(regs->xer, ptr); + +#if 0 + Following are in struct thread_struct, not struct pt_regs, + ignoring for now since kernel does not use them. Would it + make sense to get them from the thread that kgdb is set to? + + If this code is enabled, update the definition of NUMREGBYTES to + include the vector registers and vector state registers.
+ + /* fpscr, vscr, vrsave not used by kernel, leave unchanged */ + + UNPACK32(current->thread->fpscr, ptr); + + /* vr registers not used by kernel, leave zero */ + ptr += 32 * 16 / sizeof(long); + +#ifdef CONFIG_ALTIVEC + UNPACK32(current->thread->vscr, ptr); + UNPACK32(current->thread->vrsave, ptr); +#else + ptr += 2 * 4 / sizeof(long); +#endif +#else +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + /* u64 acc */ + UNPACK32(acc.v32[0], ptr); + UNPACK32(acc.v32[1], ptr); + current->thread.acc = acc.v64; + UNPACK64(current->thread.spefscr, ptr); +#else + ptr += 2 + 1; +#endif +#endif +#endif + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +/* + * This function does PowerPC specific processing for interfacing to gdb. + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long addr; + + switch (remcom_in_buffer[0]) { + /* + * sAA..AA Step one instruction from AA..AA + * This will return an error to gdb .. + */ + case 's': + case 'c': + /* handle the optional parameter */ + if (kgdb_hex2long(&ptr, &addr)) + linux_regs->nip = addr; + + atomic_set(&cpu_doing_single_step, -1); + /* set the trace bit if we're stepping */ + if (remcom_in_buffer[0] == 's') { +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, + mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); + linux_regs->msr |= MSR_DE; +#else + linux_regs->msr |= MSR_SE; +#endif + debugger_step = 1; + if (kgdb_contthread) + atomic_set(&cpu_doing_single_step, + smp_processor_id()); + } + return 0; + } + + return -1; +} + +/* + * Global data + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, +}; + +int kgdb_not_implemented(struct pt_regs *regs) +{ + return 0; +} + +int kgdb_arch_init(void) +{ +#ifdef CONFIG_XMON +#error Both XMON and KGDB selected in .config. Unselect one of them.
+#endif + + __debugger_ipi = kgdb_call_nmi_hook; + __debugger = kgdb_debugger; + __debugger_bpt = kgdb_breakpoint; + __debugger_sstep = kgdb_singlestep; + __debugger_iabr_match = kgdb_iabr_match; + __debugger_dabr_match = kgdb_dabr_match; + __debugger_fault_handler = kgdb_not_implemented; + + return 0; +} + +arch_initcall(kgdb_arch_init); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp32.S --- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp32.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,41 @@ +/* + * Copyright (C) 1996 Paul Mackerras + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include + + .text + +/* + * Save/restore state in case a memory access causes a fault. + * + * int kgdb_fault_setjmp(unsigned long *curr_context); + * void kgdb_fault_longjmp(unsigned long *curr_context); + */ + +_GLOBAL(kgdb_fault_setjmp) + mflr r0 + stw r0,0(r3) + stw r1,4(r3) + stw r2,8(r3) + mfcr r0 + stw r0,12(r3) + stmw r13,16(r3) + li r3,0 + blr + +_GLOBAL(kgdb_fault_longjmp) + lmw r13,16(r3) + lwz r0,12(r3) + mtcrf 0x38,r0 + lwz r0,0(r3) + lwz r1,4(r3) + lwz r2,8(r3) + mtlr r0 + mr r3,r1 + blr diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp64.S --- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp64.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,77 @@ +/* + * Copyright (C) 1996 Paul Mackerras + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include +#include + + .text + +/* + * Save/restore state in case a memory access causes a fault. + * + * int kgdb_fault_setjmp(unsigned long *curr_context); + * void kgdb_fault_longjmp(unsigned long *curr_context); + */ + +_GLOBAL(kgdb_fault_setjmp) + mflr r0 + std r0,0(r3) + std r1,8(r3) + std r2,16(r3) + mfcr r0 + std r0,24(r3) + std r13,32(r3) + std r14,40(r3) + std r15,48(r3) + std r16,56(r3) + std r17,64(r3) + std r18,72(r3) + std r19,80(r3) + std r20,88(r3) + std r21,96(r3) + std r22,104(r3) + std r23,112(r3) + std r24,120(r3) + std r25,128(r3) + std r26,136(r3) + std r27,144(r3) + std r28,152(r3) + std r29,160(r3) + std r30,168(r3) + std r31,176(r3) + li r3,0 + blr + +_GLOBAL(kgdb_fault_longjmp) + ld r13,32(r3) + ld r14,40(r3) + ld r15,48(r3) + ld r16,56(r3) + ld r17,64(r3) + ld r18,72(r3) + ld r19,80(r3) + ld r20,88(r3) + ld r21,96(r3) + ld r22,104(r3) + ld r23,112(r3) + ld r24,120(r3) + ld r25,128(r3) + ld r26,136(r3) + ld r27,144(r3) + ld r28,152(r3) + ld r29,160(r3) + ld r30,168(r3) + ld r31,176(r3) + ld r0,24(r3) + mtcrf 0x38,r0 + ld r0,0(r3) + ld r1,8(r3) + ld r2,16(r3) + mtlr r0 + mr r3,r1 + blr diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c linux-2.6.22-591/arch/powerpc/kernel/legacy_serial.c --- linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/legacy_serial.c 2007-12-21 15:36:11.000000000 -0500 @@ -11,6 +11,9 @@ #include #include #include +#ifdef CONFIG_KGDB_8250 +#include +#endif #undef DEBUG @@ -487,6 +490,9 @@ fixup_port_pio(i, np, port); if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI)) fixup_port_mmio(i, np, port); +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_platform_port(i, port); +#endif } DBG("Registering platform serial ports\n"); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/misc_32.S linux-2.6.22-591/arch/powerpc/kernel/misc_32.S --- linux-2.6.22-570/arch/powerpc/kernel/misc_32.S 2007-12-21 15:35:59.000000000 -0500 +++ 
linux-2.6.22-591/arch/powerpc/kernel/misc_32.S 2007-12-21 15:36:11.000000000 -0500 @@ -392,7 +392,7 @@ mtspr SPRN_L1CSR0,r3 isync blr -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 @@ -419,7 +419,7 @@ _GLOBAL(__flush_icache_range) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 andc r3,r3,r5 subf r4,r3,r4 @@ -514,8 +514,8 @@ */ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION - blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r3,r3,0,0,19 /* Get page base address */ li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 @@ -543,7 +543,7 @@ _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mfmsr r10 rlwinm r0,r10,0,28,26 /* clear DR */ mtmsr r0 diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/of_platform.c linux-2.6.22-591/arch/powerpc/kernel/of_platform.c --- linux-2.6.22-570/arch/powerpc/kernel/of_platform.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/of_platform.c 2007-12-21 15:36:11.000000000 -0500 @@ -427,14 +427,6 @@ /* Process "ranges" property */ pci_process_bridge_OF_ranges(phb, dev->node, 0); - /* Setup IO space. We use the non-dynamic version of that code here, - * which doesn't quite support unplugging. Next kernel release will - * have a better fix for this. - * Note also that we don't do ISA, this will also be fixed with a - * more massive rework. 
- */ - pci_setup_phb_io(phb, pci_io_base == 0); - /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/pci_64.c linux-2.6.22-591/arch/powerpc/kernel/pci_64.c --- linux-2.6.22-570/arch/powerpc/kernel/pci_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/pci_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -11,7 +11,7 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG +#define DEBUG #include #include @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -41,35 +42,26 @@ unsigned long pci_probe_only = 1; int pci_assign_all_buses = 0; -static int pci_initial_scan_done; static void fixup_resource(struct resource *res, struct pci_dev *dev); static void do_bus_setup(struct pci_bus *bus); -static void phbs_remap_io(void); /* pci_io_base -- the base address from which io bars are offsets. * This is the lowest I/O base address (so bar values are always positive), * and it *must* be the start of ISA space if an ISA bus exists because - * ISA drivers use hard coded offsets. If no ISA bus exists a dummy - * page is mapped and isa_io_limit prevents access to it. + * ISA drivers use hard coded offsets. If no ISA bus exists nothing + * is mapped on the first 64K of IO space */ -unsigned long isa_io_base; /* NULL if no ISA bus */ -EXPORT_SYMBOL(isa_io_base); -unsigned long pci_io_base; +unsigned long pci_io_base = ISA_IO_BASE; EXPORT_SYMBOL(pci_io_base); -void iSeries_pcibios_init(void); - LIST_HEAD(hose_list); static struct dma_mapping_ops *pci_dma_ops; +/* XXX kill that some day ... */ int global_phb_number; /* Global phb counter */ -/* Cached ISA bridge dev. 
*/ -struct pci_dev *ppc64_isabridge_dev = NULL; -EXPORT_SYMBOL_GPL(ppc64_isabridge_dev); - void set_pci_dma_ops(struct dma_mapping_ops *dma_ops) { pci_dma_ops = dma_ops; @@ -100,7 +92,7 @@ return; if (res->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - pci_io_base; + offset = (unsigned long)hose->io_base_virt - _IO_BASE; if (res->flags & IORESOURCE_MEM) offset = hose->pci_mem_offset; @@ -119,7 +111,7 @@ return; if (res->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - pci_io_base; + offset = (unsigned long)hose->io_base_virt - _IO_BASE; if (res->flags & IORESOURCE_MEM) offset = hose->pci_mem_offset; @@ -156,7 +148,7 @@ if (res->flags & IORESOURCE_IO) { unsigned long offset = (unsigned long)hose->io_base_virt - - pci_io_base; + _IO_BASE; /* Make sure we start at our min on all hoses */ if (start - offset < PCIBIOS_MIN_IO) start = PCIBIOS_MIN_IO + offset; @@ -535,10 +527,16 @@ bus->secondary = hose->first_busno; hose->bus = bus; + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + pcibios_map_io_space(bus); + bus->resource[0] = res = &hose->io_resource; - if (res->flags && request_resource(&ioport_resource, res)) + if (res->flags && request_resource(&ioport_resource, res)) { printk(KERN_ERR "Failed to request PCI IO region " "on PCI domain %04x\n", hose->global_number); + DBG("res->start = 0x%016lx, res->end = 0x%016lx\n", + res->start, res->end); + } for (i = 0; i < 3; ++i) { res = &hose->mem_resources[i]; @@ -596,17 +594,6 @@ if (ppc_md.pcibios_fixup) ppc_md.pcibios_fixup(); - /* Cache the location of the ISA bridge (if we have one) */ - ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - if (ppc64_isabridge_dev != NULL) - printk(KERN_DEBUG "ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); - - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - /* map in PCI I/O space */ - phbs_remap_io(); - - pci_initial_scan_done = 1; - printk(KERN_DEBUG "PCI: Probing PCI hardware done\n"); return 0; @@ -711,7 +698,7 
@@ #endif res_bit = IORESOURCE_MEM; } else { - io_offset = (unsigned long)hose->io_base_virt - pci_io_base; + io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; *offset += io_offset; res_bit = IORESOURCE_IO; } @@ -881,76 +868,6 @@ device_create_file(&pdev->dev, &dev_attr_devspec); } -#define ISA_SPACE_MASK 0x1 -#define ISA_SPACE_IO 0x1 - -static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, - unsigned long phb_io_base_phys, - void __iomem * phb_io_base_virt) -{ - /* Remove these asap */ - - struct pci_address { - u32 a_hi; - u32 a_mid; - u32 a_lo; - }; - - struct isa_address { - u32 a_hi; - u32 a_lo; - }; - - struct isa_range { - struct isa_address isa_addr; - struct pci_address pci_addr; - unsigned int size; - }; - - const struct isa_range *range; - unsigned long pci_addr; - unsigned int isa_addr; - unsigned int size; - int rlen = 0; - - range = of_get_property(isa_node, "ranges", &rlen); - if (range == NULL || (rlen < sizeof(struct isa_range))) { - printk(KERN_ERR "no ISA ranges or unexpected isa range size," - "mapping 64k\n"); - __ioremap_explicit(phb_io_base_phys, - (unsigned long)phb_io_base_virt, - 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED); - return; - } - - /* From "ISA Binding to 1275" - * The ranges property is laid out as an array of elements, - * each of which comprises: - * cells 0 - 1: an ISA address - * cells 2 - 4: a PCI address - * (size depending on dev->n_addr_cells) - * cell 5: the size of the range - */ - if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) { - isa_addr = range->isa_addr.a_lo; - pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | - range->pci_addr.a_lo; - - /* Assume these are both zero */ - if ((pci_addr != 0) || (isa_addr != 0)) { - printk(KERN_ERR "unexpected isa to pci mapping: %s\n", - __FUNCTION__); - return; - } - - size = PAGE_ALIGN(range->size); - - __ioremap_explicit(phb_io_base_phys, - (unsigned long) phb_io_base_virt, - size, _PAGE_NO_CACHE | _PAGE_GUARDED); - } -} - void 
__devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int prim) { @@ -1045,155 +962,122 @@ } } -void __devinit pci_setup_phb_io(struct pci_controller *hose, int primary) -{ - unsigned long size = hose->pci_io_size; - unsigned long io_virt_offset; - struct resource *res; - struct device_node *isa_dn; - - if (size == 0) - return; - - hose->io_base_virt = reserve_phb_iospace(size); - DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", - hose->global_number, hose->io_base_phys, - (unsigned long) hose->io_base_virt); - - if (primary) { - pci_io_base = (unsigned long)hose->io_base_virt; - isa_dn = of_find_node_by_type(NULL, "isa"); - if (isa_dn) { - isa_io_base = pci_io_base; - pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys, - hose->io_base_virt); - of_node_put(isa_dn); - } - } - - io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; - res = &hose->io_resource; - res->start += io_virt_offset; - res->end += io_virt_offset; - - /* If this is called after the initial PCI scan, then we need to - * proceed to IO mappings now - */ - if (pci_initial_scan_done) - __ioremap_explicit(hose->io_base_phys, - (unsigned long)hose->io_base_virt, - hose->pci_io_size, - _PAGE_NO_CACHE | _PAGE_GUARDED); -} +#ifdef CONFIG_HOTPLUG -void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose, - int primary) +int pcibios_unmap_io_space(struct pci_bus *bus) { - unsigned long size = hose->pci_io_size; - unsigned long io_virt_offset; - struct resource *res; + struct pci_controller *hose; - if (size == 0) - return; + WARN_ON(bus == NULL); - hose->io_base_virt = __ioremap(hose->io_base_phys, size, - _PAGE_NO_CACHE | _PAGE_GUARDED); - DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", - hose->global_number, hose->io_base_phys, - (unsigned long) hose->io_base_virt); - - if (primary) - pci_io_base = (unsigned long)hose->io_base_virt; - - io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; - res = &hose->io_resource; - 
res->start += io_virt_offset; - res->end += io_virt_offset; -} + /* If this is not a PHB, we only flush the hash table over + * the area mapped by this bridge. We don't play with the PTE + * mappings since we might have to deal with sub-page alignments + * so flushing the hash table is the only sane way to make sure + * that no hash entries are covering that removed bridge area + * while still allowing other busses overlapping those pages + */ + if (bus->self) { + struct resource *res = bus->resource[0]; + DBG("IO unmapping for PCI-PCI bridge %s\n", + pci_name(bus->self)); -static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys, - unsigned long *start_virt, unsigned long *size) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - struct resource *res; + __flush_hash_table_range(&init_mm, res->start + _IO_BASE, + res->end - res->start + 1); + return 0; + } - if (bus->self) - res = bus->resource[0]; - else - /* Root Bus */ - res = &hose->io_resource; + /* Get the host bridge */ + hose = pci_bus_to_host(bus); - if (res->end == 0 && res->start == 0) - return 1; + /* Check if we have IOs allocated */ + if (hose->io_base_alloc == 0) + return 0; - *start_virt = pci_io_base + res->start; - *start_phys = *start_virt + hose->io_base_phys - - (unsigned long) hose->io_base_virt; + DBG("IO unmapping for PHB %s\n", + ((struct device_node *)hose->arch_data)->full_name); + DBG(" alloc=0x%p\n", hose->io_base_alloc); - if (res->end > res->start) - *size = res->end - res->start + 1; - else { - printk("%s(): unexpected region 0x%lx->0x%lx\n", - __FUNCTION__, res->start, res->end); - return 1; - } + /* This is a PHB, we fully unmap the IO area */ + vunmap(hose->io_base_alloc); return 0; } +EXPORT_SYMBOL_GPL(pcibios_unmap_io_space); -int unmap_bus_range(struct pci_bus *bus) -{ - unsigned long start_phys; - unsigned long start_virt; - unsigned long size; +#endif /* CONFIG_HOTPLUG */ - if (!bus) { - printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); - return
1; - } +int __devinit pcibios_map_io_space(struct pci_bus *bus) +{ + struct vm_struct *area; + unsigned long phys_page; + unsigned long size_page; + unsigned long io_virt_offset; + struct pci_controller *hose; - if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) - return 1; - if (__iounmap_explicit((void __iomem *) start_virt, size)) - return 1; + WARN_ON(bus == NULL); + /* If this not a PHB, nothing to do, page tables still exist and + * thus HPTEs will be faulted in when needed + */ + if (bus->self) { + DBG("IO mapping for PCI-PCI bridge %s\n", + pci_name(bus->self)); + DBG(" virt=0x%016lx...0x%016lx\n", + bus->resource[0]->start + _IO_BASE, + bus->resource[0]->end + _IO_BASE); return 0; -} -EXPORT_SYMBOL(unmap_bus_range); - -int remap_bus_range(struct pci_bus *bus) -{ - unsigned long start_phys; - unsigned long start_virt; - unsigned long size; - - if (!bus) { - printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); - return 1; } + /* Get the host bridge */ + hose = pci_bus_to_host(bus); + phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); + size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); - if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) - return 1; - if (start_phys == 0) - return 1; - printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size); - if (__ioremap_explicit(start_phys, start_virt, size, - _PAGE_NO_CACHE | _PAGE_GUARDED)) - return 1; + /* Make sure IO area address is clear */ + hose->io_base_alloc = NULL; + /* If there's no IO to map on that bus, get away too */ + if (hose->pci_io_size == 0 || hose->io_base_phys == 0) return 0; -} -EXPORT_SYMBOL(remap_bus_range); -static void phbs_remap_io(void) -{ - struct pci_controller *hose, *tmp; + /* Let's allocate some IO space for that guy. We don't pass + * VM_IOREMAP because we don't care about alignment tricks that + * the core does in that case. 
Maybe we should due to stupid card + * with incomplete address decoding but I'd rather not deal with + * those outside of the reserved 64K legacy region. + */ + area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); + if (area == NULL) + return -ENOMEM; + hose->io_base_alloc = area->addr; + hose->io_base_virt = (void __iomem *)(area->addr + + hose->io_base_phys - phys_page); + + DBG("IO mapping for PHB %s\n", + ((struct device_node *)hose->arch_data)->full_name); + DBG(" phys=0x%016lx, virt=0x%p (alloc=0x%p)\n", + hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc); + DBG(" size=0x%016lx (alloc=0x%016lx)\n", + hose->pci_io_size, size_page); + + /* Establish the mapping */ + if (__ioremap_at(phys_page, area->addr, size_page, + _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL) + return -ENOMEM; + + /* Fixup hose IO resource */ + io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + hose->io_resource.start += io_virt_offset; + hose->io_resource.end += io_virt_offset; - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - remap_bus_range(hose->bus); + DBG(" hose->io_resource=0x%016lx...0x%016lx\n", + hose->io_resource.start, hose->io_resource.end); + + return 0; } +EXPORT_SYMBOL_GPL(pcibios_map_io_space); static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) { @@ -1201,8 +1085,7 @@ unsigned long offset; if (res->flags & IORESOURCE_IO) { - offset = (unsigned long)hose->io_base_virt - pci_io_base; - + offset = (unsigned long)hose->io_base_virt - _IO_BASE; res->start += offset; res->end += offset; } else if (res->flags & IORESOURCE_MEM) { @@ -1217,9 +1100,20 @@ /* Update device resources. 
*/ int i; - for (i = 0; i < PCI_NUM_RESOURCES; i++) - if (dev->resource[i].flags) - fixup_resource(&dev->resource[i], dev); + DBG("%s: Fixup resources:\n", pci_name(dev)); + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + if (!res->flags) + continue; + + DBG(" 0x%02x < %08lx:0x%016lx...0x%016lx\n", + i, res->flags, res->start, res->end); + + fixup_resource(res, dev); + + DBG(" > %08lx:0x%016lx...0x%016lx\n", + res->flags, res->start, res->end); + } } EXPORT_SYMBOL(pcibios_fixup_device_resources); @@ -1360,7 +1254,7 @@ return; if (rsrc->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - pci_io_base; + offset = (unsigned long)hose->io_base_virt - _IO_BASE; /* We pass a fully fixed up address to userland for MMIO instead of * a BAR value because X is lame and expects to be able to use that @@ -1410,7 +1304,7 @@ if (address >= hose->io_base_phys && address < (hose->io_base_phys + hose->pci_io_size)) { unsigned long base = - (unsigned long)hose->io_base_virt - pci_io_base; + (unsigned long)hose->io_base_virt - _IO_BASE; return base + (address - hose->io_base_phys); } } diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/process.c linux-2.6.22-591/arch/powerpc/kernel/process.c --- linux-2.6.22-570/arch/powerpc/kernel/process.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/process.c 2007-12-21 15:36:11.000000000 -0500 @@ -219,22 +219,26 @@ } #endif /* CONFIG_SMP */ -#ifdef CONFIG_PPC_MERGE /* XXX for now */ int set_dabr(unsigned long dabr) { +#ifdef CONFIG_PPC_MERGE /* XXX for now */ if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr); +#endif + /* XXX should we have a CPU_FTR_HAS_DABR ? 
*/ +#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) mtspr(SPRN_DABR, dabr); +#endif return 0; } -#endif #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); -static DEFINE_PER_CPU(unsigned long, current_dabr); #endif +static DEFINE_PER_CPU(unsigned long, current_dabr); + struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -299,12 +303,10 @@ #endif /* CONFIG_SMP */ -#ifdef CONFIG_PPC64 /* for now */ if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) { set_dabr(new->thread.dabr); __get_cpu_var(current_dabr) = new->thread.dabr; } -#endif /* CONFIG_PPC64 */ new_thread = &new->thread; old_thread = ¤t->thread; @@ -474,12 +476,10 @@ discard_lazy_cpu_state(); -#ifdef CONFIG_PPC64 /* for now */ if (current->thread.dabr) { current->thread.dabr = 0; set_dabr(0); } -#endif } void diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/prom_init.c linux-2.6.22-591/arch/powerpc/kernel/prom_init.c --- linux-2.6.22-570/arch/powerpc/kernel/prom_init.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/prom_init.c 2007-12-21 15:36:11.000000000 -0500 @@ -635,6 +635,7 @@ /* ibm,dynamic-reconfiguration-memory property supported */ #define OV5_DRCONF_MEMORY 0x20 #define OV5_LARGE_PAGES 0x10 /* large pages supported */ +#define OV5_DONATE_DEDICATE_CPU 0x02 /* donate dedicated CPU support */ /* PCIe/MSI support. 
Without MSI full PCIe is not supported */ #ifdef CONFIG_PCI_MSI #define OV5_MSI 0x01 /* PCIe/MSI support */ @@ -685,7 +686,8 @@ /* option vector 5: PAPR/OF options */ 3 - 2, /* length */ 0, /* don't ignore, don't halt */ - OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_MSI, + OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | + OV5_DONATE_DEDICATE_CPU | OV5_MSI, }; /* Old method - ELF header with PT_NOTE sections */ diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h linux-2.6.22-591/arch/powerpc/kernel/ptrace-common.h --- linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/ptrace-common.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2002 Stephen Rothwell, IBM Coproration - * Extracted from ptrace.c and ptrace32.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. - */ - -#ifndef _PPC64_PTRACE_COMMON_H -#define _PPC64_PTRACE_COMMON_H - -#include - -/* - * Set of msr bits that gdb can change on behalf of a process. - */ -#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1) - -/* - * Get contents of register REGNO in task TASK. - */ -static inline unsigned long get_reg(struct task_struct *task, int regno) -{ - unsigned long tmp = 0; - - /* - * Put the correct FP bits in, they might be wrong as a result - * of our lazy FP restore. - */ - if (regno == PT_MSR) { - tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; - tmp |= task->thread.fpexc_mode; - } else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { - tmp = ((unsigned long *)task->thread.regs)[regno]; - } - - return tmp; -} - -/* - * Write contents of register REGNO in task TASK. 
- */ -static inline int put_reg(struct task_struct *task, int regno, - unsigned long data) -{ - if (regno < PT_SOFTE) { - if (regno == PT_MSR) - data = (data & MSR_DEBUGCHANGE) - | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); - ((unsigned long *)task->thread.regs)[regno] = data; - return 0; - } - return -EIO; -} - -static inline void set_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - if (regs != NULL) - regs->msr |= MSR_SE; - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -static inline void clear_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - if (regs != NULL) - regs->msr &= ~MSR_SE; - clear_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -#ifdef CONFIG_ALTIVEC -/* - * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. - * The transfer totals 34 quadword. Quadwords 0-31 contain the - * corresponding vector registers. Quadword 32 contains the vscr as the - * last word (offset 12) within that quadword. Quadword 33 contains the - * vrsave as the first word (offset 0) within the quadword. - * - * This definition of the VMX state is compatible with the current PPC32 - * ptrace interface. This allows signal handling and ptrace to use the - * same structures. This also simplifies the implementation of a bi-arch - * (combined (32- and 64-bit) gdb. - */ - -/* - * Get contents of AltiVec register state in task TASK - */ -static inline int get_vrregs(unsigned long __user *data, - struct task_struct *task) -{ - unsigned long regsize; - - /* copy AltiVec registers VR[0] .. 
VR[31] */ - regsize = 32 * sizeof(vector128); - if (copy_to_user(data, task->thread.vr, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VSCR */ - regsize = 1 * sizeof(vector128); - if (copy_to_user(data, &task->thread.vscr, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VRSAVE */ - if (put_user(task->thread.vrsave, (u32 __user *)data)) - return -EFAULT; - - return 0; -} - -/* - * Write contents of AltiVec register state into task TASK. - */ -static inline int set_vrregs(struct task_struct *task, - unsigned long __user *data) -{ - unsigned long regsize; - - /* copy AltiVec registers VR[0] .. VR[31] */ - regsize = 32 * sizeof(vector128); - if (copy_from_user(task->thread.vr, data, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VSCR */ - regsize = 1 * sizeof(vector128); - if (copy_from_user(&task->thread.vscr, data, regsize)) - return -EFAULT; - data += (regsize / sizeof(unsigned long)); - - /* copy VRSAVE */ - if (get_user(task->thread.vrsave, (u32 __user *)data)) - return -EFAULT; - - return 0; -} -#endif - -static inline int ptrace_set_debugreg(struct task_struct *task, - unsigned long addr, unsigned long data) -{ - /* We only support one DABR and no IABRS at the moment */ - if (addr > 0) - return -EINVAL; - - /* The bottom 3 bits are flags */ - if ((data & ~0x7UL) >= TASK_SIZE) - return -EIO; - - /* Ensure translation is on */ - if (data && !(data & DABR_TRANSLATION)) - return -EIO; - - task->thread.dabr = data; - return 0; -} - -#endif /* _PPC64_PTRACE_COMMON_H */ diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace.c linux-2.6.22-591/arch/powerpc/kernel/ptrace.c --- linux-2.6.22-570/arch/powerpc/kernel/ptrace.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/ptrace.c 2007-12-21 15:36:11.000000000 -0500 @@ -35,11 +35,11 @@ #include #include -#ifdef CONFIG_PPC64 -#include "ptrace-common.h" -#endif +/* + * does not yet 
catch signals sent when the child dies. + * in exit.c or in signal.c. + */ -#ifdef CONFIG_PPC32 /* * Set of msr bits that gdb can change on behalf of a process. */ @@ -48,65 +48,117 @@ #else #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) #endif -#endif /* CONFIG_PPC32 */ /* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. + * Max register writeable via put_reg */ - #ifdef CONFIG_PPC32 +#define PT_MAX_PUT_REG PT_MQ +#else +#define PT_MAX_PUT_REG PT_CCR +#endif + /* * Get contents of register REGNO in task TASK. */ -static inline unsigned long get_reg(struct task_struct *task, int regno) +unsigned long ptrace_get_reg(struct task_struct *task, int regno) { - if (regno < sizeof(struct pt_regs) / sizeof(unsigned long) - && task->thread.regs != NULL) + unsigned long tmp = 0; + + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) { + tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; + return tmp | task->thread.fpexc_mode; + } + + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) return ((unsigned long *)task->thread.regs)[regno]; - return (0); + + return -EIO; } /* * Write contents of register REGNO in task TASK. */ -static inline int put_reg(struct task_struct *task, int regno, - unsigned long data) +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) { - if (regno <= PT_MQ && task->thread.regs != NULL) { + if (task->thread.regs == NULL) + return -EIO; + + if (regno <= PT_MAX_PUT_REG || regno == PT_TRAP) { if (regno == PT_MSR) data = (data & MSR_DEBUGCHANGE) | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); + /* We prevent mucking around with the reserved area of trap + * which are used internally by the kernel + */ + if (regno == PT_TRAP) + data &= 0xfff0; ((unsigned long *)task->thread.regs)[regno] = data; return 0; } return -EIO; } + +static int get_fpregs(void __user *data, struct task_struct *task, + int has_fpscr) +{ + unsigned int count = has_fpscr ? 
33 : 32; + + if (copy_to_user(data, task->thread.fpr, count * sizeof(double))) + return -EFAULT; + return 0; +} + +static int set_fpregs(void __user *data, struct task_struct *task, + int has_fpscr) +{ + unsigned int count = has_fpscr ? 33 : 32; + + if (copy_from_user(task->thread.fpr, data, count * sizeof(double))) + return -EFAULT; + return 0; +} + + #ifdef CONFIG_ALTIVEC /* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. + */ + +/* * Get contents of AltiVec register state in task TASK */ -static inline int get_vrregs(unsigned long __user *data, struct task_struct *task) +static int get_vrregs(unsigned long __user *data, struct task_struct *task) { - int i, j; - - if (!access_ok(VERIFY_WRITE, data, 133 * sizeof(unsigned long))) - return -EFAULT; + unsigned long regsize; /* copy AltiVec registers VR[0] .. 
VR[31] */ - for (i = 0; i < 32; i++) - for (j = 0; j < 4; j++, data++) - if (__put_user(task->thread.vr[i].u[j], data)) + regsize = 32 * sizeof(vector128); + if (copy_to_user(data, task->thread.vr, regsize)) return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VSCR */ - for (i = 0; i < 4; i++, data++) - if (__put_user(task->thread.vscr.u[i], data)) + regsize = 1 * sizeof(vector128); + if (copy_to_user(data, &task->thread.vscr, regsize)) return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VRSAVE */ - if (__put_user(task->thread.vrsave, data)) + if (put_user(task->thread.vrsave, (u32 __user *)data)) return -EFAULT; return 0; @@ -115,31 +167,29 @@ /* * Write contents of AltiVec register state into task TASK. */ -static inline int set_vrregs(struct task_struct *task, unsigned long __user *data) +static int set_vrregs(struct task_struct *task, unsigned long __user *data) { - int i, j; - - if (!access_ok(VERIFY_READ, data, 133 * sizeof(unsigned long))) - return -EFAULT; + unsigned long regsize; /* copy AltiVec registers VR[0] .. VR[31] */ - for (i = 0; i < 32; i++) - for (j = 0; j < 4; j++, data++) - if (__get_user(task->thread.vr[i].u[j], data)) + regsize = 32 * sizeof(vector128); + if (copy_from_user(task->thread.vr, data, regsize)) return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VSCR */ - for (i = 0; i < 4; i++, data++) - if (__get_user(task->thread.vscr.u[i], data)) + regsize = 1 * sizeof(vector128); + if (copy_from_user(&task->thread.vscr, data, regsize)) return -EFAULT; + data += (regsize / sizeof(unsigned long)); /* copy VRSAVE */ - if (__get_user(task->thread.vrsave, data)) + if (get_user(task->thread.vrsave, (u32 __user *)data)) return -EFAULT; return 0; } -#endif +#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_SPE @@ -156,7 +206,7 @@ /* * Get contents of SPE register state in task TASK. 
*/ -static inline int get_evrregs(unsigned long *data, struct task_struct *task) +static int get_evrregs(unsigned long *data, struct task_struct *task) { int i; @@ -182,7 +232,7 @@ /* * Write contents of SPE register state into task TASK. */ -static inline int set_evrregs(struct task_struct *task, unsigned long *data) +static int set_evrregs(struct task_struct *task, unsigned long *data) { int i; @@ -205,8 +255,8 @@ } #endif /* CONFIG_SPE */ -static inline void -set_single_step(struct task_struct *task) + +static void set_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -221,8 +271,7 @@ set_tsk_thread_flag(task, TIF_SINGLESTEP); } -static inline void -clear_single_step(struct task_struct *task) +static void clear_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -236,7 +285,25 @@ } clear_tsk_thread_flag(task, TIF_SINGLESTEP); } -#endif /* CONFIG_PPC32 */ + +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, + unsigned long data) +{ + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* Ensure translation is on */ + if (data && !(data & DABR_TRANSLATION)) + return -EIO; + + task->thread.dabr = data; + return 0; +} /* * Called by kernel/ptrace.c when detaching.. @@ -249,6 +316,62 @@ clear_single_step(child); } +/* + * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, + * we mark them as obsolete now, they will be removed in a future version + */ +static long arch_ptrace_old(struct task_struct *child, long request, long addr, + long data) +{ + int ret = -EPERM; + + switch(request) { + case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. 
*/ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned long __user *tmp = (unsigned long __user *)addr; + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ + flush_fp_to_thread(child); + ret = get_fpregs((void __user *)addr, child, 0); + break; + } + + case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ + flush_fp_to_thread(child); + ret = set_fpregs((void __user *)addr, child, 0); + break; + } + + } + return ret; +} + long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret = -EPERM; @@ -284,11 +407,9 @@ #endif break; -#ifdef CONFIG_PPC32 CHECK_FULL_REGS(child->thread.regs); -#endif if (index < PT_FPR0) { - tmp = get_reg(child, (int) index); + tmp = ptrace_get_reg(child, (int) index); } else { flush_fp_to_thread(child); tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; @@ -323,13 +444,9 @@ #endif break; -#ifdef CONFIG_PPC32 CHECK_FULL_REGS(child->thread.regs); -#endif - if (index == PT_ORIG_R3) - break; if (index < PT_FPR0) { - ret = put_reg(child, index, data); + ret = ptrace_put_reg(child, index, data); } else { flush_fp_to_thread(child); ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; @@ -384,7 +501,6 @@ break; } -#ifdef CONFIG_PPC64 case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ @@ -398,73 +514,61 @@ case PTRACE_SET_DEBUGREG: ret = ptrace_set_debugreg(child, addr, data); break; -#endif case PTRACE_DETACH: ret = ptrace_detach(child, data); break; - case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. 
*/ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) +#ifdef CONFIG_PPC64 + case PTRACE_GETREGS64: +#endif + case PTRACE_GETREGS: { /* Get all pt_regs from the child. */ + int ui; + if (!access_ok(VERIFY_WRITE, (void __user *)data, + sizeof(struct pt_regs))) { + ret = -EIO; break; - reg++; - tmp++; } - break; + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret |= __put_user(ptrace_get_reg(child, ui), + (unsigned long __user *) data); + data += sizeof(long); } - - case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); - if (ret) break; - reg++; - tmp++; } + +#ifdef CONFIG_PPC64 + case PTRACE_SETREGS64: +#endif + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int ui; + if (!access_ok(VERIFY_READ, (void __user *)data, + sizeof(struct pt_regs))) { + ret = -EIO; break; } - - case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - - flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret = __get_user(tmp, (unsigned long __user *) data); if (ret) break; - reg++; - tmp++; + ptrace_put_reg(child, ui, tmp); + data += sizeof(long); } break; } - case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. 
*/ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned long __user *tmp = (unsigned long __user *)addr; - + case PTRACE_GETFPREGS: { /* Get the child FPU state (FPR0...31 + FPSCR) */ flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); - if (ret) + ret = get_fpregs((void __user *)data, child, 1); break; - reg++; - tmp++; } + + case PTRACE_SETFPREGS: { /* Set the child FPU state (FPR0...31 + FPSCR) */ + flush_fp_to_thread(child); + ret = set_fpregs((void __user *)data, child, 1); break; } @@ -499,11 +603,18 @@ break; #endif + /* Old reverse args ptrace callss */ + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ + case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ + case PPC_PTRACE_SETFPREGS: /* Get FPRs 0 - 31. */ + ret = arch_ptrace_old(child, request, addr, data); + break; + default: ret = ptrace_request(child, request, addr, data); break; } - return ret; } diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c linux-2.6.22-591/arch/powerpc/kernel/ptrace32.c --- linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/ptrace32.c 2007-12-21 15:36:11.000000000 -0500 @@ -33,13 +33,55 @@ #include #include -#include "ptrace-common.h" - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. */ +/* + * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, + * we mark them as obsolete now, they will be removed in a future version + */ +static long compat_ptrace_old(struct task_struct *child, long request, + long addr, long data) +{ + int ret = -EPERM; + + switch(request) { + case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. 
*/ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + for (i = 0; i < 32; i++) { + ret = put_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ + int i; + unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; + unsigned int __user *tmp = (unsigned int __user *)addr; + + for (i = 0; i < 32; i++) { + ret = get_user(*reg, tmp); + if (ret) + break; + reg++; + tmp++; + } + break; + } + + } + return ret; +} + long compat_sys_ptrace(int request, int pid, unsigned long addr, unsigned long data) { @@ -123,7 +165,7 @@ break; if (index < PT_FPR0) { - tmp = get_reg(child, index); + tmp = ptrace_get_reg(child, index); } else { flush_fp_to_thread(child); /* @@ -162,7 +204,9 @@ else part = 0; /* want the 1st half of the register (left-most). */ - /* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */ + /* Validate the input - check to see if address is on the wrong boundary + * or beyond the end of the user area + */ if ((addr & 3) || numReg > PT_FPSCR) break; @@ -170,7 +214,7 @@ flush_fp_to_thread(child); tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0]; } else { /* register within PT_REGS struct */ - tmp = get_reg(child, numReg); + tmp = ptrace_get_reg(child, numReg); } reg32bits = ((u32*)&tmp)[part]; ret = put_user(reg32bits, (u32 __user *)data); @@ -226,10 +270,8 @@ if ((addr & 3) || (index > PT_FPSCR32)) break; - if (index == PT_ORIG_R3) - break; if (index < PT_FPR0) { - ret = put_reg(child, index, data); + ret = ptrace_put_reg(child, index, data); } else { flush_fp_to_thread(child); /* @@ -258,70 +300,25 @@ /* Determine which register the user wants */ index = (u64)addr >> 2; numReg = index / 2; + /* * Validate the input - check to see if address is on the * wrong boundary or beyond the end of the user area */ if ((addr & 3) || (numReg 
> PT_FPSCR)) break; - /* Insure it is a register we let them change */ - if ((numReg == PT_ORIG_R3) - || ((numReg > PT_CCR) && (numReg < PT_FPR0))) - break; - if (numReg >= PT_FPR0) { - flush_fp_to_thread(child); - } - if (numReg == PT_MSR) - data = (data & MSR_DEBUGCHANGE) - | (child->thread.regs->msr & ~MSR_DEBUGCHANGE); - ((u32*)child->thread.regs)[index] = data; - ret = 0; - break; - } - - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: { /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + if (numReg < PT_FPR0) { + unsigned long freg = ptrace_get_reg(child, numReg); + if (index % 2) + freg = (freg & ~0xfffffffful) | (data & 0xfffffffful); else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - ret = 0; - break; - } - - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: { + freg = (freg & 0xfffffffful) | (data << 32); + ret = ptrace_put_reg(child, numReg, freg); + } else { + flush_fp_to_thread(child); + ((unsigned int *)child->thread.regs)[index] = data; ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - break; } - - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_single_step(child); - child->exit_code = data; - /* give it a chance to run. 
*/ - wake_up_process(child); - ret = 0; break; } @@ -334,95 +331,67 @@ break; } - case PTRACE_SET_DEBUGREG: - ret = ptrace_set_debugreg(child, addr, data); - break; - - case PTRACE_DETACH: - ret = ptrace_detach(child, data); + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); break; - case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; - } + case PTRACE_GETREGS: { /* Get all pt_regs from the child. */ + int ui; + if (!access_ok(VERIFY_WRITE, (void __user *)data, + PT_REGS_COUNT * sizeof(int))) { + ret = -EIO; break; } - - case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret |= __put_user(ptrace_get_reg(child, ui), + (unsigned int __user *) data); + data += sizeof(int); } break; } - case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = put_user(*reg, tmp); - if (ret) - break; - reg++; - tmp++; - } + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int ui; + if (!access_ok(VERIFY_READ, (void __user *)data, + PT_REGS_COUNT * sizeof(int))) { + ret = -EIO; break; } - - case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. 
*/ - int i; - unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; - unsigned int __user *tmp = (unsigned int __user *)addr; - - flush_fp_to_thread(child); - - for (i = 0; i < 32; i++) { - ret = get_user(*reg, tmp); + ret = 0; + for (ui = 0; ui < PT_REGS_COUNT; ui ++) { + ret = __get_user(tmp, (unsigned int __user *) data); if (ret) break; - reg++; - tmp++; + ptrace_put_reg(child, ui, tmp); + data += sizeof(int); } break; } - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned int __user *) data); - break; - -#ifdef CONFIG_ALTIVEC + case PTRACE_GETFPREGS: + case PTRACE_SETFPREGS: case PTRACE_GETVRREGS: - /* Get the child altivec register state. */ - flush_altivec_to_thread(child); - ret = get_vrregs((unsigned long __user *)data, child); + case PTRACE_SETVRREGS: + case PTRACE_GETREGS64: + case PTRACE_SETREGS64: + case PPC_PTRACE_GETFPREGS: + case PPC_PTRACE_SETFPREGS: + case PTRACE_KILL: + case PTRACE_SINGLESTEP: + case PTRACE_DETACH: + case PTRACE_SET_DEBUGREG: + case PTRACE_SYSCALL: + case PTRACE_CONT: + ret = arch_ptrace(child, request, addr, data); break; - case PTRACE_SETVRREGS: - /* Set the child altivec register state. */ - flush_altivec_to_thread(child); - ret = set_vrregs(child, (unsigned long __user *)data); + /* Old reverse args ptrace callss */ + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. 
*/ + ret = compat_ptrace_old(child, request, addr, data); break; -#endif default: ret = ptrace_request(child, request, addr, data); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c linux-2.6.22-591/arch/powerpc/kernel/rtas_pci.c --- linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/rtas_pci.c 2007-12-21 15:36:11.000000000 -0500 @@ -278,10 +278,8 @@ { struct device_node *node; struct pci_controller *phb; - unsigned int index; struct device_node *root = of_find_node_by_path("/"); - index = 0; for (node = of_get_next_child(root, NULL); node != NULL; node = of_get_next_child(root, node)) { @@ -295,8 +293,7 @@ continue; rtas_setup_phb(phb); pci_process_bridge_OF_ranges(phb, node, 0); - pci_setup_phb_io(phb, index == 0); - index++; + isa_bridge_find_early(phb); } of_node_put(root); @@ -335,7 +332,7 @@ return 1; } - rc = unmap_bus_range(b); + rc = pcibios_unmap_io_space(b); if (rc) { printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", __FUNCTION__, b->name); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/setup_32.c linux-2.6.22-591/arch/powerpc/kernel/setup_32.c --- linux-2.6.22-570/arch/powerpc/kernel/setup_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/setup_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -45,10 +45,6 @@ #define DBG(fmt...) -#if defined CONFIG_KGDB -#include -#endif - extern void bootx_init(unsigned long r4, unsigned long phys); struct ide_machdep_calls ppc_ide_md; @@ -245,30 +241,16 @@ xmon_setup(); -#if defined(CONFIG_KGDB) - if (ppc_md.kgdb_map_scc) - ppc_md.kgdb_map_scc(); - set_debug_traps(); - if (strstr(cmd_line, "gdb")) { - if (ppc_md.progress) - ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); - printk("kgdb breakpoint activated\n"); - breakpoint(); - } -#endif - /* * Set cache line size based on type of cpu as a default. 
* Systems with OF can look in the properties on the cpu node(s) * for a possibly more accurate value. */ - if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; ucache_bsize = 0; - } else - ucache_bsize = dcache_bsize = icache_bsize - = cur_cpu_spec->dcache_bsize; + if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) + ucache_bsize = icache_bsize = dcache_bsize; /* reboot on panic */ panic_timeout = 180; diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.c linux-2.6.22-591/arch/powerpc/kernel/signal.c --- linux-2.6.22-570/arch/powerpc/kernel/signal.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,180 @@ +/* + * Common signal handling code for both 32 and 64 bits + * + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Extracted from signal_32.c and signal_64.c + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. 
+ */ + +#include +#include +#include +#include + +#include "signal.h" + +/* + * Allocate space for the signal frame + */ +void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size) +{ + unsigned long oldsp, newsp; + + /* Default to using normal stack */ + oldsp = regs->gpr[1]; + + /* Check for alt stack */ + if ((ka->sa.sa_flags & SA_ONSTACK) && + current->sas_ss_size && !on_sig_stack(oldsp)) + oldsp = (current->sas_ss_sp + current->sas_ss_size); + + /* Get aligned frame */ + newsp = (oldsp - frame_size) & ~0xFUL; + + /* Check access */ + if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp)) + return NULL; + + return (void __user *)newsp; +} + + +/* + * Restore the user process's signal mask + */ +void restore_sigmask(sigset_t *set) +{ + sigdelsetmask(set, ~_BLOCKABLE); + spin_lock_irq(&current->sighand->siglock); + current->blocked = *set; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); +} + +static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, + int has_handler) +{ + unsigned long ret = regs->gpr[3]; + int restart = 1; + + /* syscall ? */ + if (TRAP(regs) != 0x0C00) + return; + + /* error signalled ? */ + if (!(regs->ccr & 0x10000000)) + return; + + switch (ret) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + /* ERESTARTNOHAND means that the syscall should only be + * restarted if there was no handler for the signal, and since + * we only get here if there is a handler, we dont restart. + */ + restart = !has_handler; + break; + case ERESTARTSYS: + /* ERESTARTSYS means to restart the syscall if there is no + * handler or the handler was registered with SA_RESTART + */ + restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0; + break; + case ERESTARTNOINTR: + /* ERESTARTNOINTR means that the syscall should be + * called again after the signal handler returns. 
+ */ + break; + default: + return; + } + if (restart) { + if (ret == ERESTART_RESTARTBLOCK) + regs->gpr[0] = __NR_restart_syscall; + else + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; + regs->result = 0; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } +} + +int do_signal(sigset_t *oldset, struct pt_regs *regs) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + int ret; + int is32 = is_32bit_task(); + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = &current->saved_sigmask; + else if (!oldset) + oldset = &current->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + /* Is there any syscall restart business here ? */ + check_syscall_restart(regs, &ka, signr > 0); + + if (signr <= 0) { + /* No signal to deliver -- put the saved sigmask back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + } + return 0; /* no signals delivered */ + } + + /* + * Reenable the DABR before delivering the signal to + * user space. The DABR will have been cleared if it + * triggered inside the kernel. + */ + if (current->thread.dabr) + set_dabr(current->thread.dabr); + + if (is32) { + if (ka.sa.sa_flags & SA_SIGINFO) + ret = handle_rt_signal32(signr, &ka, &info, oldset, + regs); + else + ret = handle_signal32(signr, &ka, &info, oldset, + regs); + } else { + ret = handle_rt_signal64(signr, &ka, &info, oldset, regs); + } + + if (ret) { + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, + &ka.sa.sa_mask); + if (!(ka.sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, signr); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + /* + * A signal was successfully delivered; the saved sigmask is in + * its frame, and we can clear the TIF_RESTORE_SIGMASK flag. 
+ */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + } + + return ret; +} + +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + unsigned long r5, unsigned long r6, unsigned long r7, + unsigned long r8, struct pt_regs *regs) +{ + return do_sigaltstack(uss, uoss, regs->gpr[1]); +} diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.h linux-2.6.22-591/arch/powerpc/kernel/signal.h --- linux-2.6.22-570/arch/powerpc/kernel/signal.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/signal.h 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Extracted from signal_32.c and signal_64.c + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#ifndef _POWERPC_ARCH_SIGNAL_H +#define _POWERPC_ARCH_SIGNAL_H + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size); +extern void restore_sigmask(sigset_t *set); + +extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs); + +extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs); + + +#ifdef CONFIG_PPC64 + +static inline int is_32bit_task(void) +{ + return test_thread_flag(TIF_32BIT); +} + +extern int handle_rt_signal64(int signr, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs); + +#else /* CONFIG_PPC64 */ + +static inline int is_32bit_task(void) +{ + return 1; +} + +static inline int handle_rt_signal64(int signr, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs) +{ + return -EFAULT; +} + +#endif /* 
!defined(CONFIG_PPC64) */ + +#endif /* _POWERPC_ARCH_SIGNAL_H */ diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_32.c linux-2.6.22-591/arch/powerpc/kernel/signal_32.c --- linux-2.6.22-570/arch/powerpc/kernel/signal_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/signal_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -51,12 +51,11 @@ #include #endif -#undef DEBUG_SIG +#include "signal.h" -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#undef DEBUG_SIG #ifdef CONFIG_PPC64 -#define do_signal do_signal32 #define sys_sigsuspend compat_sys_sigsuspend #define sys_rt_sigsuspend compat_sys_rt_sigsuspend #define sys_rt_sigreturn compat_sys_rt_sigreturn @@ -231,8 +230,6 @@ #endif /* CONFIG_PPC64 */ -int do_signal(sigset_t *oldset, struct pt_regs *regs); - /* * Atomically swap in the new signal mask, and wait for a signal. */ @@ -251,14 +248,6 @@ return -ERESTARTNOHAND; } -#ifdef CONFIG_PPC32 -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, int r5, - int r6, int r7, int r8, struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->gpr[1]); -} -#endif - long sys_sigaction(int sig, struct old_sigaction __user *act, struct old_sigaction __user *oact) { @@ -293,14 +282,17 @@ /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: - * a sigregs struct + * an ABI gap of 56 words + * an mcontext struct * a sigcontext struct * a gap of __SIGNAL_FRAMESIZE bytes * - * Each of these things must be a multiple of 16 bytes in size. + * Each of these things must be a multiple of 16 bytes in size. 
The following + * structure represent all of this except the __SIGNAL_FRAMESIZE gap * */ -struct sigregs { +struct sigframe { + struct sigcontext sctx; /* the sigcontext */ struct mcontext mctx; /* all the register values */ /* * Programs using the rs6000/xcoff abi can save up to 19 gp @@ -703,44 +695,22 @@ } #endif /* CONFIG_PPC64 */ - -/* - * Restore the user process's signal mask - */ -#ifdef CONFIG_PPC64 -extern void restore_sigmask(sigset_t *set); -#else /* CONFIG_PPC64 */ -static void restore_sigmask(sigset_t *set) -{ - sigdelsetmask(set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = *set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); -} -#endif - /* * Set up a signal frame for a "real-time" signal handler * (one which gets siginfo). */ -static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, +int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, - struct pt_regs *regs, unsigned long newsp) + struct pt_regs *regs) { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; - unsigned long origsp = newsp; + unsigned long newsp = 0; /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - newsp -= sizeof(*rt_sf); - rt_sf = (struct rt_sigframe __user *)newsp; - - /* create a stack frame for the caller of the handler */ - newsp -= __SIGNAL_FRAMESIZE + 16; - - if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp)) + rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf)); + if (unlikely(rt_sf == NULL)) goto badframe; /* Put the siginfo & fill in most of the ucontext */ @@ -770,8 +740,12 @@ current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; + + /* Fill registers for signal handler */ regs->gpr[1] = newsp; regs->gpr[3] = sig; regs->gpr[4] = 
(unsigned long) &rt_sf->info; @@ -1015,27 +989,18 @@ /* * OK, we're invoking a handler */ -static int handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs, - unsigned long newsp) +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { struct sigcontext __user *sc; - struct sigregs __user *frame; - unsigned long origsp = newsp; + struct sigframe __user *frame; + unsigned long newsp = 0; /* Set up Signal Frame */ - newsp -= sizeof(struct sigregs); - frame = (struct sigregs __user *) newsp; - - /* Put a sigcontext on the stack */ - newsp -= sizeof(*sc); - sc = (struct sigcontext __user *) newsp; - - /* create a stack frame for the caller of the handler */ - newsp -= __SIGNAL_FRAMESIZE; - - if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp)) + frame = get_sigframe(ka, regs, sizeof(*frame)); + if (unlikely(frame == NULL)) goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; #if _NSIG != 64 #error "Please adjust handle_signal()" @@ -1047,7 +1012,7 @@ #else || __put_user(oldset->sig[1], &sc->_unused[3]) #endif - || __put_user(to_user_ptr(frame), &sc->regs) + || __put_user(to_user_ptr(&frame->mctx), &sc->regs) || __put_user(sig, &sc->signal)) goto badframe; @@ -1063,8 +1028,11 @@ current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; + regs->gpr[1] = newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) sc; @@ -1126,106 +1094,3 @@ force_sig(SIGSEGV, current); return 0; } - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. 
- */ -int do_signal(sigset_t *oldset, struct pt_regs *regs) -{ - siginfo_t info; - struct k_sigaction ka; - unsigned int newsp; - int signr, ret; - -#ifdef CONFIG_PPC32 - if (try_to_freeze()) { - signr = 0; - if (!signal_pending(current)) - goto no_signal; - } -#endif - - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = &current->saved_sigmask; - else if (!oldset) - oldset = &current->blocked; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); -#ifdef CONFIG_PPC32 -no_signal: -#endif - if (TRAP(regs) == 0x0C00 /* System Call! */ - && regs->ccr & 0x10000000 /* error signalled */ - && ((ret = regs->gpr[3]) == ERESTARTSYS - || ret == ERESTARTNOHAND || ret == ERESTARTNOINTR - || ret == ERESTART_RESTARTBLOCK)) { - - if (signr > 0 - && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK - || (ret == ERESTARTSYS - && !(ka.sa.sa_flags & SA_RESTART)))) { - /* make the system call return an EINTR error */ - regs->result = -EINTR; - regs->gpr[3] = EINTR; - /* note that the cr0.SO bit is already set */ - } else { - regs->nip -= 4; /* Back up & retry system call */ - regs->result = 0; - regs->trap = 0; - if (ret == ERESTART_RESTARTBLOCK) - regs->gpr[0] = __NR_restart_syscall; - else - regs->gpr[3] = regs->orig_gpr3; - } - } - - if (signr == 0) { - /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); - } - return 0; /* no signals delivered */ - } - - if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size - && !on_sig_stack(regs->gpr[1])) - newsp = current->sas_ss_sp + current->sas_ss_size; - else - newsp = regs->gpr[1]; - newsp &= ~0xfUL; - -#ifdef CONFIG_PPC64 - /* - * Reenable the DABR before delivering the signal to - * user space. The DABR will have been cleared if it - * triggered inside the kernel. - */ - if (current->thread.dabr) - set_dabr(current->thread.dabr); -#endif - - /* Whee! Actually deliver the signal. 
*/ - if (ka.sa.sa_flags & SA_SIGINFO) - ret = handle_rt_signal(signr, &ka, &info, oldset, regs, newsp); - else - ret = handle_signal(signr, &ka, &info, oldset, regs, newsp); - - if (ret) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, - &ka.sa.sa_mask); - if (!(ka.sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - /* A signal was successfully delivered; the saved sigmask is in - its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - } - - return ret; -} diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_64.c linux-2.6.22-591/arch/powerpc/kernel/signal_64.c --- linux-2.6.22-570/arch/powerpc/kernel/signal_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/kernel/signal_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -34,9 +34,9 @@ #include #include -#define DEBUG_SIG 0 +#include "signal.h" -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define DEBUG_SIG 0 #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) @@ -64,14 +64,6 @@ char abigap[288]; } __attribute__ ((aligned (16))); -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, - unsigned long r6, unsigned long r7, unsigned long r8, - struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->gpr[1]); -} - - /* * Set up the sigcontext for the signal frame. */ @@ -208,25 +200,6 @@ } /* - * Allocate space for the signal frame - */ -static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size) -{ - unsigned long newsp; - - /* Default to using normal stack */ - newsp = regs->gpr[1]; - - if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) { - if (! 
on_sig_stack(regs->gpr[1])) - newsp = (current->sas_ss_sp + current->sas_ss_size); - } - - return (void __user *)((newsp - frame_size) & -16ul); -} - -/* * Setup the trampoline code on the stack */ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) @@ -253,19 +226,6 @@ } /* - * Restore the user process's signal mask (also used by signal32.c) - */ -void restore_sigmask(sigset_t *set) -{ - sigdelsetmask(set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = *set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); -} - - -/* * Handle {get,set,swap}_context operations */ int sys_swapcontext(struct ucontext __user *old_ctx, @@ -359,7 +319,7 @@ return 0; } -static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, +int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { /* Handler is *really* a pointer to the function descriptor for @@ -373,8 +333,7 @@ long err = 0; frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + if (unlikely(frame == NULL)) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); @@ -411,7 +370,7 @@ funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ - newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE; + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); /* Set up "regs" so we "return" to the signal handler. 
*/ @@ -442,134 +401,3 @@ force_sigsegv(signr, current); return 0; } - - -/* - * OK, we're invoking a handler - */ -static int handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) -{ - int ret; - - /* Set up Signal Frame */ - ret = setup_rt_frame(sig, ka, info, oldset, regs); - - if (ret) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } - - return ret; -} - -static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) -{ - switch ((int)regs->result) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - /* ERESTARTNOHAND means that the syscall should only be - * restarted if there was no handler for the signal, and since - * we only get here if there is a handler, we dont restart. - */ - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - break; - case -ERESTARTSYS: - /* ERESTARTSYS means to restart the syscall if there is no - * handler or the handler was registered with SA_RESTART - */ - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - /* ERESTARTNOINTR means that the syscall should be - * called again after the signal handler returns. - */ - regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; - regs->result = 0; - break; - } -} - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. 
- */ -int do_signal(sigset_t *oldset, struct pt_regs *regs) -{ - siginfo_t info; - int signr; - struct k_sigaction ka; - - /* - * If the current thread is 32 bit - invoke the - * 32 bit signal handling code - */ - if (test_thread_flag(TIF_32BIT)) - return do_signal32(oldset, regs); - - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = &current->saved_sigmask; - else if (!oldset) - oldset = &current->blocked; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (signr > 0) { - int ret; - - /* Whee! Actually deliver the signal. */ - if (TRAP(regs) == 0x0C00) - syscall_restart(regs, &ka); - - /* - * Reenable the DABR before delivering the signal to - * user space. The DABR will have been cleared if it - * triggered inside the kernel. - */ - if (current->thread.dabr) - set_dabr(current->thread.dabr); - - ret = handle_signal(signr, &ka, &info, oldset, regs); - - /* If a signal was successfully delivered, the saved sigmask is in - its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ - if (ret && test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - - return ret; - } - - if (TRAP(regs) == 0x0C00) { /* System Call! 
*/ - if ((int)regs->result == -ERESTARTNOHAND || - (int)regs->result == -ERESTARTSYS || - (int)regs->result == -ERESTARTNOINTR) { - regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; /* Back up & retry system call */ - regs->result = 0; - } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) { - regs->gpr[0] = __NR_restart_syscall; - regs->nip -= 4; - regs->result = 0; - } - } - /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); - } - - return 0; -} -EXPORT_SYMBOL(do_signal); diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c linux-2.6.22-591/arch/powerpc/kernel/sys_ppc32.c --- linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/sys_ppc32.c 2007-12-21 15:36:11.000000000 -0500 @@ -773,6 +773,13 @@ return sys_truncate(path, (high << 32) | low); } +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, + u32 lenhi, u32 lenlo) +{ + return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + ((loff_t)lenhi << 32) | lenlo); +} + asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, unsigned long low) { diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/vdso.c linux-2.6.22-591/arch/powerpc/kernel/vdso.c --- linux-2.6.22-570/arch/powerpc/kernel/vdso.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/kernel/vdso.c 2007-12-21 15:36:11.000000000 -0500 @@ -671,7 +671,7 @@ /* * Fill up the "systemcfg" stuff for backward compatiblity */ - strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); + strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); vdso_data->version.major = SYSTEMCFG_MAJOR; vdso_data->version.minor = SYSTEMCFG_MINOR; vdso_data->processor = mfspr(SPRN_PVR); diff -Nurb linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c linux-2.6.22-591/arch/powerpc/mm/44x_mmu.c --- 
linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/44x_mmu.c 2007-12-21 15:36:11.000000000 -0500 @@ -12,7 +12,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds diff -Nurb linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c linux-2.6.22-591/arch/powerpc/mm/4xx_mmu.c --- linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/4xx_mmu.c 2007-12-21 15:36:11.000000000 -0500 @@ -9,7 +9,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds diff -Nurb linux-2.6.22-570/arch/powerpc/mm/Makefile linux-2.6.22-591/arch/powerpc/mm/Makefile --- linux-2.6.22-570/arch/powerpc/mm/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -11,8 +11,7 @@ hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o \ hash_utils_64.o hash_low_64.o tlb_64.o \ - slb_low.o slb.o stab.o mmap.o imalloc.o \ - $(hash-y) + slb_low.o slb.o stab.o mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o tlb_32.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fault.c linux-2.6.22-591/arch/powerpc/mm/fault.c --- linux-2.6.22-570/arch/powerpc/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 @@ -29,6 +29,7 @@ #include 
#include #include +#include #include #include @@ -381,7 +382,7 @@ printk("VM: killing process %s(%d:#%u)\n", current->comm, current->pid, current->xid); if (user_mode(regs)) - do_exit(SIGKILL); + do_group_exit(SIGKILL); return SIGKILL; do_sigbus: @@ -412,6 +413,13 @@ return; } +#ifdef CONFIG_KGDB + if (atomic_read(&debugger_active) && kgdb_may_fault) + /* Restore our previous state. */ + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + /* Not reached. */ +#endif + /* kernel has accessed a bad area */ switch (regs->trap) { diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c linux-2.6.22-591/arch/powerpc/mm/fsl_booke_mmu.c --- linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/fsl_booke_mmu.c 2007-12-21 15:36:11.000000000 -0500 @@ -14,7 +14,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
* * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c linux-2.6.22-591/arch/powerpc/mm/hash_native_64.c --- linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/hash_native_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -104,7 +104,7 @@ spin_unlock(&native_tlbie_lock); } -static inline void native_lock_hpte(hpte_t *hptep) +static inline void native_lock_hpte(struct hash_pte *hptep) { unsigned long *word = &hptep->v; @@ -116,7 +116,7 @@ } } -static inline void native_unlock_hpte(hpte_t *hptep) +static inline void native_unlock_hpte(struct hash_pte *hptep) { unsigned long *word = &hptep->v; @@ -128,7 +128,7 @@ unsigned long pa, unsigned long rflags, unsigned long vflags, int psize) { - hpte_t *hptep = htab_address + hpte_group; + struct hash_pte *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; int i; @@ -177,7 +177,7 @@ static long native_hpte_remove(unsigned long hpte_group) { - hpte_t *hptep; + struct hash_pte *hptep; int i; int slot_offset; unsigned long hpte_v; @@ -217,7 +217,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long va, int psize, int local) { - hpte_t *hptep = htab_address + slot; + struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; int ret = 0; @@ -233,15 +233,14 @@ /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { DBG_LOW(" -> miss\n"); - native_unlock_hpte(hptep); ret = -1; } else { DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); - native_unlock_hpte(hptep); } + native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. 
*/ tlbie(va, psize, local); @@ -251,7 +250,7 @@ static long native_hpte_find(unsigned long va, int psize) { - hpte_t *hptep; + struct hash_pte *hptep; unsigned long hash; unsigned long i, j; long slot; @@ -294,7 +293,7 @@ { unsigned long vsid, va; long slot; - hpte_t *hptep; + struct hash_pte *hptep; vsid = get_kernel_vsid(ea); va = (vsid << 28) | (ea & 0x0fffffff); @@ -315,7 +314,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, int psize, int local) { - hpte_t *hptep = htab_address + slot; + struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v; unsigned long want_v; unsigned long flags; @@ -345,7 +344,7 @@ #define LP_BITS 8 #define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) -static void hpte_decode(hpte_t *hpte, unsigned long slot, +static void hpte_decode(struct hash_pte *hpte, unsigned long slot, int *psize, unsigned long *va) { unsigned long hpte_r = hpte->r; @@ -415,7 +414,7 @@ static void native_hpte_clear(void) { unsigned long slot, slots, flags; - hpte_t *hptep = htab_address; + struct hash_pte *hptep = htab_address; unsigned long hpte_v, va; unsigned long pteg_count; int psize; @@ -462,7 +461,7 @@ static void native_flush_hash_range(unsigned long number, int local) { unsigned long va, hash, index, hidx, shift, slot; - hpte_t *hptep; + struct hash_pte *hptep; unsigned long hpte_v; unsigned long want_v; unsigned long flags; diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c linux-2.6.22-591/arch/powerpc/mm/hash_utils_64.c --- linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/hash_utils_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -87,7 +87,7 @@ static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; -hpte_t *htab_address; +struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; int mmu_linear_psize = MMU_PAGE_4K; diff -Nurb linux-2.6.22-570/arch/powerpc/mm/imalloc.c 
linux-2.6.22-591/arch/powerpc/mm/imalloc.c --- linux-2.6.22-570/arch/powerpc/mm/imalloc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/imalloc.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,313 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#include -#include -#include -#include -#include - -#include "mmu_decl.h" - -static DEFINE_MUTEX(imlist_mutex); -struct vm_struct * imlist = NULL; - -static int get_free_im_addr(unsigned long size, unsigned long *im_addr) -{ - unsigned long addr; - struct vm_struct **p, *tmp; - - addr = ioremap_bot; - for (p = &imlist; (tmp = *p) ; p = &tmp->next) { - if (size + addr < (unsigned long) tmp->addr) - break; - if ((unsigned long)tmp->addr >= ioremap_bot) - addr = tmp->size + (unsigned long) tmp->addr; - if (addr >= IMALLOC_END-size) - return 1; - } - *im_addr = addr; - - return 0; -} - -/* Return whether the region described by v_addr and size is a subset - * of the region described by parent - */ -static inline int im_region_is_subset(unsigned long v_addr, unsigned long size, - struct vm_struct *parent) -{ - return (int) (v_addr >= (unsigned long) parent->addr && - v_addr < (unsigned long) parent->addr + parent->size && - size < parent->size); -} - -/* Return whether the region described by v_addr and size is a superset - * of the region described by child - */ -static int im_region_is_superset(unsigned long v_addr, unsigned long size, - struct vm_struct *child) -{ - struct vm_struct parent; - - parent.addr = (void *) v_addr; - parent.size = size; - - return im_region_is_subset((unsigned long) child->addr, child->size, - &parent); -} - -/* Return whether the region described by v_addr and size overlaps - * the region 
described by vm. Overlapping regions meet the - * following conditions: - * 1) The regions share some part of the address space - * 2) The regions aren't identical - * 3) Neither region is a subset of the other - */ -static int im_region_overlaps(unsigned long v_addr, unsigned long size, - struct vm_struct *vm) -{ - if (im_region_is_superset(v_addr, size, vm)) - return 0; - - return (v_addr + size > (unsigned long) vm->addr + vm->size && - v_addr < (unsigned long) vm->addr + vm->size) || - (v_addr < (unsigned long) vm->addr && - v_addr + size > (unsigned long) vm->addr); -} - -/* Determine imalloc status of region described by v_addr and size. - * Can return one of the following: - * IM_REGION_UNUSED - Entire region is unallocated in imalloc space. - * IM_REGION_SUBSET - Region is a subset of a region that is already - * allocated in imalloc space. - * vm will be assigned to a ptr to the parent region. - * IM_REGION_EXISTS - Exact region already allocated in imalloc space. - * vm will be assigned to a ptr to the existing imlist - * member. - * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space. - * IM_REGION_SUPERSET - Region is a superset of a region that is already - * allocated in imalloc space. 
- */ -static int im_region_status(unsigned long v_addr, unsigned long size, - struct vm_struct **vm) -{ - struct vm_struct *tmp; - - for (tmp = imlist; tmp; tmp = tmp->next) - if (v_addr < (unsigned long) tmp->addr + tmp->size) - break; - - *vm = NULL; - if (tmp) { - if (im_region_overlaps(v_addr, size, tmp)) - return IM_REGION_OVERLAP; - - *vm = tmp; - if (im_region_is_subset(v_addr, size, tmp)) { - /* Return with tmp pointing to superset */ - return IM_REGION_SUBSET; - } - if (im_region_is_superset(v_addr, size, tmp)) { - /* Return with tmp pointing to first subset */ - return IM_REGION_SUPERSET; - } - else if (v_addr == (unsigned long) tmp->addr && - size == tmp->size) { - /* Return with tmp pointing to exact region */ - return IM_REGION_EXISTS; - } - } - - return IM_REGION_UNUSED; -} - -static struct vm_struct * split_im_region(unsigned long v_addr, - unsigned long size, struct vm_struct *parent) -{ - struct vm_struct *vm1 = NULL; - struct vm_struct *vm2 = NULL; - struct vm_struct *new_vm = NULL; - - vm1 = kmalloc(sizeof(*vm1), GFP_KERNEL); - if (vm1 == NULL) { - printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); - return NULL; - } - - if (v_addr == (unsigned long) parent->addr) { - /* Use existing parent vm_struct to represent child, allocate - * new one for the remainder of parent range - */ - vm1->size = parent->size - size; - vm1->addr = (void *) (v_addr + size); - vm1->next = parent->next; - - parent->size = size; - parent->next = vm1; - new_vm = parent; - } else if (v_addr + size == (unsigned long) parent->addr + - parent->size) { - /* Allocate new vm_struct to represent child, use existing - * parent one for remainder of parent range - */ - vm1->size = size; - vm1->addr = (void *) v_addr; - vm1->next = parent->next; - new_vm = vm1; - - parent->size -= size; - parent->next = vm1; - } else { - /* Allocate two new vm_structs for the new child and - * uppermost remainder, and use existing parent one for the - * lower remainder of parent range - */ - vm2 
= kmalloc(sizeof(*vm2), GFP_KERNEL); - if (vm2 == NULL) { - printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); - kfree(vm1); - return NULL; - } - - vm1->size = size; - vm1->addr = (void *) v_addr; - vm1->next = vm2; - new_vm = vm1; - - vm2->size = ((unsigned long) parent->addr + parent->size) - - (v_addr + size); - vm2->addr = (void *) v_addr + size; - vm2->next = parent->next; - - parent->size = v_addr - (unsigned long) parent->addr; - parent->next = vm1; - } - - return new_vm; -} - -static struct vm_struct * __add_new_im_area(unsigned long req_addr, - unsigned long size) -{ - struct vm_struct **p, *tmp, *area; - - for (p = &imlist; (tmp = *p) ; p = &tmp->next) { - if (req_addr + size <= (unsigned long)tmp->addr) - break; - } - - area = kmalloc(sizeof(*area), GFP_KERNEL); - if (!area) - return NULL; - area->flags = 0; - area->addr = (void *)req_addr; - area->size = size; - area->next = *p; - *p = area; - - return area; -} - -static struct vm_struct * __im_get_area(unsigned long req_addr, - unsigned long size, - int criteria) -{ - struct vm_struct *tmp; - int status; - - status = im_region_status(req_addr, size, &tmp); - if ((criteria & status) == 0) { - return NULL; - } - - switch (status) { - case IM_REGION_UNUSED: - tmp = __add_new_im_area(req_addr, size); - break; - case IM_REGION_SUBSET: - tmp = split_im_region(req_addr, size, tmp); - break; - case IM_REGION_EXISTS: - /* Return requested region */ - break; - case IM_REGION_SUPERSET: - /* Return first existing subset of requested region */ - break; - default: - printk(KERN_ERR "%s() unexpected imalloc region status\n", - __FUNCTION__); - tmp = NULL; - } - - return tmp; -} - -struct vm_struct * im_get_free_area(unsigned long size) -{ - struct vm_struct *area; - unsigned long addr; - - mutex_lock(&imlist_mutex); - if (get_free_im_addr(size, &addr)) { - printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", - __FUNCTION__, size); - area = NULL; - goto next_im_done; - } - - area = __im_get_area(addr, size, 
IM_REGION_UNUSED); - if (area == NULL) { - printk(KERN_ERR - "%s() cannot obtain area for addr 0x%lx size 0x%lx\n", - __FUNCTION__, addr, size); - } -next_im_done: - mutex_unlock(&imlist_mutex); - return area; -} - -struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, - int criteria) -{ - struct vm_struct *area; - - mutex_lock(&imlist_mutex); - area = __im_get_area(v_addr, size, criteria); - mutex_unlock(&imlist_mutex); - return area; -} - -void im_free(void * addr) -{ - struct vm_struct **p, *tmp; - - if (!addr) - return; - if ((unsigned long) addr & ~PAGE_MASK) { - printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); - return; - } - mutex_lock(&imlist_mutex); - for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { - if (tmp->addr == addr) { - *p = tmp->next; - unmap_vm_area(tmp); - kfree(tmp); - mutex_unlock(&imlist_mutex); - return; - } - } - mutex_unlock(&imlist_mutex); - printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, - addr); -} diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_32.c linux-2.6.22-591/arch/powerpc/mm/init_32.c --- linux-2.6.22-570/arch/powerpc/mm/init_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/init_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -5,7 +5,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
* PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) * * Derived from "arch/i386/mm/init.c" diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_64.c linux-2.6.22-591/arch/powerpc/mm/init_64.c --- linux-2.6.22-570/arch/powerpc/mm/init_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/init_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -5,7 +5,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mem.c linux-2.6.22-591/arch/powerpc/mm/mem.c --- linux-2.6.22-570/arch/powerpc/mm/mem.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/mem.c 2007-12-21 15:36:11.000000000 -0500 @@ -5,7 +5,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) * * Derived from "arch/i386/mm/init.c" diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c linux-2.6.22-591/arch/powerpc/mm/mmu_context_32.c --- linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/mmu_context_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -11,7 +11,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
* * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h linux-2.6.22-591/arch/powerpc/mm/mmu_decl.h --- linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/mmu_decl.h 2007-12-21 15:36:11.000000000 -0500 @@ -8,7 +8,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds @@ -40,8 +39,8 @@ extern unsigned long ioremap_base; extern unsigned int rtas_data, rtas_size; -struct _PTE; -extern struct _PTE *Hash, *Hash_end; +struct hash_pte; +extern struct hash_pte *Hash, *Hash_end; extern unsigned long Hash_size, Hash_mask; extern unsigned int num_tlbcam_entries; @@ -90,16 +89,4 @@ else _tlbie(va); } -#else /* CONFIG_PPC64 */ -/* imalloc region types */ -#define IM_REGION_UNUSED 0x1 -#define IM_REGION_SUBSET 0x2 -#define IM_REGION_EXISTS 0x4 -#define IM_REGION_OVERLAP 0x8 -#define IM_REGION_SUPERSET 0x10 - -extern struct vm_struct * im_get_free_area(unsigned long size); -extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, - int region_type); -extern void im_free(void *addr); #endif diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c linux-2.6.22-591/arch/powerpc/mm/pgtable_32.c --- linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/pgtable_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -8,7 +8,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
* * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds @@ -37,7 +36,6 @@ unsigned long ioremap_base; unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ -int io_bat_index; #if defined(CONFIG_6xx) || defined(CONFIG_POWER3) #define HAVE_BATS 1 @@ -300,51 +298,6 @@ } } -/* is x a power of 4? */ -#define is_power_of_4(x) is_power_of_2(x) && (ffs(x) & 1) - -/* - * Set up a mapping for a block of I/O. - * virt, phys, size must all be page-aligned. - * This should only be called before ioremap is called. - */ -void __init io_block_mapping(unsigned long virt, phys_addr_t phys, - unsigned int size, int flags) -{ - int i; - - if (virt > KERNELBASE && virt < ioremap_bot) - ioremap_bot = ioremap_base = virt; - -#ifdef HAVE_BATS - /* - * Use a BAT for this if possible... - */ - if (io_bat_index < 2 && is_power_of_2(size) - && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { - setbat(io_bat_index, virt, phys, size, flags); - ++io_bat_index; - return; - } -#endif /* HAVE_BATS */ - -#ifdef HAVE_TLBCAM - /* - * Use a CAM for this if possible... - */ - if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) - && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { - settlbcam(tlbcam_index, virt, phys, size, flags, 0); - ++tlbcam_index; - return; - } -#endif /* HAVE_TLBCAM */ - - /* No BATs available, put it in the page tables. */ - for (i = 0; i < size; i += PAGE_SIZE) - map_page(virt + i, phys + i, flags); -} - /* Scan the real Linux page tables and return a PTE pointer for * a virtual address in a context. * Returns true (1) if PTE was found, zero otherwise. The pointer to @@ -379,82 +332,6 @@ return(retval); } -/* Find physical address for this virtual address. Normally used by - * I/O functions, but anyone can call it. - */ -unsigned long iopa(unsigned long addr) -{ - unsigned long pa; - - /* I don't know why this won't work on PMacs or CHRP. 
It - * appears there is some bug, or there is some implicit - * mapping done not properly represented by BATs or in page - * tables.......I am actively working on resolving this, but - * can't hold up other stuff. -- Dan - */ - pte_t *pte; - struct mm_struct *mm; - - /* Check the BATs */ - pa = v_mapped_by_bats(addr); - if (pa) - return pa; - - /* Allow mapping of user addresses (within the thread) - * for DMA if necessary. - */ - if (addr < TASK_SIZE) - mm = current->mm; - else - mm = &init_mm; - - pa = 0; - if (get_pteptr(mm, addr, &pte, NULL)) { - pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); - pte_unmap(pte); - } - - return(pa); -} - -/* This is will find the virtual address for a physical one.... - * Swiped from APUS, could be dangerous :-). - * This is only a placeholder until I really find a way to make this - * work. -- Dan - */ -unsigned long -mm_ptov (unsigned long paddr) -{ - unsigned long ret; -#if 0 - if (paddr < 16*1024*1024) - ret = ZTWO_VADDR(paddr); - else { - int i; - - for (i = 0; i < kmap_chunk_count;){ - unsigned long phys = kmap_chunks[i++]; - unsigned long size = kmap_chunks[i++]; - unsigned long virt = kmap_chunks[i++]; - if (paddr >= phys - && paddr < (phys + size)){ - ret = virt + paddr - phys; - goto exit; - } - } - - ret = (unsigned long) __va(paddr); - } -exit: -#ifdef DEBUGPV - printk ("PTOV(%lx)=%lx\n", paddr, ret); -#endif -#else - ret = (unsigned long)paddr + KERNELBASE; -#endif - return ret; -} - #ifdef CONFIG_DEBUG_PAGEALLOC static int __change_page_attr(struct page *page, pgprot_t prot) diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c linux-2.6.22-591/arch/powerpc/mm/pgtable_64.c --- linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/pgtable_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -7,7 +7,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * 
Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds @@ -34,41 +33,27 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include #include #include #include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include -#include #include #include "mmu_decl.h" -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; +unsigned long ioremap_bot = IOREMAP_BASE; /* * map_io_page currently only called by __ioremap @@ -102,8 +87,8 @@ * entry in the hardware page table. * */ - if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, - mmu_io_psize)) { + if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE, + pa, flags, mmu_io_psize)) { printk(KERN_ERR "Failed to do bolted mapping IO " "memory at %016lx !\n", pa); return -ENOMEM; @@ -113,8 +98,11 @@ } -static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa, - unsigned long ea, unsigned long size, +/** + * __ioremap_at - Low level function to establish the page tables + * for an IO mapping + */ +void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, unsigned long flags) { unsigned long i; @@ -122,17 +110,35 @@ if ((flags & _PAGE_PRESENT) == 0) flags |= pgprot_val(PAGE_KERNEL); + WARN_ON(pa & ~PAGE_MASK); + WARN_ON(((unsigned long)ea) & ~PAGE_MASK); + WARN_ON(size & ~PAGE_MASK); + for (i = 0; i < size; i += PAGE_SIZE) - if (map_io_page(ea+i, pa+i, flags)) + if (map_io_page((unsigned long)ea+i, pa+i, flags)) return NULL; - return (void __iomem *) (ea + (addr & ~PAGE_MASK)); + return (void __iomem *)ea; +} + +/** + * __iounmap_at - Low level function to tear down the page tables + * for an IO mapping. This is used for mappings that + * are manipulated manually, like partial unmapping of + * PCI IOs or ISA space. 
+ */ +void __iounmap_at(void *ea, unsigned long size) +{ + WARN_ON(((unsigned long)ea) & ~PAGE_MASK); + WARN_ON(size & ~PAGE_MASK); + + unmap_kernel_range((unsigned long)ea, size); } void __iomem * __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) { - unsigned long pa, ea; + phys_addr_t paligned; void __iomem *ret; /* @@ -144,27 +150,30 @@ * IMALLOC_END * */ - pa = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - pa; + paligned = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - paligned; - if ((size == 0) || (pa == 0)) + if ((size == 0) || (paligned == 0)) return NULL; if (mem_init_done) { struct vm_struct *area; - area = im_get_free_area(size); + + area = __get_vm_area(size, VM_IOREMAP, + ioremap_bot, IOREMAP_END); if (area == NULL) return NULL; - ea = (unsigned long)(area->addr); - ret = __ioremap_com(addr, pa, ea, size, flags); + ret = __ioremap_at(paligned, area->addr, size, flags); if (!ret) - im_free(area->addr); + vunmap(area->addr); } else { - ea = ioremap_bot; - ret = __ioremap_com(addr, pa, ea, size, flags); + ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags); if (ret) ioremap_bot += size; } + + if (ret) + ret += addr & ~PAGE_MASK; return ret; } @@ -187,62 +196,9 @@ } -#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) - -int __ioremap_explicit(phys_addr_t pa, unsigned long ea, - unsigned long size, unsigned long flags) -{ - struct vm_struct *area; - void __iomem *ret; - - /* For now, require page-aligned values for pa, ea, and size */ - if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || - !IS_PAGE_ALIGNED(size)) { - printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); - return 1; - } - - if (!mem_init_done) { - /* Two things to consider in this case: - * 1) No records will be kept (imalloc, etc) that the region - * has been remapped - * 2) It won't be easy to iounmap() the region later (because - * of 1) - */ - ; - } else { - area = im_get_area(ea, size, - 
IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); - if (area == NULL) { - /* Expected when PHB-dlpar is in play */ - return 1; - } - if (ea != (unsigned long) area->addr) { - printk(KERN_ERR "unexpected addr return from " - "im_get_area\n"); - return 1; - } - } - - ret = __ioremap_com(pa, pa, ea, size, flags); - if (ret == NULL) { - printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); - return 1; - } - if (ret != (void *) ea) { - printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); - return 1; - } - - return 0; -} - /* * Unmap an IO region and remove it from imalloc'd list. * Access to IO memory should be serialized by driver. - * This code is modeled after vmalloc code - unmap_vm_area() - * - * XXX what about calls before mem_init_done (ie python_countermeasures()) */ void __iounmap(volatile void __iomem *token) { @@ -251,9 +207,14 @@ if (!mem_init_done) return; - addr = (void *) ((unsigned long __force) token & PAGE_MASK); - - im_free(addr); + addr = (void *) ((unsigned long __force) + PCI_FIX_ADDR(token) & PAGE_MASK); + if ((unsigned long)addr < ioremap_bot) { + printk(KERN_WARNING "Attempt to iounmap early bolted mapping" + " at 0x%p\n", addr); + return; + } + vunmap(addr); } void iounmap(volatile void __iomem *token) @@ -264,77 +225,8 @@ __iounmap(token); } -static int iounmap_subset_regions(unsigned long addr, unsigned long size) -{ - struct vm_struct *area; - - /* Check whether subsets of this region exist */ - area = im_get_area(addr, size, IM_REGION_SUPERSET); - if (area == NULL) - return 1; - - while (area) { - iounmap((void __iomem *) area->addr); - area = im_get_area(addr, size, - IM_REGION_SUPERSET); - } - - return 0; -} - -int __iounmap_explicit(volatile void __iomem *start, unsigned long size) -{ - struct vm_struct *area; - unsigned long addr; - int rc; - - addr = (unsigned long __force) start & PAGE_MASK; - - /* Verify that the region either exists or is a subset of an existing - * region. 
In the latter case, split the parent region to create - * the exact region - */ - area = im_get_area(addr, size, - IM_REGION_EXISTS | IM_REGION_SUBSET); - if (area == NULL) { - /* Determine whether subset regions exist. If so, unmap */ - rc = iounmap_subset_regions(addr, size); - if (rc) { - printk(KERN_ERR - "%s() cannot unmap nonexistent range 0x%lx\n", - __FUNCTION__, addr); - return 1; - } - } else { - iounmap((void __iomem *) area->addr); - } - /* - * FIXME! This can't be right: - iounmap(area->addr); - * Maybe it should be "iounmap(area);" - */ - return 0; -} - EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(ioremap_flags); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); - -static DEFINE_SPINLOCK(phb_io_lock); - -void __iomem * reserve_phb_iospace(unsigned long size) -{ - void __iomem *virt_addr; - - if (phbs_io_bot >= IMALLOC_BASE) - panic("reserve_phb_iospace(): phb io space overflow\n"); - - spin_lock(&phb_io_lock); - virt_addr = (void __iomem *) phbs_io_bot; - phbs_io_bot += size; - spin_unlock(&phb_io_lock); - - return virt_addr; -} diff -Nurb linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c linux-2.6.22-591/arch/powerpc/mm/ppc_mmu_32.c --- linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/ppc_mmu_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -11,7 +11,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
* * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds @@ -35,12 +34,12 @@ #include "mmu_decl.h" -PTE *Hash, *Hash_end; +struct hash_pte *Hash, *Hash_end; unsigned long Hash_size, Hash_mask; unsigned long _SDR1; union ubat { /* BAT register values to be loaded */ - BAT bat; + struct ppc_bat bat; u32 word[2]; } BATS[8][2]; /* 8 pairs of IBAT, DBAT */ @@ -245,7 +244,7 @@ cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); + Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", total_memory >> 20, Hash_size >> 10, Hash); diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_32.c linux-2.6.22-591/arch/powerpc/mm/tlb_32.c --- linux-2.6.22-570/arch/powerpc/mm/tlb_32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/tlb_32.c 2007-12-21 15:36:11.000000000 -0500 @@ -11,7 +11,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_64.c linux-2.6.22-591/arch/powerpc/mm/tlb_64.c --- linux-2.6.22-570/arch/powerpc/mm/tlb_64.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/mm/tlb_64.c 2007-12-21 15:36:11.000000000 -0500 @@ -8,7 +8,6 @@ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) * and Cort Dougan (PReP) (cort@cs.nmt.edu) * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
* * Derived from "arch/i386/mm/init.c" * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds @@ -239,3 +238,59 @@ pte_free_submit(*batchp); *batchp = NULL; } + +/** + * __flush_hash_table_range - Flush all HPTEs for a given address range + * from the hash table (and the TLB). But keeps + * the linux PTEs intact. + * + * @mm : mm_struct of the target address space (generally init_mm) + * @start : starting address + * @end : ending address (not included in the flush) + * + * This function is mostly to be used by some IO hotplug code in order + * to remove all hash entries from a given address range used to map IO + * space on a removed PCI-PCI bridge without tearing down the full mapping + * since 64K pages may overlap with other bridges when using 64K pages + * with 4K HW pages on IO space. + * + * Because of that usage pattern, it's only available with CONFIG_HOTPLUG + * and is implemented for small size rather than speed. + */ +#ifdef CONFIG_HOTPLUG + +void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + unsigned long flags; + + start = _ALIGN_DOWN(start, PAGE_SIZE); + end = _ALIGN_UP(end, PAGE_SIZE); + + BUG_ON(!mm->pgd); + + /* Note: Normally, we should only ever use a batch within a + * PTE locked section. This violates the rule, but will work + * since we don't actually modify the PTEs, we just flush the + * hash while leaving the PTEs intact (including their reference + * to being hashed). This is not the most performance oriented + * way to do things but is fine for our needs here. 
+ */ + local_irq_save(flags); + arch_enter_lazy_mmu_mode(); + for (; start < end; start += PAGE_SIZE) { + pte_t *ptep = find_linux_pte(mm->pgd, start); + unsigned long pte; + + if (ptep == NULL) + continue; + pte = pte_val(*ptep); + if (!(pte & _PAGE_HASHPTE)) + continue; + hpte_need_flush(mm, start, ptep, pte, 0); + } + arch_leave_lazy_mmu_mode(); + local_irq_restore(flags); +} + +#endif /* CONFIG_HOTPLUG */ diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig linux-2.6.22-591/arch/powerpc/platforms/Kconfig --- linux-2.6.22-570/arch/powerpc/platforms/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -16,13 +16,6 @@ bool "Embedded 6xx/7xx/7xxx-based board" depends on PPC32 && (BROKEN||BROKEN_ON_SMP) -config APUS - bool "Amiga-APUS" - depends on PPC32 && BROKEN - help - Select APUS if configuring for a PowerUP Amiga. - More information is available at: - . endchoice source "arch/powerpc/platforms/pseries/Kconfig" diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype linux-2.6.22-591/arch/powerpc/platforms/Kconfig.cputype --- linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/platforms/Kconfig.cputype 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,252 @@ +config PPC64 + bool "64-bit kernel" + default n + help + This option selects whether a 32-bit or a 64-bit kernel + will be built. + +menu "Processor support" +choice + prompt "Processor Type" + depends on PPC32 + default 6xx + +config CLASSIC32 + bool "52xx/6xx/7xx/74xx" + select PPC_FPU + select 6xx + help + There are four families of PowerPC chips supported. The more common + types (601, 603, 604, 740, 750, 7400), the Motorola embedded + versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC + embedded versions (403 and 405) and the high end 64 bit Power + processors (POWER 3, POWER4, and IBM PPC970 also known as G5). 
+ + This option is the catch-all for 6xx types, including some of the + embedded versions. Unless you see an option for the specific + chip family you are using, you want this option. + + You do not want this if you are building a kernel for a 64 bit + IBM RS/6000 or an Apple G5, choose 6xx. + + If unsure, select this option + + Note that the kernel runs in 32-bit mode even on 64-bit chips. + +config PPC_82xx + bool "Freescale 82xx" + select 6xx + select PPC_FPU + +config PPC_83xx + bool "Freescale 83xx" + select 6xx + select FSL_SOC + select 83xx + select PPC_FPU + select WANT_DEVICE_TREE + +config PPC_85xx + bool "Freescale 85xx" + select E500 + select FSL_SOC + select 85xx + select WANT_DEVICE_TREE + +config PPC_86xx + bool "Freescale 86xx" + select 6xx + select FSL_SOC + select FSL_PCIE + select PPC_FPU + select ALTIVEC + help + The Freescale E600 SoCs have 74xx cores. + +config PPC_8xx + bool "Freescale 8xx" + select FSL_SOC + select 8xx + +config 40x + bool "AMCC 40x" + select PPC_DCR_NATIVE + +config 44x + bool "AMCC 44x" + select PPC_DCR_NATIVE + select WANT_DEVICE_TREE + +config E200 + bool "Freescale e200" + +endchoice + +config POWER4_ONLY + bool "Optimize for POWER4" + depends on PPC64 + default n + ---help--- + Cause the compiler to optimize for POWER4/POWER5/PPC970 processors. + The resulting binary will not work on POWER3 or RS64 processors + when compiled with binutils 2.15 or later. 
+ +config POWER3 + bool + depends on PPC64 + default y if !POWER4_ONLY + +config POWER4 + depends on PPC64 + def_bool y + +config 6xx + bool + +# this is temp to handle compat with arch=ppc +config 8xx + bool + +# this is temp to handle compat with arch=ppc +config 83xx + bool + +# this is temp to handle compat with arch=ppc +config 85xx + bool + +config E500 + bool + +config PPC_FPU + bool + default y if PPC64 + +config 4xx + bool + depends on 40x || 44x + default y + +config BOOKE + bool + depends on E200 || E500 || 44x + default y + +config FSL_BOOKE + bool + depends on E200 || E500 + default y + +config PTE_64BIT + bool + depends on 44x || E500 + default y if 44x + default y if E500 && PHYS_64BIT + +config PHYS_64BIT + bool 'Large physical address support' if E500 + depends on 44x || E500 + select RESOURCES_64BIT + default y if 44x + ---help--- + This option enables kernel support for larger than 32-bit physical + addresses. This feature is not available on all e500 cores. + + If in doubt, say N here. + +config ALTIVEC + bool "AltiVec Support" + depends on CLASSIC32 || POWER4 + ---help--- + This option enables kernel support for the Altivec extensions to the + PowerPC processor. The kernel currently supports saving and restoring + altivec registers, and turning on the 'altivec enable' bit so user + processes can execute altivec instructions. + + This option is only useful if you have a processor that supports + altivec (G4, otherwise known as 74xx series), but does not have + any effect on a non-altivec cpu (it does, however add code to the + kernel). + + If in doubt, say Y here. + +config SPE + bool "SPE Support" + depends on E200 || E500 + default y + ---help--- + This option enables kernel support for the Signal Processing + Extensions (SPE) to the PowerPC processor. The kernel currently + supports saving and restoring SPE registers, and turning on the + 'spe enable' bit so user processes can execute SPE instructions. 
+ + This option is only useful if you have a processor that supports + SPE (e500, otherwise known as 85xx series), but does not have any + effect on a non-spe cpu (it does, however add code to the kernel). + + If in doubt, say Y here. + +config PPC_STD_MMU + bool + depends on 6xx || POWER3 || POWER4 || PPC64 + default y + +config PPC_STD_MMU_32 + def_bool y + depends on PPC_STD_MMU && PPC32 + +config PPC_MM_SLICES + bool + default y if HUGETLB_PAGE + default n + +config VIRT_CPU_ACCOUNTING + bool "Deterministic task and CPU time accounting" + depends on PPC64 + default y + help + Select this option to enable more accurate task and CPU time + accounting. This is done by reading a CPU counter on each + kernel entry and exit and on transitions within the kernel + between system, softirq and hardirq state, so there is a + small performance impact. This also enables accounting of + stolen time on logically-partitioned systems running on + IBM POWER5-based machines. + + If in doubt, say Y here. + +config SMP + depends on PPC_STD_MMU + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. Note that the kernel does not currently + support SMP machines with 603/603e/603ev or PPC750 ("G3") processors + since they have inadequate hardware support for multiprocessor + operation. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on single-processor machines. + On a single-processor machine, the kernel will run faster if you say + N here. + + If you don't know what to do here, say N. 
+ +config NR_CPUS + int "Maximum number of CPUs (2-128)" + range 2 128 + depends on SMP + default "32" if PPC64 + default "4" + +config NOT_COHERENT_CACHE + bool + depends on 4xx || 8xx || E200 + default y + +config CONFIG_CHECK_CACHE_COHERENCY + bool + +endmenu diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig linux-2.6.22-591/arch/powerpc/platforms/apus/Kconfig --- linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/apus/Kconfig 1969-12-31 19:00:00.000000000 -0500 @@ -1,130 +0,0 @@ - -config AMIGA - bool - depends on APUS - default y - help - This option enables support for the Amiga series of computers. - -config ZORRO - bool - depends on APUS - default y - help - This enables support for the Zorro bus in the Amiga. If you have - expansion cards in your Amiga that conform to the Amiga - AutoConfig(tm) specification, say Y, otherwise N. Note that even - expansion cards that do not fit in the Zorro slots but fit in e.g. - the CPU slot may fall in this category, so you have to say Y to let - Linux use these. - -config ABSTRACT_CONSOLE - bool - depends on APUS - default y - -config APUS_FAST_EXCEPT - bool - depends on APUS - default y - -config AMIGA_PCMCIA - bool "Amiga 1200/600 PCMCIA support" - depends on APUS && EXPERIMENTAL - help - Include support in the kernel for pcmcia on Amiga 1200 and Amiga - 600. If you intend to use pcmcia cards say Y; otherwise say N. - -config AMIGA_BUILTIN_SERIAL - tristate "Amiga builtin serial support" - depends on APUS - help - If you want to use your Amiga's built-in serial port in Linux, - answer Y. - - To compile this driver as a module, choose M here. - -config GVPIOEXT - tristate "GVP IO-Extender support" - depends on APUS - help - If you want to use a GVP IO-Extender serial card in Linux, say Y. - Otherwise, say N. 
- -config GVPIOEXT_LP - tristate "GVP IO-Extender parallel printer support" - depends on GVPIOEXT - help - Say Y to enable driving a printer from the parallel port on your - GVP IO-Extender card, N otherwise. - -config GVPIOEXT_PLIP - tristate "GVP IO-Extender PLIP support" - depends on GVPIOEXT - help - Say Y to enable doing IP over the parallel port on your GVP - IO-Extender card, N otherwise. - -config MULTIFACE_III_TTY - tristate "Multiface Card III serial support" - depends on APUS - help - If you want to use a Multiface III card's serial port in Linux, - answer Y. - - To compile this driver as a module, choose M here. - -config A2232 - tristate "Commodore A2232 serial support (EXPERIMENTAL)" - depends on EXPERIMENTAL && APUS - ---help--- - This option supports the 2232 7-port serial card shipped with the - Amiga 2000 and other Zorro-bus machines, dating from 1989. At - a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip - each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The - ports were connected with 8 pin DIN connectors on the card bracket, - for which 8 pin to DB25 adapters were supplied. The card also had - jumpers internally to toggle various pinning configurations. - - This driver can be built as a module; but then "generic_serial" - will also be built as a module. This has to be loaded before - "ser_a2232". If you want to do this, answer M here. - -config WHIPPET_SERIAL - tristate "Hisoft Whippet PCMCIA serial support" - depends on AMIGA_PCMCIA - help - HiSoft has a web page at , but there - is no listing for the Whippet in their Amiga section. - -config APNE - tristate "PCMCIA NE2000 support" - depends on AMIGA_PCMCIA - help - If you have a PCMCIA NE2000 compatible adapter, say Y. Otherwise, - say N. - - To compile this driver as a module, choose M here: the - module will be called apne. 
- -config SERIAL_CONSOLE - bool "Support for serial port console" - depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y) - -config HEARTBEAT - bool "Use power LED as a heartbeat" - depends on APUS - help - Use the power-on LED on your machine as a load meter. The exact - behavior is platform-dependent, but normally the flash frequency is - a hyperbolic function of the 5-minute load average. - -config PROC_HARDWARE - bool "/proc/hardware support" - depends on APUS - -source "drivers/zorro/Kconfig" - -config PCI_PERMEDIA - bool "PCI for Permedia2" - depends on !4xx && !8xx && APUS diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c linux-2.6.22-591/arch/powerpc/platforms/cell/io-workarounds.c --- linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/cell/io-workarounds.c 2007-12-21 15:36:11.000000000 -0500 @@ -102,7 +102,7 @@ vaddr = (unsigned long)PCI_FIX_ADDR(addr); /* Check if it's in allowed range for PIO */ - if (vaddr < PHBS_IO_BASE || vaddr >= IMALLOC_BASE) + if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END) return; /* Try to find a PTE. 
If not, clear the paddr, we'll do diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/file.c --- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/file.c 2007-12-21 15:36:11.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + static int spufs_mem_open(struct inode *inode, struct file *file) { @@ -1797,6 +1799,29 @@ return 0; } +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { @@ -2015,6 +2040,7 @@ }; struct tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, { "regs", &spufs_regs_fops, 0666, }, { "mbox", &spufs_mbox_fops, 0444, }, @@ -2050,6 +2076,7 @@ }; struct tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, { "mbox", &spufs_mbox_fops, 0444, }, { "ibox", &spufs_ibox_fops, 0444, }, diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/run.c --- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/run.c 2007-12-21 15:36:11.000000000 -0500 @@ -142,8 +142,12 @@ runcntl = SPU_RUNCNTL_RUNNABLE; ctx->ops->runcntl_write(ctx, runcntl); } else { + unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL; spu_start_tick(ctx); ctx->ops->npc_write(ctx, *npc); + if (test_thread_flag(TIF_SINGLESTEP)) + mode = SPU_PRIVCNTL_MODE_SINGLE_STEP; + out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode); ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); } @@ -334,7 +338,8 @@ ret = spu_process_events(ctx); } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | - SPU_STATUS_STOPPED_BY_HALT))); + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_SINGLE_STEP))); ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); @@ -344,10 +349,15 @@ if ((ret == 0) || ((ret == -ERESTARTSYS) && ((status & SPU_STATUS_STOPPED_BY_HALT) || + (status & SPU_STATUS_SINGLE_STEP) || ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) ret = status; + /* Note: we don't need to force_sig SIGTRAP on single-step + * since we have TIF_SINGLESTEP set, thus the kernel will do + * it upon return from the syscall anyway + */ if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { force_sig(SIGTRAP, current); diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h linux-2.6.22-591/arch/powerpc/platforms/iseries/call_hpt.h --- linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/iseries/call_hpt.h 2007-12-21 15:36:11.000000000 -0500 @@ -76,24 +76,25 @@ return compressedStatus; } -static inline u64 HvCallHpt_findValid(hpte_t *hpte, u64 vpn) +static inline u64 HvCallHpt_findValid(struct hash_pte *hpte, u64 vpn) { return HvCall3Ret16(HvCallHptFindValid, hpte, vpn, 0, 0); } -static inline u64 HvCallHpt_findNextValid(hpte_t *hpte, u32 hpteIndex, +static inline u64 HvCallHpt_findNextValid(struct hash_pte 
*hpte, u32 hpteIndex, u8 bitson, u8 bitsoff) { return HvCall3Ret16(HvCallHptFindNextValid, hpte, hpteIndex, bitson, bitsoff); } -static inline void HvCallHpt_get(hpte_t *hpte, u32 hpteIndex) +static inline void HvCallHpt_get(struct hash_pte *hpte, u32 hpteIndex) { HvCall2Ret16(HvCallHptGet, hpte, hpteIndex, 0); } -static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, hpte_t *hpte) +static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, + struct hash_pte *hpte) { HvCall4(HvCallHptAddValidate, hpteIndex, hBit, hpte->v, hpte->r); } diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c linux-2.6.22-591/arch/powerpc/platforms/iseries/htab.c --- linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/iseries/htab.c 2007-12-21 15:36:11.000000000 -0500 @@ -44,7 +44,7 @@ unsigned long vflags, int psize) { long slot; - hpte_t lhpte; + struct hash_pte lhpte; int secondary = 0; BUG_ON(psize != MMU_PAGE_4K); @@ -99,7 +99,7 @@ static unsigned long iSeries_hpte_getword0(unsigned long slot) { - hpte_t hpte; + struct hash_pte hpte; HvCallHpt_get(&hpte, slot); return hpte.v; @@ -144,7 +144,7 @@ static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long va, int psize, int local) { - hpte_t hpte; + struct hash_pte hpte; unsigned long want_v; iSeries_hlock(slot); @@ -176,7 +176,7 @@ */ static long iSeries_hpte_find(unsigned long vpn) { - hpte_t hpte; + struct hash_pte hpte; long slot; /* diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c linux-2.6.22-591/arch/powerpc/platforms/iseries/pci.c --- linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/iseries/pci.c 2007-12-21 15:36:11.000000000 -0500 @@ -742,6 +742,11 @@ /* Install IO hooks */ ppc_pci_io = iseries_pci_io; + /* iSeries has no IO space in the common sense, it needs to set + * the IO base to 
0 + */ + pci_io_base = 0; + if (root == NULL) { printk(KERN_CRIT "iSeries_pcibios_init: can't find root " "of device tree\n"); diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c linux-2.6.22-591/arch/powerpc/platforms/maple/pci.c --- linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/maple/pci.c 2007-12-21 15:36:11.000000000 -0500 @@ -519,23 +519,6 @@ DBG(" <- maple_pci_irq_fixup\n"); } -static void __init maple_fixup_phb_resources(void) -{ - struct pci_controller *hose, *tmp; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; - - hose->io_resource.start += offset; - hose->io_resource.end += offset; - - printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n", - hose->global_number, - (unsigned long long)hose->io_resource.start, - (unsigned long long)hose->io_resource.end); - } -} - void __init maple_pci_init(void) { struct device_node *np, *root; @@ -573,24 +556,6 @@ if (ht && add_bridge(ht) != 0) of_node_put(ht); - /* - * We need to call pci_setup_phb_io for the HT bridge first - * so it gets the I/O port numbers starting at 0, and we - * need to call it for the AGP bridge after that so it gets - * small positive I/O port numbers. 
- */ - if (u3_ht) - pci_setup_phb_io(u3_ht, 1); - if (u3_agp) - pci_setup_phb_io(u3_agp, 0); - if (u4_pcie) - pci_setup_phb_io(u4_pcie, 0); - - /* Fixup the IO resources on our host bridges as the common code - * does it only for childs of the host bridges - */ - maple_fixup_phb_resources(); - /* Setup the linkage between OF nodes and PHBs */ pci_devs_phb_init(); diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c linux-2.6.22-591/arch/powerpc/platforms/pasemi/pci.c --- linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pasemi/pci.c 2007-12-21 15:36:11.000000000 -0500 @@ -150,29 +150,11 @@ printk(KERN_INFO "Found PA-PXP PCI host bridge.\n"); /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ pci_process_bridge_OF_ranges(hose, dev, 1); - pci_setup_phb_io(hose, 1); return 0; } - -static void __init pas_fixup_phb_resources(void) -{ - struct pci_controller *hose, *tmp; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; - hose->io_resource.start += offset; - hose->io_resource.end += offset; - printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", - hose->global_number, - hose->io_resource.start, hose->io_resource.end); - } -} - - void __init pas_pci_init(void) { struct device_node *np, *root; @@ -190,8 +172,6 @@ of_node_put(root); - pas_fixup_phb_resources(); - /* Setup the linkage between OF nodes and PHBs */ pci_devs_phb_init(); diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c linux-2.6.22-591/arch/powerpc/platforms/powermac/pci.c --- linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/powermac/pci.c 2007-12-21 15:36:11.000000000 -0500 @@ -1006,19 +1006,6 @@ #endif /* CONFIG_PPC32 */ } -#ifdef CONFIG_PPC64 -static void __init 
pmac_fixup_phb_resources(void) -{ - struct pci_controller *hose, *tmp; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", - hose->global_number, - hose->io_resource.start, hose->io_resource.end); - } -} -#endif - void __init pmac_pci_init(void) { struct device_node *np, *root; @@ -1053,25 +1040,6 @@ if (ht && add_bridge(ht) != 0) of_node_put(ht); - /* - * We need to call pci_setup_phb_io for the HT bridge first - * so it gets the I/O port numbers starting at 0, and we - * need to call it for the AGP bridge after that so it gets - * small positive I/O port numbers. - */ - if (u3_ht) - pci_setup_phb_io(u3_ht, 1); - if (u3_agp) - pci_setup_phb_io(u3_agp, 0); - if (u4_pcie) - pci_setup_phb_io(u4_pcie, 0); - - /* - * On ppc64, fixup the IO resources on our host bridges as - * the common code does it only for children of the host bridges - */ - pmac_fixup_phb_resources(); - /* Setup the linkage between OF nodes and PHBs */ pci_devs_phb_init(); diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c linux-2.6.22-591/arch/powerpc/platforms/powermac/setup.c --- linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/powermac/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -98,8 +98,6 @@ int sccdbg; #endif -extern void zs_kgdb_hook(int tty_num); - sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; EXPORT_SYMBOL(sys_ctrler); @@ -330,10 +328,6 @@ l2cr_init(); #endif /* CONFIG_PPC32 */ -#ifdef CONFIG_KGDB - zs_kgdb_hook(0); -#endif - find_via_cuda(); find_via_pmu(); smu_init(); diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c linux-2.6.22-591/arch/powerpc/platforms/ps3/htab.c --- linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/ps3/htab.c 2007-12-21 15:36:11.000000000 -0500 @@ -34,7 +34,7 @@ #define DBG(fmt...) 
do{if(0)printk(fmt);}while(0) #endif -static hpte_t *htab; +static struct hash_pte *htab; static unsigned long htab_addr; static unsigned char *bolttab; static unsigned char *inusetab; @@ -44,8 +44,8 @@ #define debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g) \ _debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__) static void _debug_dump_hpte(unsigned long pa, unsigned long va, - unsigned long group, unsigned long bitmap, hpte_t lhpte, int psize, - unsigned long slot, const char* func, int line) + unsigned long group, unsigned long bitmap, struct hash_pte lhpte, + int psize, unsigned long slot, const char* func, int line) { DBG("%s:%d: pa = %lxh\n", func, line, pa); DBG("%s:%d: lpar = %lxh\n", func, line, @@ -63,7 +63,7 @@ unsigned long pa, unsigned long rflags, unsigned long vflags, int psize) { unsigned long slot; - hpte_t lhpte; + struct hash_pte lhpte; int secondary = 0; unsigned long result; unsigned long bitmap; @@ -255,7 +255,7 @@ ppc64_pft_size = __ilog2(htab_size); - bitmap_size = htab_size / sizeof(hpte_t) / 8; + bitmap_size = htab_size / sizeof(struct hash_pte) / 8; bolttab = __va(lmb_alloc(bitmap_size, 1)); inusetab = __va(lmb_alloc(bitmap_size, 1)); @@ -273,7 +273,7 @@ result = lv1_map_htab(0, &htab_addr); - htab = (hpte_t *)__ioremap(htab_addr, htab_size, + htab = (struct hash_pte *)__ioremap(htab_addr, htab_size, pgprot_val(PAGE_READONLY_X)); DBG("%s:%d: lpar %016lxh, virt %016lxh\n", __func__, __LINE__, diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile linux-2.6.22-591/arch/powerpc/platforms/pseries/Makefile --- linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -8,7 +8,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o +obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o 
eeh_event.o eeh_sysfs.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PCI_MSI) += msi.o diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh.c --- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh.c 2007-12-21 15:36:11.000000000 -0500 @@ -1,6 +1,8 @@ /* * eeh.c - * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation + * Copyright IBM Corporation 2001, 2005, 2006 + * Copyright Dave Engebretsen & Todd Inglett 2001 + * Copyright Linas Vepstas 2005, 2006 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,6 +17,8 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Please address comments and feedback to Linas Vepstas */ #include @@ -117,7 +121,6 @@ static unsigned long ignored_check; static unsigned long total_mmio_ffs; static unsigned long false_positives; -static unsigned long ignored_failures; static unsigned long slot_resets; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) @@ -505,6 +508,7 @@ printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n", ret, dn->full_name); false_positives++; + pdn->eeh_false_positives ++; rc = 0; goto dn_unlock; } @@ -513,6 +517,7 @@ * they are empty when they don't have children. 
*/ if ((rets[0] == 5) && (dn->child == NULL)) { false_positives++; + pdn->eeh_false_positives ++; rc = 0; goto dn_unlock; } @@ -522,6 +527,7 @@ printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", ret, dn->full_name); false_positives++; + pdn->eeh_false_positives ++; rc = 0; goto dn_unlock; } @@ -529,6 +535,7 @@ /* If not the kind of error we know about, punt. */ if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { false_positives++; + pdn->eeh_false_positives ++; rc = 0; goto dn_unlock; } @@ -921,6 +928,7 @@ pdn->eeh_mode = 0; pdn->eeh_check_count = 0; pdn->eeh_freeze_count = 0; + pdn->eeh_false_positives = 0; if (status && strcmp(status, "ok") != 0) return NULL; /* ignore devices with bad status */ @@ -1139,7 +1147,8 @@ pdn = PCI_DN(dn); pdn->pcidev = dev; - pci_addr_cache_insert_device (dev); + pci_addr_cache_insert_device(dev); + eeh_sysfs_add_device(dev); } void eeh_add_device_tree_late(struct pci_bus *bus) @@ -1178,6 +1187,7 @@ printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev)); #endif pci_addr_cache_remove_device(dev); + eeh_sysfs_remove_device(dev); dn = pci_device_to_OF_node(dev); if (PCI_DN(dn)->pcidev) { @@ -1214,11 +1224,10 @@ "check not wanted=%ld\n" "eeh_total_mmio_ffs=%ld\n" "eeh_false_positives=%ld\n" - "eeh_ignored_failures=%ld\n" "eeh_slot_resets=%ld\n", no_device, no_dn, no_cfg_addr, ignored_check, total_mmio_ffs, - false_positives, ignored_failures, + false_positives, slot_resets); } diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_cache.c --- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_cache.c 2007-12-21 15:36:11.000000000 -0500 @@ -2,7 +2,8 @@ * eeh_cache.c * PCI address cache; allows the lookup of PCI devices based on I/O address * - * Copyright (C) 2004 Linas Vepstas IBM Corporation + * Copyright IBM Corporation 2004 + * 
Copyright Linas Vepstas 2004 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -295,6 +296,8 @@ continue; pci_dev_get (dev); /* matching put is in eeh_remove_device() */ PCI_DN(dn)->pcidev = dev; + + eeh_sysfs_add_device(dev); } #ifdef DEBUG diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_driver.c --- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_driver.c 2007-12-21 15:36:11.000000000 -0500 @@ -1,6 +1,7 @@ /* * PCI Error Recovery Driver for RPA-compliant PPC64 platform. - * Copyright (C) 2004, 2005 Linas Vepstas + * Copyright IBM Corp. 2004 2005 + * Copyright Linas Vepstas 2004, 2005 * * All rights reserved. * @@ -19,8 +20,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Send feedback to - * + * Send comments and feedback to Linas Vepstas */ #include #include diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_sysfs.c --- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_sysfs.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,87 @@ +/* + * Sysfs entries for PCI Error Recovery for PAPR-compliant platform. + * Copyright IBM Corporation 2007 + * Copyright Linas Vepstas 2007 + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send comments and feedback to Linas Vepstas + */ +#include +#include +#include +#include + +/** + * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic + * @_name: name of file in sysfs directory + * @_memb: name of member in struct pci_dn to access + * @_format: printf format for display + * + * All of the attributes look very similar, so just + * auto-gen a cut-n-paste routine to display them. + */ +#define EEH_SHOW_ATTR(_name,_memb,_format) \ +static ssize_t eeh_show_##_name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct pci_dev *pdev = to_pci_dev(dev); \ + struct device_node *dn = pci_device_to_OF_node(pdev); \ + struct pci_dn *pdn; \ + \ + if (!dn || PCI_DN(dn) == NULL) \ + return 0; \ + \ + pdn = PCI_DN(dn); \ + return sprintf(buf, _format "\n", pdn->_memb); \ +} \ +static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); + + +EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x"); +EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x"); +EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x"); +EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d"); +EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d"); +EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d"); + +void eeh_sysfs_add_device(struct pci_dev *pdev) +{ + int rc=0; + + rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + rc += 
device_create_file(&pdev->dev, &dev_attr_eeh_check_count); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count); + + if (rc) + printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); +} + +void eeh_sysfs_remove_device(struct pci_dev *pdev) +{ + device_remove_file(&pdev->dev, &dev_attr_eeh_mode); + device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_check_count); + device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives); + device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count); +} + diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c linux-2.6.22-591/arch/powerpc/platforms/pseries/pci_dlpar.c --- linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/pci_dlpar.c 2007-12-21 15:36:11.000000000 -0500 @@ -110,8 +110,6 @@ } } } - - eeh_add_device_tree_late(bus); } EXPORT_SYMBOL_GPL(pcibios_fixup_new_pci_devices); @@ -139,6 +137,8 @@ /* Make the discovered devices available */ pci_bus_add_devices(child_bus); + + eeh_add_device_tree_late(child_bus); return 0; } @@ -171,6 +171,7 @@ if (!list_empty(&bus->devices)) { pcibios_fixup_new_pci_devices(bus, 0); pci_bus_add_devices(bus); + eeh_add_device_tree_late(bus); } } else if (mode == PCI_PROBE_NORMAL) { /* use legacy probe */ @@ -179,6 +180,7 @@ if (num) { pcibios_fixup_new_pci_devices(bus, 1); pci_bus_add_devices(bus); + eeh_add_device_tree_late(bus); } list_for_each_entry(dev, &bus->devices, bus_list) @@ -200,8 +202,6 @@ rtas_setup_phb(phb); pci_process_bridge_OF_ranges(phb, dn, 0); - pci_setup_phb_io_dynamic(phb, primary); - pci_devs_phb_init_dynamic(phb); if (dn->child) @@ -210,6 +210,7 @@ scan_phb(phb); pcibios_fixup_new_pci_devices(phb->bus, 0); pci_bus_add_devices(phb->bus); + 
eeh_add_device_tree_late(phb->bus); return phb; } diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h linux-2.6.22-591/arch/powerpc/platforms/pseries/pseries.h --- linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/pseries.h 2007-12-21 15:36:11.000000000 -0500 @@ -33,6 +33,8 @@ static inline void setup_kexec_cpu_down_mpic(void) { } #endif +extern void pSeries_final_fixup(void); + /* Poweron flag used for enabling auto ups restart */ extern unsigned long rtas_poweron_auto; diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c linux-2.6.22-591/arch/powerpc/platforms/pseries/setup.c --- linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/platforms/pseries/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -399,6 +399,7 @@ * a good time to find other work to dispatch. */ get_lppaca()->idle = 1; + get_lppaca()->donate_dedicated_cpu = 1; /* * We come in with interrupts disabled, and need_resched() @@ -431,6 +432,7 @@ out: HMT_medium(); + get_lppaca()->donate_dedicated_cpu = 0; get_lppaca()->idle = 0; } diff -Nurb linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c linux-2.6.22-591/arch/powerpc/sysdev/tsi108_dev.c --- linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/powerpc/sysdev/tsi108_dev.c 2007-12-21 15:36:11.000000000 -0500 @@ -72,12 +72,11 @@ int ret; for (np = NULL, i = 0; - (np = of_find_compatible_node(np, "network", "tsi-ethernet")) != NULL; + (np = of_find_compatible_node(np, "network", "tsi108-ethernet")) != NULL; i++) { struct resource r[2]; - struct device_node *phy; + struct device_node *phy, *mdio; hw_info tsi_eth_data; - const unsigned int *id; const unsigned int *phy_id; const void *mac_addr; const phandle *ph; @@ -111,6 +110,13 @@ if (mac_addr) memcpy(tsi_eth_data.mac_addr, mac_addr, 6); + ph = 
of_get_property(np, "mdio-handle", NULL); + mdio = of_find_node_by_phandle(*ph); + ret = of_address_to_resource(mdio, 0, &res); + of_node_put(mdio); + if (ret) + goto unreg; + ph = of_get_property(np, "phy-handle", NULL); phy = of_find_node_by_phandle(*ph); @@ -119,20 +125,25 @@ goto unreg; } - id = of_get_property(phy, "reg", NULL); - phy_id = of_get_property(phy, "phy-id", NULL); - ret = of_address_to_resource(phy, 0, &res); - if (ret) { - of_node_put(phy); - goto unreg; - } + phy_id = of_get_property(phy, "reg", NULL); + tsi_eth_data.regs = r[0].start; tsi_eth_data.phyregs = res.start; tsi_eth_data.phy = *phy_id; tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0); - if (of_device_is_compatible(phy, "bcm54xx")) + + /* Some boards with the TSI108 bridge (e.g. Holly) + * have a miswiring of the ethernet PHYs which + * requires a workaround. The special + * "txc-rxc-delay-disable" property enables this + * workaround. FIXME: Need to port the tsi108_eth + * driver itself to phylib and use a non-misleading + * name for the workaround flag - it's not actually to + * do with the model of PHY in use */ + if (of_get_property(phy, "txc-rxc-delay-disable", NULL)) tsi_eth_data.phy_type = TSI108_PHY_BCM54XX; of_node_put(phy); + ret = platform_device_add_data(tsi_eth_dev, &tsi_eth_data, sizeof(hw_info)); diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/enet.c linux-2.6.22-591/arch/ppc/8260_io/enet.c --- linux-2.6.22-570/arch/ppc/8260_io/enet.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/8260_io/enet.c 2007-12-21 15:36:11.000000000 -0500 @@ -477,9 +477,9 @@ } else { skb_put(skb,pkt_len-4); /* Make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)__va(bdp->cbd_bufaddr), - pkt_len-4, 0); + pkt_len-4); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); } diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c linux-2.6.22-591/arch/ppc/8260_io/fcc_enet.c --- linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/8260_io/fcc_enet.c 2007-12-21 15:36:11.000000000 -0500 @@ -734,9 +734,9 @@ } else { skb_put(skb,pkt_len); /* Make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)__va(bdp->cbd_bufaddr), - pkt_len, 0); + pkt_len); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); } diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/enet.c linux-2.6.22-591/arch/ppc/8xx_io/enet.c --- linux-2.6.22-570/arch/ppc/8xx_io/enet.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/8xx_io/enet.c 2007-12-21 15:36:11.000000000 -0500 @@ -506,9 +506,9 @@ } else { skb_put(skb,pkt_len-4); /* Make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, cep->rx_vaddr[bdp - cep->rx_bd_base], - pkt_len-4, 0); + pkt_len-4); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); } diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/fec.c linux-2.6.22-591/arch/ppc/8xx_io/fec.c --- linux-2.6.22-570/arch/ppc/8xx_io/fec.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/8xx_io/fec.c 2007-12-21 15:36:11.000000000 -0500 @@ -725,7 +725,7 @@ fep->stats.rx_dropped++; } else { skb_put(skb,pkt_len-4); /* Make room */ - eth_copy_and_sum(skb, data, pkt_len-4, 0); + skb_copy_to_linear_data(skb, data, pkt_len-4); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); } diff -Nurb linux-2.6.22-570/arch/ppc/Kconfig.debug linux-2.6.22-591/arch/ppc/Kconfig.debug --- linux-2.6.22-570/arch/ppc/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 @@ -2,42 +2,6 @@ source "lib/Kconfig.debug" -config KGDB - bool "Include kgdb kernel debugger" - depends on DEBUG_KERNEL && (BROKEN || PPC_GEN550 || 4xx) - select DEBUG_INFO - help - Include in-kernel hooks for kgdb, the Linux kernel source level - debugger. See for more information. - Unless you are intending to debug the kernel, say N here. 
- -choice - prompt "Serial Port" - depends on KGDB - default KGDB_TTYS1 - -config KGDB_TTYS0 - bool "ttyS0" - -config KGDB_TTYS1 - bool "ttyS1" - -config KGDB_TTYS2 - bool "ttyS2" - -config KGDB_TTYS3 - bool "ttyS3" - -endchoice - -config KGDB_CONSOLE - bool "Enable serial console thru kgdb port" - depends on KGDB && 8xx || CPM2 - help - If you enable this, all serial console messages will be sent - over the gdb stub. - If unsure, say N. - config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL diff -Nurb linux-2.6.22-570/arch/ppc/amiga/config.c linux-2.6.22-591/arch/ppc/amiga/config.c --- linux-2.6.22-570/arch/ppc/amiga/config.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/amiga/config.c 2007-12-21 15:36:11.000000000 -0500 @@ -753,17 +753,11 @@ void amiga_serial_console_write(struct console *co, const char *s, unsigned int count) { -#if 0 /* def CONFIG_KGDB */ - /* FIXME:APUS GDB doesn't seem to like O-packages before it is - properly connected with the target. 
*/ - __gdb_output_string (s, count); -#else while (count--) { if (*s == '\n') amiga_serial_putc('\r'); amiga_serial_putc(*s++); } -#endif } #ifdef CONFIG_SERIAL_CONSOLE diff -Nurb linux-2.6.22-570/arch/ppc/kernel/Makefile linux-2.6.22-591/arch/ppc/kernel/Makefile --- linux-2.6.22-570/arch/ppc/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -14,7 +14,7 @@ obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_RAPIDIO) += rio.o -obj-$(CONFIG_KGDB) += ppc-stub.o +obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp32.o obj-$(CONFIG_SMP) += smp.o smp-tbsync.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb.c linux-2.6.22-591/arch/ppc/kernel/kgdb.c --- linux-2.6.22-570/arch/ppc/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/ppc/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,348 @@ +/* + * arch/ppc/kernel/kgdb.c + * + * PowerPC backend to the KGDB stub. + * + * Maintainer: Tom Rini + * + * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) + * Copyright (C) 2003 Timesys Corporation. + * Copyright (C) 2004, 2006 MontaVista Software, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This table contains the mapping between PowerPC hardware trap types, and + * signals, which are primarily what GDB understands. GDB and the kernel + * don't always agree on values, so we use constants taken from gdb-6.2. 
+ */ +static struct hard_trap_info +{ + unsigned int tt; /* Trap type code for powerpc */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ + { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ + { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ + { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ + { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ + { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ + { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ + { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ +#if defined(CONFIG_FSL_BOOKE) + { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ + { 0x2060, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ + { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ +#else + { 0x1000, 0x0e /* SIGALRM */ }, /* programmable interval timer */ + { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ + { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ +#endif +#else + { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ +#if defined(CONFIG_8xx) + { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ +#else + { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ + { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ + { 0x1400, 0x02 /* SIGINT */ }, /* SMI
*/ + { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1700, 0x04 /* SIGILL */ }, /* TAU */ + { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */ +#endif +#endif + { 0x0000, 0x00 } /* Must be last */ +}; + +extern atomic_t cpu_doing_single_step; + +static int computeSignal(unsigned int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +/* KGDB functions to use existing PowerPC hooks. */ +static void kgdb_debugger(struct pt_regs *regs) +{ + kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); +} + +static int kgdb_breakpoint(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) + regs->nip += 4; + + return 1; +} + +static int kgdb_singlestep(struct pt_regs *regs) +{ + struct thread_info *thread_info, *exception_thread_info; + + if (user_mode(regs)) + return 0; + /* + * On Book E and perhaps other processors, singlestep is handled on + * the critical exception stack. This causes current_thread_info() + * to fail, since it locates the thread_info by masking off + * the low bits of the current stack pointer. We work around + * this issue by copying the thread_info from the kernel stack + * before calling kgdb_handle_exception, and copying it back + * afterwards. On most processors the copy is avoided since + * exception_thread_info == thread_info.
+ */ + thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); + exception_thread_info = current_thread_info(); + + if (thread_info != exception_thread_info) + memcpy(exception_thread_info, thread_info, sizeof *thread_info); + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (thread_info != exception_thread_info) + memcpy(thread_info, exception_thread_info, sizeof *thread_info); + + return 1; +} + +int kgdb_iabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); + return 1; +} + +int kgdb_dabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); + return 1; +} + +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, MAXREG * 4); + + for (reg = 0; reg < 32; reg++) + *(ptr++) = regs->gpr[reg]; + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + *(ptr++) = current->thread.evr[reg]; +#else + ptr += 32; +#endif +#else + ptr += 64; +#endif + + *(ptr++) = regs->nip; + *(ptr++) = regs->msr; + *(ptr++) = regs->ccr; + *(ptr++) = regs->link; + *(ptr++) = regs->ctr; + *(ptr++) = regs->xer; + +#ifdef CONFIG_SPE + /* u64 acc */ + *(ptr++) = current->thread.acc >> 32; + *(ptr++) = current->thread.acc & 0xffffffff; + *(ptr++) = current->thread.spefscr; +#endif +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + + STACK_FRAME_OVERHEAD); + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, MAXREG * 4); + + /* Regs GPR0-2 */ + for (reg = 0; reg < 3; reg++) + *(ptr++) = regs->gpr[reg]; + + /* Regs GPR3-13 are not saved */ + ptr += 11; + + /* Regs GPR14-31 */ + for (reg = 14; reg < 32; reg++) + *(ptr++) = regs->gpr[reg]; + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg 
< 32; reg++) + *(ptr++) = p->thread.evr[reg]; +#else + ptr += 32; +#endif +#else + ptr += 64; +#endif + + *(ptr++) = regs->nip; + *(ptr++) = regs->msr; + *(ptr++) = regs->ccr; + *(ptr++) = regs->link; + *(ptr++) = regs->ctr; + *(ptr++) = regs->xer; + +#ifdef CONFIG_SPE + /* u64 acc */ + *(ptr++) = p->thread.acc >> 32; + *(ptr++) = p->thread.acc & 0xffffffff; + *(ptr++) = p->thread.spefscr; +#endif +} + +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; +#ifdef CONFIG_SPE + union { + u32 v32[2]; + u64 v64; + } acc; +#endif + + for (reg = 0; reg < 32; reg++) + regs->gpr[reg] = *(ptr++); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + current->thread.evr[reg] = *(ptr++); +#else + ptr += 32; +#endif +#else + ptr += 64; +#endif + + regs->nip = *(ptr++); + regs->msr = *(ptr++); + regs->ccr = *(ptr++); + regs->link = *(ptr++); + regs->ctr = *(ptr++); + regs->xer = *(ptr++); + +#ifdef CONFIG_SPE + /* u64 acc */ + acc.v32[0] = *(ptr++); + acc.v32[1] = *(ptr++); + current->thread.acc = acc.v64; + current->thread.spefscr = *(ptr++); +#endif +} + +/* + * This function does PowerPC specific processing for interfacing to gdb. + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long addr; + + switch (remcom_in_buffer[0]) + { + /* + * sAA..AA Step one instruction from AA..AA + * This will return an error to gdb .. 
+ */ + case 's': + case 'c': + /* handle the optional parameter */ + if (kgdb_hex2long (&ptr, &addr)) + linux_regs->nip = addr; + + atomic_set(&cpu_doing_single_step, -1); + /* set the trace bit if we're stepping */ + if (remcom_in_buffer[0] == 's') { +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, + mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); + linux_regs->msr |= MSR_DE; +#else + linux_regs->msr |= MSR_SE; +#endif + debugger_step = 1; + if (kgdb_contthread) + atomic_set(&cpu_doing_single_step, + smp_processor_id()); + } + return 0; + } + + return -1; +} + +/* + * Global data + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, +}; + +int kgdb_arch_init(void) +{ + debugger = kgdb_debugger; + debugger_bpt = kgdb_breakpoint; + debugger_sstep = kgdb_singlestep; + debugger_iabr_match = kgdb_iabr_match; + debugger_dabr_match = kgdb_dabr_match; + + return 0; +} + +arch_initcall(kgdb_arch_init); diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S linux-2.6.22-591/arch/ppc/kernel/kgdb_setjmp32.S --- linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/ppc/kernel/kgdb_setjmp32.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,41 @@ +/* + * Copyright (C) 1996 Paul Mackerras + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include + + .text + +/* + * Save/restore state in case a memory access causes a fault. 
+ * + * int kgdb_fault_setjmp(unsigned long *curr_context); + * void kgdb_fault_longjmp(unsigned long *curr_context); + */ + +_GLOBAL(kgdb_fault_setjmp) + mflr r0 + stw r0,0(r3) + stw r1,4(r3) + stw r2,8(r3) + mfcr r0 + stw r0,12(r3) + stmw r13,16(r3) + li r3,0 + blr + +_GLOBAL(kgdb_fault_longjmp) + lmw r13,16(r3) + lwz r0,12(r3) + mtcrf 0x38,r0 + lwz r0,0(r3) + lwz r1,4(r3) + lwz r2,8(r3) + mtlr r0 + mr r3,r1 + blr diff -Nurb linux-2.6.22-570/arch/ppc/kernel/misc.S linux-2.6.22-591/arch/ppc/kernel/misc.S --- linux-2.6.22-570/arch/ppc/kernel/misc.S 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/ppc/kernel/misc.S 2007-12-21 15:36:11.000000000 -0500 @@ -328,7 +328,7 @@ mtspr SPRN_L1CSR0,r3 isync blr -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 @@ -355,7 +355,7 @@ _GLOBAL(__flush_icache_range) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 andc r3,r3,r5 subf r4,r3,r4 @@ -472,7 +472,7 @@ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r3,r3,0,0,19 /* Get page base address */ li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 @@ -500,7 +500,7 @@ _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION blr /* for 601, do nothing */ -END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mfmsr r10 rlwinm r0,r10,0,28,26 /* clear DR */ mtmsr r0 diff -Nurb linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c linux-2.6.22-591/arch/ppc/kernel/ppc-stub.c --- linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/kernel/ppc-stub.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,866 +0,0 @@ -/* - * 
ppc-stub.c: KGDB support for the Linux kernel. - * - * adapted from arch/sparc/kernel/sparc-stub.c for the PowerPC - * some stuff borrowed from Paul Mackerras' xmon - * Copyright (C) 1998 Michael AK Tesch (tesch@cs.wisc.edu) - * - * Modifications to run under Linux - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - * - * This file originally came from the gdb sources, and the - * copyright notices have been retained below. - */ - -/**************************************************************************** - - THIS SOFTWARE IS NOT COPYRIGHTED - - HP offers the following for use in the public domain. HP makes no - warranty with regard to the software or its performance and the - user accepts the software "AS IS" with all faults. - - HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD - TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -****************************************************************************/ - -/**************************************************************************** - * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ - * - * Module name: remcom.c $ - * Revision: 1.34 $ - * Date: 91/03/09 12:29:49 $ - * Contributor: Lake Stevens Instrument Division$ - * - * Description: low level support for gdb debugger. $ - * - * Considerations: only works on target hardware $ - * - * Written by: Glenn Engel $ - * ModuleState: Experimental $ - * - * NOTES: See Below $ - * - * Modified for SPARC by Stu Grossman, Cygnus Support. - * - * This code has been extensively tested on the Fujitsu SPARClite demo board. - * - * To enable debugger support, two things need to happen. One, a - * call to set_debug_traps() is necessary in order to allow any breakpoints - * or error conditions to be properly intercepted and reported to gdb. - * Two, a breakpoint needs to be generated to begin communication. This - * is most easily accomplished by a call to breakpoint(). 
Breakpoint() - * simulates a breakpoint by executing a trap #1. - * - ************* - * - * The following gdb commands are supported: - * - * command function Return value - * - * g return the value of the CPU registers hex data or ENN - * G set the value of the CPU registers OK or ENN - * qOffsets Get section offsets. Reply is Text=xxx;Data=yyy;Bss=zzz - * - * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN - * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN - * - * c Resume at current address SNN ( signal NN) - * cAA..AA Continue at address AA..AA SNN - * - * s Step one instruction SNN - * sAA..AA Step one instruction from AA..AA SNN - * - * k kill - * - * ? What was the last sigval ? SNN (signal NN) - * - * bBB..BB Set baud rate to BB..BB OK or BNN, then sets - * baud rate - * - * All commands and responses are sent with a packet which includes a - * checksum. A packet consists of - * - * $#. - * - * where - * :: - * :: > - * - * When a packet is received, it is first acknowledged with either '+' or '-'. - * '+' indicates a successful transfer. '-' indicates a failed transfer. 
- * - * Example: - * - * Host: Reply: - * $m0,10#2a +$00010203040506070809101112131415#42 - * - ****************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -void breakinst(void); - -/* - * BUFMAX defines the maximum number of characters in inbound/outbound buffers - * at least NUMREGBYTES*2 are needed for register packets - */ -#define BUFMAX 2048 -static char remcomInBuffer[BUFMAX]; -static char remcomOutBuffer[BUFMAX]; - -static int initialized; -static int kgdb_active; -static int kgdb_started; -static u_int fault_jmp_buf[100]; -static int kdebug; - - -static const char hexchars[]="0123456789abcdef"; - -/* Place where we save old trap entries for restoration - sparc*/ -/* struct tt_entry kgdb_savettable[256]; */ -/* typedef void (*trapfunc_t)(void); */ - -static void kgdb_fault_handler(struct pt_regs *regs); -static int handle_exception (struct pt_regs *regs); - -#if 0 -/* Install an exception handler for kgdb */ -static void exceptionHandler(int tnum, unsigned int *tfunc) -{ - /* We are dorking with a live trap table, all irqs off */ -} -#endif - -int -kgdb_setjmp(long *buf) -{ - asm ("mflr 0; stw 0,0(%0);" - "stw 1,4(%0); stw 2,8(%0);" - "mfcr 0; stw 0,12(%0);" - "stmw 13,16(%0)" - : : "r" (buf)); - /* XXX should save fp regs as well */ - return 0; -} -void -kgdb_longjmp(long *buf, int val) -{ - if (val == 0) - val = 1; - asm ("lmw 13,16(%0);" - "lwz 0,12(%0); mtcrf 0x38,0;" - "lwz 0,0(%0); lwz 1,4(%0); lwz 2,8(%0);" - "mtlr 0; mr 3,%1" - : : "r" (buf), "r" (val)); -} -/* Convert ch from a hex digit to an int */ -static int -hex(unsigned char ch) -{ - if (ch >= 'a' && ch <= 'f') - return ch-'a'+10; - if (ch >= '0' && ch <= '9') - return ch-'0'; - if (ch >= 'A' && ch <= 'F') - return ch-'A'+10; - return -1; -} - -/* Convert the memory pointed to by mem into hex, placing result in buf. 
- * Return a pointer to the last char put in buf (null), in case of mem fault, - * return 0. - */ -static unsigned char * -mem2hex(const char *mem, char *buf, int count) -{ - unsigned char ch; - unsigned short tmp_s; - unsigned long tmp_l; - - if (kgdb_setjmp((long*)fault_jmp_buf) == 0) { - debugger_fault_handler = kgdb_fault_handler; - - /* Accessing 16 bit and 32 bit objects in a single - ** load instruction is required to avoid bad side - ** effects for some IO registers. - */ - - if ((count == 2) && (((long)mem & 1) == 0)) { - tmp_s = *(unsigned short *)mem; - mem += 2; - *buf++ = hexchars[(tmp_s >> 12) & 0xf]; - *buf++ = hexchars[(tmp_s >> 8) & 0xf]; - *buf++ = hexchars[(tmp_s >> 4) & 0xf]; - *buf++ = hexchars[tmp_s & 0xf]; - - } else if ((count == 4) && (((long)mem & 3) == 0)) { - tmp_l = *(unsigned int *)mem; - mem += 4; - *buf++ = hexchars[(tmp_l >> 28) & 0xf]; - *buf++ = hexchars[(tmp_l >> 24) & 0xf]; - *buf++ = hexchars[(tmp_l >> 20) & 0xf]; - *buf++ = hexchars[(tmp_l >> 16) & 0xf]; - *buf++ = hexchars[(tmp_l >> 12) & 0xf]; - *buf++ = hexchars[(tmp_l >> 8) & 0xf]; - *buf++ = hexchars[(tmp_l >> 4) & 0xf]; - *buf++ = hexchars[tmp_l & 0xf]; - - } else { - while (count-- > 0) { - ch = *mem++; - *buf++ = hexchars[ch >> 4]; - *buf++ = hexchars[ch & 0xf]; - } - } - - } else { - /* error condition */ - } - debugger_fault_handler = NULL; - *buf = 0; - return buf; -} - -/* convert the hex array pointed to by buf into binary to be placed in mem - * return a pointer to the character AFTER the last byte written. -*/ -static char * -hex2mem(char *buf, char *mem, int count) -{ - unsigned char ch; - int i; - char *orig_mem; - unsigned short tmp_s; - unsigned long tmp_l; - - orig_mem = mem; - - if (kgdb_setjmp((long*)fault_jmp_buf) == 0) { - debugger_fault_handler = kgdb_fault_handler; - - /* Accessing 16 bit and 32 bit objects in a single - ** store instruction is required to avoid bad side - ** effects for some IO registers. 
- */ - - if ((count == 2) && (((long)mem & 1) == 0)) { - tmp_s = hex(*buf++) << 12; - tmp_s |= hex(*buf++) << 8; - tmp_s |= hex(*buf++) << 4; - tmp_s |= hex(*buf++); - - *(unsigned short *)mem = tmp_s; - mem += 2; - - } else if ((count == 4) && (((long)mem & 3) == 0)) { - tmp_l = hex(*buf++) << 28; - tmp_l |= hex(*buf++) << 24; - tmp_l |= hex(*buf++) << 20; - tmp_l |= hex(*buf++) << 16; - tmp_l |= hex(*buf++) << 12; - tmp_l |= hex(*buf++) << 8; - tmp_l |= hex(*buf++) << 4; - tmp_l |= hex(*buf++); - - *(unsigned long *)mem = tmp_l; - mem += 4; - - } else { - for (i=0; i# */ -static void -getpacket(char *buffer) -{ - unsigned char checksum; - unsigned char xmitcsum; - int i; - int count; - unsigned char ch; - - do { - /* wait around for the start character, ignore all other - * characters */ - while ((ch = (getDebugChar() & 0x7f)) != '$') ; - - checksum = 0; - xmitcsum = -1; - - count = 0; - - /* now, read until a # or end of buffer is found */ - while (count < BUFMAX) { - ch = getDebugChar() & 0x7f; - if (ch == '#') - break; - checksum = checksum + ch; - buffer[count] = ch; - count = count + 1; - } - - if (count >= BUFMAX) - continue; - - buffer[count] = 0; - - if (ch == '#') { - xmitcsum = hex(getDebugChar() & 0x7f) << 4; - xmitcsum |= hex(getDebugChar() & 0x7f); - if (checksum != xmitcsum) - putDebugChar('-'); /* failed checksum */ - else { - putDebugChar('+'); /* successful transfer */ - /* if a sequence char is present, reply the ID */ - if (buffer[2] == ':') { - putDebugChar(buffer[0]); - putDebugChar(buffer[1]); - /* remove sequence chars from buffer */ - count = strlen(buffer); - for (i=3; i <= count; i++) - buffer[i-3] = buffer[i]; - } - } - } - } while (checksum != xmitcsum); -} - -/* send the packet in buffer. */ -static void putpacket(unsigned char *buffer) -{ - unsigned char checksum; - int count; - unsigned char ch, recv; - - /* $#. 
*/ - do { - putDebugChar('$'); - checksum = 0; - count = 0; - - while ((ch = buffer[count])) { - putDebugChar(ch); - checksum += ch; - count += 1; - } - - putDebugChar('#'); - putDebugChar(hexchars[checksum >> 4]); - putDebugChar(hexchars[checksum & 0xf]); - recv = getDebugChar(); - } while ((recv & 0x7f) != '+'); -} - -static void kgdb_flush_cache_all(void) -{ - flush_instruction_cache(); -} - -/* Set up exception handlers for tracing and breakpoints - * [could be called kgdb_init()] - */ -void set_debug_traps(void) -{ -#if 0 - unsigned char c; - - save_and_cli(flags); - - /* In case GDB is started before us, ack any packets (presumably - * "$?#xx") sitting there. - * - * I've found this code causes more problems than it solves, - * so that's why it's commented out. GDB seems to work fine - * now starting either before or after the kernel -bwb - */ - - while((c = getDebugChar()) != '$'); - while((c = getDebugChar()) != '#'); - c = getDebugChar(); /* eat first csum byte */ - c = getDebugChar(); /* eat second csum byte */ - putDebugChar('+'); /* ack it */ -#endif - debugger = kgdb; - debugger_bpt = kgdb_bpt; - debugger_sstep = kgdb_sstep; - debugger_iabr_match = kgdb_iabr_match; - debugger_dabr_match = kgdb_dabr_match; - - initialized = 1; -} - -static void kgdb_fault_handler(struct pt_regs *regs) -{ - kgdb_longjmp((long*)fault_jmp_buf, 1); -} - -int kgdb_bpt(struct pt_regs *regs) -{ - return handle_exception(regs); -} - -int kgdb_sstep(struct pt_regs *regs) -{ - return handle_exception(regs); -} - -void kgdb(struct pt_regs *regs) -{ - handle_exception(regs); -} - -int kgdb_iabr_match(struct pt_regs *regs) -{ - printk(KERN_ERR "kgdb doesn't support iabr, what?!?\n"); - return handle_exception(regs); -} - -int kgdb_dabr_match(struct pt_regs *regs) -{ - printk(KERN_ERR "kgdb doesn't support dabr, what?!?\n"); - return handle_exception(regs); -} - -/* Convert the hardware trap type code to a unix signal number. 
*/ -/* - * This table contains the mapping between PowerPC hardware trap types, and - * signals, which are primarily what GDB understands. - */ -static struct hard_trap_info -{ - unsigned int tt; /* Trap type code for powerpc */ - unsigned char signo; /* Signal that we map this trap into */ -} hard_trap_info[] = { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - { 0x100, SIGINT }, /* critical input interrupt */ - { 0x200, SIGSEGV }, /* machine check */ - { 0x300, SIGSEGV }, /* data storage */ - { 0x400, SIGBUS }, /* instruction storage */ - { 0x500, SIGINT }, /* interrupt */ - { 0x600, SIGBUS }, /* alignment */ - { 0x700, SIGILL }, /* program */ - { 0x800, SIGILL }, /* reserved */ - { 0x900, SIGILL }, /* reserved */ - { 0xa00, SIGILL }, /* reserved */ - { 0xb00, SIGILL }, /* reserved */ - { 0xc00, SIGCHLD }, /* syscall */ - { 0xd00, SIGILL }, /* reserved */ - { 0xe00, SIGILL }, /* reserved */ - { 0xf00, SIGILL }, /* reserved */ - /* - ** 0x1000 PIT - ** 0x1010 FIT - ** 0x1020 watchdog - ** 0x1100 data TLB miss - ** 0x1200 instruction TLB miss - */ - { 0x2002, SIGTRAP}, /* debug */ -#else - { 0x200, SIGSEGV }, /* machine check */ - { 0x300, SIGSEGV }, /* address error (store) */ - { 0x400, SIGBUS }, /* instruction bus error */ - { 0x500, SIGINT }, /* interrupt */ - { 0x600, SIGBUS }, /* alingment */ - { 0x700, SIGTRAP }, /* breakpoint trap */ - { 0x800, SIGFPE }, /* fpu unavail */ - { 0x900, SIGALRM }, /* decrementer */ - { 0xa00, SIGILL }, /* reserved */ - { 0xb00, SIGILL }, /* reserved */ - { 0xc00, SIGCHLD }, /* syscall */ - { 0xd00, SIGTRAP }, /* single-step/watch */ - { 0xe00, SIGFPE }, /* fp assist */ -#endif - { 0, 0} /* Must be last */ - -}; - -static int computeSignal(unsigned int tt) -{ - struct hard_trap_info *ht; - - for (ht = hard_trap_info; ht->tt && ht->signo; ht++) - if (ht->tt == tt) - return ht->signo; - - return SIGHUP; /* default for things we don't know about */ -} - -#define PC_REGNUM 64 -#define SP_REGNUM 1 - -/* - * This function does all 
command processing for interfacing to gdb. - */ -static int -handle_exception (struct pt_regs *regs) -{ - int sigval; - int addr; - int length; - char *ptr; - unsigned int msr; - - /* We don't handle user-mode breakpoints. */ - if (user_mode(regs)) - return 0; - - if (debugger_fault_handler) { - debugger_fault_handler(regs); - panic("kgdb longjump failed!\n"); - } - if (kgdb_active) { - printk(KERN_ERR "interrupt while in kgdb, returning\n"); - return 0; - } - - kgdb_active = 1; - kgdb_started = 1; - -#ifdef KGDB_DEBUG - printk("kgdb: entering handle_exception; trap [0x%x]\n", - (unsigned int)regs->trap); -#endif - - kgdb_interruptible(0); - lock_kernel(); - msr = mfmsr(); - mtmsr(msr & ~MSR_EE); /* disable interrupts */ - - if (regs->nip == (unsigned long)breakinst) { - /* Skip over breakpoint trap insn */ - regs->nip += 4; - } - - /* reply to host that an exception has occurred */ - sigval = computeSignal(regs->trap); - ptr = remcomOutBuffer; - - *ptr++ = 'T'; - *ptr++ = hexchars[sigval >> 4]; - *ptr++ = hexchars[sigval & 0xf]; - *ptr++ = hexchars[PC_REGNUM >> 4]; - *ptr++ = hexchars[PC_REGNUM & 0xf]; - *ptr++ = ':'; - ptr = mem2hex((char *)&regs->nip, ptr, 4); - *ptr++ = ';'; - *ptr++ = hexchars[SP_REGNUM >> 4]; - *ptr++ = hexchars[SP_REGNUM & 0xf]; - *ptr++ = ':'; - ptr = mem2hex(((char *)regs) + SP_REGNUM*4, ptr, 4); - *ptr++ = ';'; - *ptr++ = 0; - - putpacket(remcomOutBuffer); - if (kdebug) - printk("remcomOutBuffer: %s\n", remcomOutBuffer); - - /* XXX We may want to add some features dealing with poking the - * XXX page tables, ... (look at sparc-stub.c for more info) - * XXX also required hacking to the gdb sources directly...
- */ - - while (1) { - remcomOutBuffer[0] = 0; - - getpacket(remcomInBuffer); - switch (remcomInBuffer[0]) { - case '?': /* report most recent signal */ - remcomOutBuffer[0] = 'S'; - remcomOutBuffer[1] = hexchars[sigval >> 4]; - remcomOutBuffer[2] = hexchars[sigval & 0xf]; - remcomOutBuffer[3] = 0; - break; -#if 0 - case 'q': /* this screws up gdb for some reason...*/ - { - extern long _start, sdata, __bss_start; - - ptr = &remcomInBuffer[1]; - if (strncmp(ptr, "Offsets", 7) != 0) - break; - - ptr = remcomOutBuffer; - sprintf(ptr, "Text=%8.8x;Data=%8.8x;Bss=%8.8x", - &_start, &sdata, &__bss_start); - break; - } -#endif - case 'd': - /* toggle debug flag */ - kdebug ^= 1; - break; - - case 'g': /* return the value of the CPU registers. - * some of them are non-PowerPC names :( - * they are stored in gdb like: - * struct { - * u32 gpr[32]; - * f64 fpr[32]; - * u32 pc, ps, cnd, lr; (ps=msr) - * u32 cnt, xer, mq; - * } - */ - { - int i; - ptr = remcomOutBuffer; - /* General Purpose Regs */ - ptr = mem2hex((char *)regs, ptr, 32 * 4); - /* Floating Point Regs - FIXME */ - /*ptr = mem2hex((char *), ptr, 32 * 8);*/ - for(i=0; i<(32*8*2); i++) { /* 2chars/byte */ - ptr[i] = '0'; - } - ptr += 32*8*2; - /* pc, msr, cr, lr, ctr, xer, (mq is unused) */ - ptr = mem2hex((char *)&regs->nip, ptr, 4); - ptr = mem2hex((char *)&regs->msr, ptr, 4); - ptr = mem2hex((char *)&regs->ccr, ptr, 4); - ptr = mem2hex((char *)&regs->link, ptr, 4); - ptr = mem2hex((char *)&regs->ctr, ptr, 4); - ptr = mem2hex((char *)&regs->xer, ptr, 4); - } - break; - - case 'G': /* set the value of the CPU registers */ - { - ptr = &remcomInBuffer[1]; - - /* - * If the stack pointer has moved, you should pray. - * (cause only god can help you). - */ - - /* General Purpose Regs */ - hex2mem(ptr, (char *)regs, 32 * 4); - - /* Floating Point Regs - FIXME??
- */ - /*ptr = hex2mem(ptr, ??, 32 * 8);*/ - ptr += 32*8*2; - - /* pc, msr, cr, lr, ctr, xer, (mq is unused) */ - ptr = hex2mem(ptr, (char *)&regs->nip, 4); - ptr = hex2mem(ptr, (char *)&regs->msr, 4); - ptr = hex2mem(ptr, (char *)&regs->ccr, 4); - ptr = hex2mem(ptr, (char *)&regs->link, 4); - ptr = hex2mem(ptr, (char *)&regs->ctr, 4); - ptr = hex2mem(ptr, (char *)&regs->xer, 4); - - strcpy(remcomOutBuffer,"OK"); - } - break; - case 'H': - /* don't do anything, yet, just acknowledge */ - hexToInt(&ptr, &addr); - strcpy(remcomOutBuffer,"OK"); - break; - - case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ - /* Try to read %x,%x. */ - - ptr = &remcomInBuffer[1]; - - if (hexToInt(&ptr, &addr) && *ptr++ == ',' - && hexToInt(&ptr, &length)) { - if (mem2hex((char *)addr, remcomOutBuffer, - length)) - break; - strcpy(remcomOutBuffer, "E03"); - } else - strcpy(remcomOutBuffer, "E01"); - break; - - case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ - /* Try to read '%x,%x:'. */ - - ptr = &remcomInBuffer[1]; - - if (hexToInt(&ptr, &addr) && *ptr++ == ',' - && hexToInt(&ptr, &length) - && *ptr++ == ':') { - if (hex2mem(ptr, (char *)addr, length)) - strcpy(remcomOutBuffer, "OK"); - else - strcpy(remcomOutBuffer, "E03"); - flush_icache_range(addr, addr+length); - } else - strcpy(remcomOutBuffer, "E02"); - break; - - - case 'k': /* kill the program, actually just continue */ - case 'c': /* cAA..AA Continue; address AA..AA optional */ - /* try to read optional parameter, pc unchanged if no parm */ - - ptr = &remcomInBuffer[1]; - if (hexToInt(&ptr, &addr)) - regs->nip = addr; - -/* Need to flush the instruction cache here, as we may have deposited a - * breakpoint, and the icache probably has no way of knowing that a data ref to - * some location may have changed something that is in the instruction cache.
- */ - kgdb_flush_cache_all(); - mtmsr(msr); - - kgdb_interruptible(1); - unlock_kernel(); - kgdb_active = 0; - if (kdebug) { - printk("remcomInBuffer: %s\n", remcomInBuffer); - printk("remcomOutBuffer: %s\n", remcomOutBuffer); - } - return 1; - - case 's': - kgdb_flush_cache_all(); -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC); - regs->msr |= MSR_DE; -#else - regs->msr |= MSR_SE; -#endif - unlock_kernel(); - kgdb_active = 0; - if (kdebug) { - printk("remcomInBuffer: %s\n", remcomInBuffer); - printk("remcomOutBuffer: %s\n", remcomOutBuffer); - } - return 1; - - case 'r': /* Reset (if user process..exit ???)*/ - panic("kgdb reset."); - break; - } /* switch */ - if (remcomOutBuffer[0] && kdebug) { - printk("remcomInBuffer: %s\n", remcomInBuffer); - printk("remcomOutBuffer: %s\n", remcomOutBuffer); - } - /* reply to the request */ - putpacket(remcomOutBuffer); - } /* while(1) */ -} - -/* This function will generate a breakpoint exception. It is used at the - beginning of a program to sync up with a debugger and can be used - otherwise as a quick means to stop program execution and "break" into - the debugger. */ - -void -breakpoint(void) -{ - if (!initialized) { - printk("breakpoint() called b4 kgdb init\n"); - return; - } - - asm(" .globl breakinst \n\ - breakinst: .long 0x7d821008"); -} - -#ifdef CONFIG_KGDB_CONSOLE -/* Output string in GDB O-packet format if GDB has connected. If nothing - output, returns 0 (caller must then handle output). */ -int -kgdb_output_string (const char* s, unsigned int count) -{ - char buffer[512]; - - if (!kgdb_started) - return 0; - - count = (count <= (sizeof(buffer) / 2 - 2)) - ? 
count : (sizeof(buffer) / 2 - 2); - - buffer[0] = 'O'; - mem2hex (s, &buffer[1], count); - putpacket(buffer); - - return 1; -} -#endif - -static void sysrq_handle_gdb(int key, struct pt_regs *pt_regs, - struct tty_struct *tty) -{ - printk("Entering GDB stub\n"); - breakpoint(); -} -static struct sysrq_key_op sysrq_gdb_op = { - .handler = sysrq_handle_gdb, - .help_msg = "Gdb", - .action_msg = "GDB", -}; - -static int gdb_register_sysrq(void) -{ - printk("Registering GDB sysrq handler\n"); - register_sysrq_key('g', &sysrq_gdb_op); - return 0; -} -module_init(gdb_register_sysrq); diff -Nurb linux-2.6.22-570/arch/ppc/kernel/setup.c linux-2.6.22-591/arch/ppc/kernel/setup.c --- linux-2.6.22-570/arch/ppc/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -48,10 +48,6 @@ #include #endif -#if defined CONFIG_KGDB -#include -#endif - extern void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7); extern void reloc_got2(unsigned long offset); @@ -509,24 +505,12 @@ #endif /* CONFIG_XMON */ if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab); -#if defined(CONFIG_KGDB) - if (ppc_md.kgdb_map_scc) - ppc_md.kgdb_map_scc(); - set_debug_traps(); - if (strstr(cmd_line, "gdb")) { - if (ppc_md.progress) - ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); - printk("kgdb breakpoint activated\n"); - breakpoint(); - } -#endif - /* * Set cache line size based on type of cpu as a default. * Systems with OF can look in the properties on the cpu node(s) * for a possibly more accurate value. */ - if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { + if (! 
cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) { dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; ucache_bsize = 0; diff -Nurb linux-2.6.22-570/arch/ppc/mm/fault.c linux-2.6.22-591/arch/ppc/mm/fault.c --- linux-2.6.22-570/arch/ppc/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/ppc/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -330,6 +331,14 @@ return; } +#ifdef CONFIG_KGDB + if (atomic_read(&debugger_active) && kgdb_may_fault) { + /* Restore our previous state. */ + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + /* Not reached. */ + } +#endif + /* kernel has accessed a bad area */ #if defined(CONFIG_XMON) || defined(CONFIG_KGDB) if (debugger_kernel_faults) diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c linux-2.6.22-591/arch/ppc/platforms/4xx/bamboo.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/bamboo.c 2007-12-21 15:36:11.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -337,10 +338,13 @@ printk("Early serial init of port 0 failed\n"); } -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(0, &port); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &port); +#endif port.membase = ioremap64(PPC440EP_UART1_ADDR, 8); port.irq = 1; @@ -351,10 +355,13 @@ printk("Early serial init of port 1 failed\n"); } -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(1, &port); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &port); +#endif port.membase = ioremap64(PPC440EP_UART2_ADDR, 8); port.irq = 3; @@ -365,10 +372,13 @@ printk("Early serial init of port 2 failed\n"); } -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || 
defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(2, &port); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(2, &port); +#endif port.membase = ioremap64(PPC440EP_UART3_ADDR, 8); port.irq = 4; @@ -378,6 +388,10 @@ if (early_serial_setup(&port) != 0) { printk("Early serial init of port 3 failed\n"); } + +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(3, &port); +#endif } static void __init @@ -435,8 +449,5 @@ ppc_md.nvram_read_val = todc_direct_read_val; ppc_md.nvram_write_val = todc_direct_write_val; -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = bamboo_early_serial_map; -#endif } diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c linux-2.6.22-591/arch/ppc/platforms/4xx/bubinga.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/bubinga.c 2007-12-21 15:36:11.000000000 -0500 @@ -4,7 +4,7 @@ * Author: SAW (IBM), derived from walnut.c. * Maintained by MontaVista Software * - * 2003 (c) MontaVista Softare Inc. This file is licensed under the + * 2003-2004 (c) MontaVista Softare Inc. This file is licensed under the * terms of the GNU General Public License version 2. This program is * licensed "as is" without any warranty of any kind, whether express * or implied. 
@@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include @@ -100,17 +100,26 @@ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; port.line = 0; - if (early_serial_setup(&port) != 0) { +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&port) != 0) printk("Early serial init of port 0 failed\n"); - } +#endif + +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &port); +#endif port.membase = (void*)ACTING_UART1_IO_BASE; port.irq = ACTING_UART1_INT; port.line = 1; - if (early_serial_setup(&port) != 0) { +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&port) != 0) printk("Early serial init of port 1 failed\n"); - } +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &port); +#endif } void __init @@ -257,8 +266,4 @@ ppc_md.nvram_read_val = todc_direct_read_val; ppc_md.nvram_write_val = todc_direct_write_val; #endif -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = bubinga_early_serial_map; -#endif } - diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c linux-2.6.22-591/arch/ppc/platforms/4xx/ebony.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/ebony.c 2007-12-21 15:36:11.000000000 -0500 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -226,14 +227,20 @@ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; port.line = 0; - if (early_serial_setup(&port) != 0) { +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&port) != 0) printk("Early serial init of port 0 failed\n"); - } +#endif -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(0, &port); +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &port); +#endif +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) /* Purge TLB entry added in head_44x.S for early serial access */ _tlbie(UART0_IO_BASE); #endif @@ 
-243,14 +250,18 @@ port.uartclk = clocks.uart1; port.line = 1; - if (early_serial_setup(&port) != 0) { +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&port) != 0) printk("Early serial init of port 1 failed\n"); - } +#endif -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(1, &port); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &port); +#endif } static void __init @@ -327,8 +338,4 @@ ppc_md.nvram_read_val = todc_direct_read_val; ppc_md.nvram_write_val = todc_direct_write_val; -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = ebony_early_serial_map; -#endif } - diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c linux-2.6.22-591/arch/ppc/platforms/4xx/luan.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/luan.c 2007-12-21 15:36:11.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -283,6 +284,9 @@ if (early_serial_setup(&port) != 0) { printk("Early serial init of port 0 failed\n"); } +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &port); +#endif port.membase = ioremap64(PPC440SP_UART1_ADDR, 8); port.irq = UART1_INT; @@ -292,6 +296,9 @@ if (early_serial_setup(&port) != 0) { printk("Early serial init of port 1 failed\n"); } +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &port); +#endif port.membase = ioremap64(PPC440SP_UART2_ADDR, 8); port.irq = UART2_INT; @@ -301,6 +308,9 @@ if (early_serial_setup(&port) != 0) { printk("Early serial init of port 2 failed\n"); } +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(2, &port); +#endif } static void __init @@ -360,7 +370,4 @@ ppc_md.get_irq = NULL; /* Set in ppc4xx_pic_init() */ ppc_md.calibrate_decr = luan_calibrate_decr; -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = luan_early_serial_map; -#endif } diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c
linux-2.6.22-591/arch/ppc/platforms/4xx/ocotea.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/ocotea.c 2007-12-21 15:36:11.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -249,14 +250,20 @@ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; port.line = 0; - if (early_serial_setup(&port) != 0) { +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&port) != 0) printk("Early serial init of port 0 failed\n"); - } +#endif -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(0, &port); +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &port); +#endif +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) /* Purge TLB entry added in head_44x.S for early serial access */ _tlbie(UART0_IO_BASE); #endif @@ -266,14 +273,18 @@ port.uartclk = clocks.uart1; port.line = 1; - if (early_serial_setup(&port) != 0) { +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&port) != 0) printk("Early serial init of port 1 failed\n"); - } +#endif -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(1, &port); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &port); +#endif } static void __init @@ -343,8 +354,5 @@ ppc_md.nvram_read_val = todc_direct_read_val; ppc_md.nvram_write_val = todc_direct_write_val; -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = ocotea_early_serial_map; -#endif ppc_md.init = ocotea_init; } diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c linux-2.6.22-591/arch/ppc/platforms/4xx/taishan.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/taishan.c 2007-12-21 15:36:11.000000000 -0500 @@ -310,7 +310,7 @@ if (early_serial_setup(&port) != 0)
printk("Early serial init of port 0 failed\n"); -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(0, &port); @@ -326,7 +326,7 @@ if (early_serial_setup(&port) != 0) printk("Early serial init of port 1 failed\n"); -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG /* Configure debug serial access */ gen550_init(1, &port); #endif @@ -387,9 +387,6 @@ ppc_md.calibrate_decr = taishan_calibrate_decr; -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = taishan_early_serial_map; -#endif ppc_md.init = taishan_init; } diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml300.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml300.c 2007-12-21 15:36:11.000000000 -0500 @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include @@ -41,9 +43,6 @@ * ppc4xx_map_io arch/ppc/syslib/ppc4xx_setup.c * start_kernel init/main.c * setup_arch arch/ppc/kernel/setup.c - * #if defined(CONFIG_KGDB) - * *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc - * #endif * *ppc_md.setup_arch == ml300_setup_arch this file * ppc4xx_setup_arch arch/ppc/syslib/ppc4xx_setup.c * ppc4xx_find_bridges arch/ppc/syslib/ppc405_pci.c diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml403.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml403.c 2007-12-21 15:36:11.000000000 -0500 @@ -43,9 +43,6 @@ * ppc4xx_map_io arch/ppc/syslib/ppc4xx_setup.c * start_kernel init/main.c * setup_arch arch/ppc/kernel/setup.c - * #if defined(CONFIG_KGDB) - * *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc - * #endif * *ppc_md.setup_arch == ml403_setup_arch this file * 
ppc4xx_setup_arch arch/ppc/syslib/ppc4xx_setup.c * ppc4xx_find_bridges arch/ppc/syslib/ppc405_pci.c diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c linux-2.6.22-591/arch/ppc/platforms/4xx/yucca.c --- linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/4xx/yucca.c 2007-12-21 15:36:11.000000000 -0500 @@ -386,7 +386,4 @@ ppc_md.get_irq = NULL; /* Set in ppc4xx_pic_init() */ ppc_md.calibrate_decr = yucca_calibrate_decr; -#ifdef CONFIG_KGDB - ppc_md.early_serial_map = yucca_early_serial_map; -#endif } diff -Nurb linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c linux-2.6.22-591/arch/ppc/platforms/83xx/mpc834x_sys.c --- linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/83xx/mpc834x_sys.c 2007-12-21 15:36:11.000000000 -0500 @@ -42,11 +42,11 @@ #include #include #include -#include #include #include #include +#include #ifndef CONFIG_PCI unsigned long isa_io_base = 0; @@ -114,7 +114,9 @@ /* setup PCI host bridges */ mpc83xx_setup_hose(); #endif +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) mpc83xx_early_serial_map(); +#endif /* setup the board related info for the MDIO bus */ mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC83xx_MDIO); @@ -334,7 +336,6 @@ ppc_md.get_rtc_time = NULL; ppc_md.calibrate_decr = mpc83xx_calibrate_decr; - ppc_md.early_serial_map = mpc83xx_early_serial_map; #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8540_ads.c --- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8540_ads.c 2007-12-21 15:36:11.000000000 -0500 @@ -43,11 +43,11 @@ #include 
#include #include -#include #include #include #include +#include /* ************************************************************************ * @@ -77,7 +77,7 @@ mpc85xx_setup_hose(); #endif -#ifdef CONFIG_SERIAL_8250 +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) mpc85xx_early_serial_map(); #endif @@ -215,9 +215,6 @@ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ -#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) - ppc_md.early_serial_map = mpc85xx_early_serial_map; -#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ if (ppc_md.progress) ppc_md.progress("mpc8540ads_init(): exit", 0); diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8560_ads.c --- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8560_ads.c 2007-12-21 15:36:11.000000000 -0500 @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc85xx_cds_common.c --- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2007-12-21 15:36:11.000000000 -0500 @@ -47,12 +47,12 @@ #include #include #include -#include #include #include #include #include +#include #ifndef CONFIG_PCI @@ -436,7 +436,7 @@ mpc85xx_setup_hose(); #endif -#ifdef CONFIG_SERIAL_8250 +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) mpc85xx_early_serial_map(); #endif @@ -590,9 +590,6 @@ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ -#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) - 
ppc_md.early_serial_map = mpc85xx_early_serial_map; -#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ if (ppc_md.progress) ppc_md.progress("mpc85xx_cds_init(): exit", 0); diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c linux-2.6.22-591/arch/ppc/platforms/85xx/sbc8560.c --- linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/85xx/sbc8560.c 2007-12-21 15:36:11.000000000 -0500 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -43,14 +44,13 @@ #include #include #include -#include #include #include #include #include +#include -#ifdef CONFIG_SERIAL_8250 static void __init sbc8560_early_serial_map(void) { @@ -66,12 +66,16 @@ uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART0_SIZE); uart_req.type = PORT_16650; -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) - gen550_init(0, &uart_req); -#endif - +#ifdef CONFIG_SERIAL_8250 if (early_serial_setup(&uart_req) != 0) printk("Early serial init of port 0 failed\n"); +#endif +#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(0, &uart_req); +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &uart_req); +#endif /* Assume early_serial_setup() doesn't modify uart_req */ uart_req.line = 1; @@ -79,14 +83,17 @@ uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART1_SIZE); uart_req.irq = MPC85xx_IRQ_EXT10; -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) - gen550_init(1, &uart_req); -#endif - +#ifdef CONFIG_SERIAL_8250 if (early_serial_setup(&uart_req) != 0) - printk("Early serial init of port 1 failed\n"); -} + printk("Early serial init of port 1 failed\n"); #endif +#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(1, &uart_req); +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &uart_req); +#endif +} /* ************************************************************************ * @@ -115,9 +122,7 @@ /* setup PCI host bridges */ mpc85xx_setup_hose(); #endif -#ifdef CONFIG_SERIAL_8250 
sbc8560_early_serial_map(); -#endif #ifdef CONFIG_SERIAL_TEXT_DEBUG /* Invalidate the entry we stole earlier the serial ports * should be properly mapped */ @@ -224,9 +229,6 @@ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ -#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) - ppc_md.early_serial_map = sbc8560_early_serial_map; -#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ if (ppc_md.progress) ppc_md.progress("sbc8560_init(): exit", 0); diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c linux-2.6.22-591/arch/ppc/platforms/85xx/tqm85xx.c --- linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/85xx/tqm85xx.c 2007-12-21 15:36:11.000000000 -0500 @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -55,6 +54,7 @@ #include #include #include +#include #ifndef CONFIG_PCI unsigned long isa_io_base = 0; @@ -121,7 +121,7 @@ #endif #ifndef CONFIG_MPC8560 -#if defined(CONFIG_SERIAL_8250) +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) mpc85xx_early_serial_map(); #endif @@ -400,9 +400,6 @@ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ -#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) - ppc_md.early_serial_map = mpc85xx_early_serial_map; -#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ #endif /* CONFIG_MPC8560 */ if (ppc_md.progress) diff -Nurb linux-2.6.22-570/arch/ppc/platforms/apus_setup.c linux-2.6.22-591/arch/ppc/platforms/apus_setup.c --- linux-2.6.22-570/arch/ppc/platforms/apus_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/apus_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -598,12 +598,6 @@ ciab.ddra |= (SER_DTR | SER_RTS); /* outputs */ ciab.ddra &= ~(SER_DCD | 
SER_CTS | SER_DSR); /* inputs */ -#ifdef CONFIG_KGDB - /* turn Rx interrupts on for GDB */ - amiga_custom.intena = IF_SETCLR | IF_RBF; - ser_RTSon(); -#endif - return 0; } diff -Nurb linux-2.6.22-570/arch/ppc/platforms/chestnut.c linux-2.6.22-591/arch/ppc/platforms/chestnut.c --- linux-2.6.22-570/arch/ppc/platforms/chestnut.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/chestnut.c 2007-12-21 15:36:11.000000000 -0500 @@ -34,9 +34,9 @@ #include #include #include -#include #include #include +#include #include static void __iomem *sram_base; /* Virtual addr of Internal SRAM */ @@ -492,7 +492,7 @@ static void __init chestnut_map_io(void) { -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) io_block_mapping(CHESTNUT_UART_BASE, CHESTNUT_UART_BASE, 0x100000, _PAGE_IO); #endif @@ -566,9 +566,6 @@ #if defined(CONFIG_SERIAL_TEXT_DEBUG) ppc_md.progress = gen550_progress; #endif -#if defined(CONFIG_KGDB) - ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; -#endif if (ppc_md.progress) ppc_md.progress("chestnut_init(): exit", 0); diff -Nurb linux-2.6.22-570/arch/ppc/platforms/ev64260.c linux-2.6.22-591/arch/ppc/platforms/ev64260.c --- linux-2.6.22-570/arch/ppc/platforms/ev64260.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/ev64260.c 2007-12-21 15:36:11.000000000 -0500 @@ -330,7 +330,7 @@ port.iotype = UPIO_MEM; port.flags = STD_COM_FLAGS; -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG gen550_init(0, &port); #endif @@ -568,7 +568,7 @@ return; } -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#ifdef CONFIG_SERIAL_TEXT_DEBUG static void __init ev64260_map_io(void) { @@ -624,20 +624,12 @@ ppc_md.setup_io_mappings = ev64260_map_io; ppc_md.progress = gen550_progress; #endif -#if defined(CONFIG_KGDB) - ppc_md.setup_io_mappings = ev64260_map_io; - ppc_md.early_serial_map = 
ev64260_early_serial_map; -#endif #elif defined(CONFIG_SERIAL_MPSC_CONSOLE) #ifdef CONFIG_SERIAL_TEXT_DEBUG ppc_md.setup_io_mappings = ev64260_map_io; ppc_md.progress = mv64x60_mpsc_progress; mv64x60_progress_init(CONFIG_MV64X60_NEW_BASE); #endif /* CONFIG_SERIAL_TEXT_DEBUG */ -#ifdef CONFIG_KGDB - ppc_md.setup_io_mappings = ev64260_map_io; - ppc_md.early_serial_map = ev64260_early_serial_map; -#endif /* CONFIG_KGDB */ #endif diff -Nurb linux-2.6.22-570/arch/ppc/platforms/hdpu.c linux-2.6.22-591/arch/ppc/platforms/hdpu.c --- linux-2.6.22-570/arch/ppc/platforms/hdpu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/hdpu.c 2007-12-21 15:36:11.000000000 -0500 @@ -281,25 +281,6 @@ #if defined(CONFIG_SERIAL_MPSC_CONSOLE) static void __init hdpu_early_serial_map(void) { -#ifdef CONFIG_KGDB - static char first_time = 1; - -#if defined(CONFIG_KGDB_TTYS0) -#define KGDB_PORT 0 -#elif defined(CONFIG_KGDB_TTYS1) -#define KGDB_PORT 1 -#else -#error "Invalid kgdb_tty port" -#endif - - if (first_time) { - gt_early_mpsc_init(KGDB_PORT, - B9600 | CS8 | CREAD | HUPCL | CLOCAL); - first_time = 0; - } - - return; -#endif } #endif diff -Nurb linux-2.6.22-570/arch/ppc/platforms/lopec.c linux-2.6.22-591/arch/ppc/platforms/lopec.c --- linux-2.6.22-570/arch/ppc/platforms/lopec.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/lopec.c 2007-12-21 15:36:11.000000000 -0500 @@ -32,7 +32,8 @@ #include #include #include -#include + +#include /* * Define all of the IRQ senses and polarities. 
Taken from the diff -Nurb linux-2.6.22-570/arch/ppc/platforms/pplus.c linux-2.6.22-591/arch/ppc/platforms/pplus.c --- linux-2.6.22-570/arch/ppc/platforms/pplus.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/pplus.c 2007-12-21 15:36:11.000000000 -0500 @@ -35,9 +35,9 @@ #include #include #include -#include #include +#include #include "pplus.h" #undef DUMP_DBATS @@ -893,9 +893,6 @@ #ifdef CONFIG_SERIAL_TEXT_DEBUG ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_TEXT_DEBUG */ -#ifdef CONFIG_KGDB - ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; -#endif #ifdef CONFIG_SMP smp_ops = &pplus_smp_ops; #endif /* CONFIG_SMP */ diff -Nurb linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c linux-2.6.22-591/arch/ppc/platforms/radstone_ppc7d.c --- linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/radstone_ppc7d.c 2007-12-21 15:36:11.000000000 -0500 @@ -84,7 +84,7 @@ * Serial port code *****************************************************************************/ -#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG) +#ifdef CONFIG_SERIAL_TEXT_DEBUG static void __init ppc7d_early_serial_map(void) { #if defined(CONFIG_SERIAL_MPSC_CONSOLE) @@ -113,10 +113,10 @@ if (early_serial_setup(&serial_req) != 0) printk(KERN_ERR "Early serial init of port 1 failed\n"); #else -#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX +#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX #endif } -#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */ +#endif /* CONFIG_SERIAL_TEXT_DEBUG */ /***************************************************************************** * Low-level board support code @@ -1459,18 +1459,16 @@ PPC7D_CPLD_COMS_COM4_TXEN, PPC7D_CPLD_COMS); #endif /* CONFIG_SERIAL_MPSC */ -#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG) - ppc7d_early_serial_map(); #ifdef CONFIG_SERIAL_TEXT_DEBUG + 
ppc7d_early_serial_map(); #if defined(CONFIG_SERIAL_MPSC_CONSOLE) ppc_md.progress = mv64x60_mpsc_progress; #elif defined(CONFIG_SERIAL_8250) ppc_md.progress = gen550_progress; #else -#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX +#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX #endif /* CONFIG_SERIAL_8250 */ #endif /* CONFIG_SERIAL_TEXT_DEBUG */ -#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */ /* Enable write access to user flash. This is necessary for * flash probe. diff -Nurb linux-2.6.22-570/arch/ppc/platforms/sandpoint.c linux-2.6.22-591/arch/ppc/platforms/sandpoint.c --- linux-2.6.22-570/arch/ppc/platforms/sandpoint.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/sandpoint.c 2007-12-21 15:36:11.000000000 -0500 @@ -95,9 +95,9 @@ #include #include #include -#include #include +#include #include "sandpoint.h" /* Set non-zero if an X2 Sandpoint detected. */ @@ -730,9 +730,6 @@ ppc_md.nvram_read_val = todc_mc146818_read_val; ppc_md.nvram_write_val = todc_mc146818_write_val; -#ifdef CONFIG_KGDB - ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; -#endif #ifdef CONFIG_SERIAL_TEXT_DEBUG ppc_md.progress = gen550_progress; #endif diff -Nurb linux-2.6.22-570/arch/ppc/platforms/spruce.c linux-2.6.22-591/arch/ppc/platforms/spruce.c --- linux-2.6.22-570/arch/ppc/platforms/spruce.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/platforms/spruce.c 2007-12-21 15:36:11.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -37,9 +38,9 @@ #include #include #include -#include #include +#include #include "spruce.h" @@ -178,26 +179,32 @@ serial_req.membase = (u_char *)UART0_IO_BASE; serial_req.regshift = 0; -#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG) - gen550_init(0, &serial_req); -#endif #ifdef CONFIG_SERIAL_8250 if (early_serial_setup(&serial_req) != 0) printk("Early serial init of port 0 failed\n"); #endif +#ifdef 
CONFIG_SERIAL_TEXT_DEBUG + gen550_init(0, &serial_req); +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &serial_req); +#endif /* Assume early_serial_setup() doesn't modify serial_req */ serial_req.line = 1; serial_req.irq = UART1_INT; serial_req.membase = (u_char *)UART1_IO_BASE; -#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG) - gen550_init(1, &serial_req); -#endif #ifdef CONFIG_SERIAL_8250 if (early_serial_setup(&serial_req) != 0) printk("Early serial init of port 1 failed\n"); #endif +#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(1, &serial_req); +#endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &serial_req); +#endif } TODC_ALLOC(); @@ -316,7 +323,4 @@ #ifdef CONFIG_SERIAL_TEXT_DEBUG ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_TEXT_DEBUG */ -#ifdef CONFIG_KGDB - ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; -#endif } diff -Nurb linux-2.6.22-570/arch/ppc/syslib/Makefile linux-2.6.22-591/arch/ppc/syslib/Makefile --- linux-2.6.22-570/arch/ppc/syslib/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -77,7 +77,6 @@ obj-$(CONFIG_8260_PCI9) += m8260_pci_erratum9.o obj-$(CONFIG_CPM2) += cpm2_common.o cpm2_pic.o ifeq ($(CONFIG_PPC_GEN550),y) -obj-$(CONFIG_KGDB) += gen550_kgdb.o gen550_dbg.o obj-$(CONFIG_SERIAL_TEXT_DEBUG) += gen550_dbg.o endif ifeq ($(CONFIG_SERIAL_MPSC_CONSOLE),y) diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550.h linux-2.6.22-591/arch/ppc/syslib/gen550.h --- linux-2.6.22-570/arch/ppc/syslib/gen550.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/gen550.h 2007-12-21 15:36:11.000000000 -0500 @@ -11,4 +11,3 @@ extern void gen550_progress(char *, unsigned short); extern void gen550_init(int, struct uart_port *); -extern void gen550_kgdb_map_scc(void); diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c linux-2.6.22-591/arch/ppc/syslib/gen550_kgdb.c --- linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/gen550_kgdb.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,83 +0,0 @@ -/* - * Generic 16550 kgdb support intended to be useful on a variety - * of platforms. To enable this support, it is necessary to set - * the CONFIG_GEN550 option. Any virtual mapping of the serial - * port(s) to be used can be accomplished by setting - * ppc_md.early_serial_map to a platform-specific mapping function. - * - * Adapted from ppc4xx_kgdb.c. - * - * Author: Matt Porter - * - * 2002-2004 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include -#include - -#include - -extern unsigned long serial_init(int, void *); -extern unsigned long serial_getc(unsigned long); -extern unsigned long serial_putc(unsigned long, unsigned char); - -#if defined(CONFIG_KGDB_TTYS0) -#define KGDB_PORT 0 -#elif defined(CONFIG_KGDB_TTYS1) -#define KGDB_PORT 1 -#elif defined(CONFIG_KGDB_TTYS2) -#define KGDB_PORT 2 -#elif defined(CONFIG_KGDB_TTYS3) -#define KGDB_PORT 3 -#else -#error "invalid kgdb_tty port" -#endif - -static volatile unsigned int kgdb_debugport; - -void putDebugChar(unsigned char c) -{ - if (kgdb_debugport == 0) - kgdb_debugport = serial_init(KGDB_PORT, NULL); - - serial_putc(kgdb_debugport, c); -} - -int getDebugChar(void) -{ - if (kgdb_debugport == 0) - kgdb_debugport = serial_init(KGDB_PORT, NULL); - - return(serial_getc(kgdb_debugport)); -} - -void kgdb_interruptible(int enable) -{ - return; -} - -void putDebugString(char* str) -{ - while (*str != '\0') { - putDebugChar(*str); - str++; - } - putDebugChar('\r'); - return; -} - -/* - * Note: gen550_init() must be called already on the port we are going - * to use. 
- */ -void -gen550_kgdb_map_scc(void) -{ - printk(KERN_DEBUG "kgdb init\n"); - if (ppc_md.early_serial_map) - ppc_md.early_serial_map(); - kgdb_debugport = serial_init(KGDB_PORT, NULL); -} diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c linux-2.6.22-591/arch/ppc/syslib/ibm44x_common.c --- linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/ibm44x_common.c 2007-12-21 15:36:11.000000000 -0500 @@ -192,9 +192,6 @@ #ifdef CONFIG_SERIAL_TEXT_DEBUG ppc_md.progress = gen550_progress; #endif /* CONFIG_SERIAL_TEXT_DEBUG */ -#ifdef CONFIG_KGDB - ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; -#endif /* * The Abatron BDI JTAG debugger does not tolerate others diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60.c linux-2.6.22-591/arch/ppc/syslib/mv64x60.c --- linux-2.6.22-570/arch/ppc/syslib/mv64x60.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/mv64x60.c 2007-12-21 15:36:11.000000000 -0500 @@ -241,6 +241,12 @@ .end = MV64x60_IRQ_SDMA_0, .flags = IORESOURCE_IRQ, }, + [4] = { + .name = "mpsc 0 irq", + .start = MV64x60_IRQ_MPSC_0, + .end = MV64x60_IRQ_MPSC_0, + .flags = IORESOURCE_IRQ, + }, }; static struct platform_device mpsc0_device = { @@ -298,6 +304,12 @@ .end = MV64360_IRQ_SDMA_1, .flags = IORESOURCE_IRQ, }, + [4] = { + .name = "mpsc 1 irq", + .start = MV64360_IRQ_MPSC_1, + .end = MV64360_IRQ_MPSC_1, + .flags = IORESOURCE_IRQ, + }, }; static struct platform_device mpsc1_device = { @@ -1432,12 +1444,46 @@ static int __init mv64x60_add_pds(void) { - return platform_add_devices(mv64x60_pd_devs, - ARRAY_SIZE(mv64x60_pd_devs)); + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) { + if (mv64x60_pd_devs[i]) { + ret = platform_device_register(mv64x60_pd_devs[i]); + } + if (ret) { + while (--i >= 0) + platform_device_unregister(mv64x60_pd_devs[i]); + break; + } + } + return ret; } arch_initcall(mv64x60_add_pds); /* + * mv64x60_early_get_pdev_data() + * + * 
Get the data associated with a platform device by name and number. + */ +struct platform_device * __init +mv64x60_early_get_pdev_data(const char *name, int id, int remove) +{ + int i; + struct platform_device *pdev; + + for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) { + if ((pdev = mv64x60_pd_devs[i]) && + pdev->id == id && + !strcmp(pdev->name, name)) { + if (remove) + mv64x60_pd_devs[i] = NULL; + return pdev; + } + } + return NULL; +} + +/* ***************************************************************************** * * GT64260-Specific Routines @@ -1770,6 +1816,11 @@ r->start = MV64x60_IRQ_SDMA_0; r->end = MV64x60_IRQ_SDMA_0; } + if ((r = platform_get_resource(&mpsc1_device, IORESOURCE_IRQ, 1)) + != NULL) { + r->start = GT64260_IRQ_MPSC_1; + r->end = GT64260_IRQ_MPSC_1; + } #endif } @@ -2415,7 +2466,6 @@ .attr = { .name = "hs_reg", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = VAL_LEN_MAX, .read = mv64xxx_hs_reg_read, diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c linux-2.6.22-591/arch/ppc/syslib/mv64x60_dbg.c --- linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/mv64x60_dbg.c 2007-12-21 15:36:11.000000000 -0500 @@ -34,7 +34,7 @@ void mv64x60_progress_init(u32 base) { - mv64x60_dbg_bh.v_base = base; + mv64x60_dbg_bh.v_base = (void*)base; return; } @@ -69,53 +69,3 @@ return; } #endif /* CONFIG_SERIAL_TEXT_DEBUG */ - - -#if defined(CONFIG_KGDB) - -#if defined(CONFIG_KGDB_TTYS0) -#define KGDB_PORT 0 -#elif defined(CONFIG_KGDB_TTYS1) -#define KGDB_PORT 1 -#else -#error "Invalid kgdb_tty port" -#endif - -void -putDebugChar(unsigned char c) -{ - mv64x60_polled_putc(KGDB_PORT, (char)c); -} - -int -getDebugChar(void) -{ - unsigned char c; - - while (!mv64x60_polled_getc(KGDB_PORT, &c)); - return (int)c; -} - -void -putDebugString(char* str) -{ - while (*str != '\0') { - putDebugChar(*str); - str++; - } - putDebugChar('\r'); - return; -} - -void -kgdb_interruptible(int enable) -{ -} - -void -kgdb_map_scc(void) -{ - if (ppc_md.early_serial_map) - 
ppc_md.early_serial_map(); -} -#endif /* CONFIG_KGDB */ diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc4xx_setup.c --- linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/ppc4xx_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc83xx_setup.c --- linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/ppc83xx_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -30,12 +30,12 @@ #include /* for linux/serial_core.h */ #include #include +#include #include #include #include #include -#include #include #include @@ -44,6 +44,7 @@ #include #include #endif +#include phys_addr_t immrbar; @@ -87,11 +88,11 @@ tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); } -#ifdef CONFIG_SERIAL_8250 +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) void __init mpc83xx_early_serial_map(void) { -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) struct uart_port serial_req; #endif struct plat_serial8250_port *pdata; @@ -103,27 +104,40 @@ pdata[0].mapbase += binfo->bi_immr_base; pdata[0].membase = ioremap(pdata[0].mapbase, 0x100); -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) memset(&serial_req, 0, sizeof (serial_req)); serial_req.iotype = UPIO_MEM; serial_req.mapbase = pdata[0].mapbase; serial_req.membase = pdata[0].membase; serial_req.regshift = 0; + serial_req.irq = pdata[0].irq; + serial_req.flags = pdata[0].flags; + serial_req.uartclk = pdata[0].uartclk; +#ifdef CONFIG_SERIAL_TEXT_DEBUG gen550_init(0, &serial_req); #endif +#ifdef CONFIG_KGDB_8250 + 
kgdb8250_add_port(0, &serial_req); +#endif +#endif pdata[1].uartclk = binfo->bi_busfreq; pdata[1].mapbase += binfo->bi_immr_base; pdata[1].membase = ioremap(pdata[1].mapbase, 0x100); -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) /* Assume gen550_init() doesn't modify serial_req */ serial_req.mapbase = pdata[1].mapbase; serial_req.membase = pdata[1].membase; +#ifdef CONFIG_SERIAL_TEXT_DEBUG gen550_init(1, &serial_req); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &serial_req); +#endif +#endif } #endif diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc85xx_setup.c --- linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/ppc/syslib/ppc85xx_setup.c 2007-12-21 15:36:11.000000000 -0500 @@ -19,16 +19,17 @@ #include /* for linux/serial_core.h */ #include #include +#include #include #include #include #include #include -#include #include #include +#include extern void abort(void); @@ -69,11 +70,11 @@ mtspr(SPRN_TCR, TCR_DIE); } -#ifdef CONFIG_SERIAL_8250 +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) void __init mpc85xx_early_serial_map(void) { -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) struct uart_port serial_req; #endif struct plat_serial8250_port *pdata; @@ -85,27 +86,40 @@ pdata[0].mapbase += binfo->bi_immr_base; pdata[0].membase = ioremap(pdata[0].mapbase, MPC85xx_UART0_SIZE); -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) memset(&serial_req, 0, sizeof (serial_req)); serial_req.iotype = UPIO_MEM; serial_req.mapbase = pdata[0].mapbase; serial_req.membase = pdata[0].membase; serial_req.regshift = 0; + serial_req.irq = pdata[0].irq; + serial_req.flags = pdata[0].flags; + serial_req.uartclk = 
pdata[0].uartclk; +#ifdef CONFIG_SERIAL_TEXT_DEBUG gen550_init(0, &serial_req); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(0, &serial_req); +#endif +#endif pdata[1].uartclk = binfo->bi_busfreq; pdata[1].mapbase += binfo->bi_immr_base; pdata[1].membase = ioremap(pdata[1].mapbase, MPC85xx_UART0_SIZE); -#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) /* Assume gen550_init() doesn't modify serial_req */ serial_req.mapbase = pdata[1].mapbase; serial_req.membase = pdata[1].membase; +#ifdef CONFIG_SERIAL_TEXT_DEBUG gen550_init(1, &serial_req); #endif +#ifdef CONFIG_KGDB_8250 + kgdb8250_add_port(1, &serial_req); +#endif +#endif } #endif @@ -363,5 +377,3 @@ return; } #endif /* CONFIG_PCI */ - - diff -Nurb linux-2.6.22-570/arch/s390/appldata/appldata_net_sum.c linux-2.6.22-591/arch/s390/appldata/appldata_net_sum.c --- linux-2.6.22-570/arch/s390/appldata/appldata_net_sum.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/s390/appldata/appldata_net_sum.c 2007-12-21 15:36:14.000000000 -0500 @@ -16,6 +16,7 @@ #include #include #include +#include #include "appldata.h" @@ -107,7 +108,7 @@ tx_dropped = 0; collisions = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { stats = dev->get_stats(dev); rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; diff -Nurb linux-2.6.22-570/arch/s390/kernel/ipl.c linux-2.6.22-591/arch/s390/kernel/ipl.c --- linux-2.6.22-570/arch/s390/kernel/ipl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/s390/kernel/ipl.c 2007-12-21 15:36:11.000000000 -0500 @@ -314,7 +314,6 @@ .attr = { .name = "binary_parameter", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = PAGE_SIZE, .read = &ipl_parameter_read, @@ -338,7 +337,6 @@ .attr = { .name = "scp_data", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = PAGE_SIZE, .read = &ipl_scp_data_read, diff -Nurb 
linux-2.6.22-570/arch/sh/Kconfig.debug linux-2.6.22-591/arch/sh/Kconfig.debug --- linux-2.6.22-570/arch/sh/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 @@ -78,82 +78,4 @@ on the VM subsystem for higher order allocations. This option will also use IRQ stacks to compensate for the reduced stackspace. -config SH_KGDB - bool "Include KGDB kernel debugger" - select FRAME_POINTER - select DEBUG_INFO - help - Include in-kernel hooks for kgdb, the Linux kernel source level - debugger. See for more information. - Unless you are intending to debug the kernel, say N here. - -menu "KGDB configuration options" - depends on SH_KGDB - -config MORE_COMPILE_OPTIONS - bool "Add any additional compile options" - help - If you want to add additional CFLAGS to the kernel build, enable this - option and then enter what you would like to add in the next question. - Note however that -g is already appended with the selection of KGDB. 
- -config COMPILE_OPTIONS - string "Additional compile arguments" - depends on MORE_COMPILE_OPTIONS - -config KGDB_NMI - bool "Enter KGDB on NMI" - default n - -config SH_KGDB_CONSOLE - bool "Console messages through GDB" - depends on !SERIAL_SH_SCI_CONSOLE - select SERIAL_CORE_CONSOLE - default n - -config KGDB_SYSRQ - bool "Allow SysRq 'G' to enter KGDB" - default y - -comment "Serial port setup" - -config KGDB_DEFPORT - int "Port number (ttySCn)" - default "1" - -config KGDB_DEFBAUD - int "Baud rate" - default "115200" - -choice - prompt "Parity" - depends on SH_KGDB - default KGDB_DEFPARITY_N - -config KGDB_DEFPARITY_N - bool "None" - -config KGDB_DEFPARITY_E - bool "Even" - -config KGDB_DEFPARITY_O - bool "Odd" - -endchoice - -choice - prompt "Data bits" - depends on SH_KGDB - default KGDB_DEFBITS_8 - -config KGDB_DEFBITS_8 - bool "8" - -config KGDB_DEFBITS_7 - bool "7" - -endchoice - -endmenu - endmenu diff -Nurb linux-2.6.22-570/arch/sh/kernel/Makefile linux-2.6.22-591/arch/sh/kernel/Makefile --- linux-2.6.22-570/arch/sh/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -15,7 +15,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_CF_ENABLER) += cf-enabler.o obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o -obj-$(CONFIG_SH_KGDB) += kgdb_stub.o kgdb_jmp.o +obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o obj-$(CONFIG_SH_CPU_FREQ) += cpufreq.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff -Nurb linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S linux-2.6.22-591/arch/sh/kernel/cpu/sh3/ex.S --- linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/kernel/cpu/sh3/ex.S 2007-12-21 15:36:11.000000000 -0500 @@ -45,7 +45,7 @@ .long exception_error ! reserved_instruction (filled by trap_init) /* 180 */ .long exception_error ! 
illegal_slot_instruction (filled by trap_init) /*1A0*/ ENTRY(nmi_slot) -#if defined (CONFIG_KGDB_NMI) +#if defined (CONFIG_KGDB) .long debug_enter /* 1C0 */ ! Allow trap to debugger #else .long exception_none /* 1C0 */ ! Not implemented yet diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S linux-2.6.22-591/arch/sh/kernel/kgdb-jmp.S --- linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/sh/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,32 @@ +#include + +ENTRY(kgdb_fault_setjmp) + add #(9*4), r4 + sts.l pr, @-r4 + mov.l r15, @-r4 + mov.l r14, @-r4 + mov.l r13, @-r4 + mov.l r12, @-r4 + mov.l r11, @-r4 + mov.l r10, @-r4 + mov.l r9, @-r4 + mov.l r8, @-r4 + rts + mov #0, r0 + +ENTRY(kgdb_fault_longjmp) + mov.l @r4+, r8 + mov.l @r4+, r9 + mov.l @r4+, r10 + mov.l @r4+, r11 + mov.l @r4+, r12 + mov.l @r4+, r13 + mov.l @r4+, r14 + mov.l @r4+, r15 + lds.l @r4+, pr + mov r5, r0 + tst r0, r0 + bf 1f + mov #1, r0 +1: rts + nop diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb.c linux-2.6.22-591/arch/sh/kernel/kgdb.c --- linux-2.6.22-570/arch/sh/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/sh/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,363 @@ +/* + * arch/sh/kernel/kgdb.c + * + * Contains SH-specific low-level support for KGDB. + * + * Contains extracts from code by Glenn Engel, Jim Kingdon, + * David Grothe , Tigran Aivazian , + * Amit S. Kale , William Gatliff , + * Ben Lee, Steve Chamberlain and Benoit Miller , + * Henry Bell and Jeremy Siegel + * + * Maintainer: Tom Rini + * + * 2004 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern void per_cpu_trap_init(void); +extern atomic_t cpu_doing_single_step; + +/* Function pointers for linkage */ +static struct kgdb_regs trap_registers; + +/* Globals. */ +char in_nmi; /* Set during NMI to prevent reentry */ + +/* TRA differs sh3/4 */ +#if defined(CONFIG_CPU_SH3) +#define TRA 0xffffffd0 +#elif defined(CONFIG_CPU_SH4) +#define TRA 0xff000020 +#endif + +/* Macros for single step instruction identification */ +#define OPCODE_BT(op) (((op) & 0xff00) == 0x8900) +#define OPCODE_BF(op) (((op) & 0xff00) == 0x8b00) +#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \ + (((op) & 0x7f ) << 1)) +#define OPCODE_BFS(op) (((op) & 0xff00) == 0x8f00) +#define OPCODE_BTS(op) (((op) & 0xff00) == 0x8d00) +#define OPCODE_BRA(op) (((op) & 0xf000) == 0xa000) +#define OPCODE_BRA_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \ + (((op) & 0x7ff) << 1)) +#define OPCODE_BRAF(op) (((op) & 0xf0ff) == 0x0023) +#define OPCODE_BRAF_REG(op) (((op) & 0x0f00) >> 8) +#define OPCODE_BSR(op) (((op) & 0xf000) == 0xb000) +#define OPCODE_BSR_DISP(op) (((op) & 0x800) ? 
(((op) | 0xfffff800) << 1) : \ + (((op) & 0x7ff) << 1)) +#define OPCODE_BSRF(op) (((op) & 0xf0ff) == 0x0003) +#define OPCODE_BSRF_REG(op) (((op) >> 8) & 0xf) +#define OPCODE_JMP(op) (((op) & 0xf0ff) == 0x402b) +#define OPCODE_JMP_REG(op) (((op) >> 8) & 0xf) +#define OPCODE_JSR(op) (((op) & 0xf0ff) == 0x400b) +#define OPCODE_JSR_REG(op) (((op) >> 8) & 0xf) +#define OPCODE_RTS(op) ((op) == 0xb) +#define OPCODE_RTE(op) ((op) == 0x2b) + +#define SR_T_BIT_MASK 0x1 +#define STEP_OPCODE 0xc320 +#define BIOS_CALL_TRAP 0x3f + +/* Exception codes as per SH-4 core manual */ +#define ADDRESS_ERROR_LOAD_VEC 7 +#define ADDRESS_ERROR_STORE_VEC 8 +#define TRAP_VEC 11 +#define INVALID_INSN_VEC 12 +#define INVALID_SLOT_VEC 13 +#define NMI_VEC 14 +#define SERIAL_BREAK_VEC 58 + +/* Misc static */ +static int stepped_address; +static short stepped_opcode; + +/* Translate SH-3/4 exception numbers to unix-like signal values */ +static int compute_signal(const int excep_code) +{ + switch (excep_code) { + case INVALID_INSN_VEC: + case INVALID_SLOT_VEC: + return SIGILL; + case ADDRESS_ERROR_LOAD_VEC: + case ADDRESS_ERROR_STORE_VEC: + return SIGSEGV; + case SERIAL_BREAK_VEC: + case NMI_VEC: + return SIGINT; + default: + /* Act like it was a break/trap. */ + return SIGTRAP; + } +} + +/* + * Translate the registers of the system into the format that GDB wants. Since + * we use a local structure to store things, instead of getting them out + * of pt_regs, we can just do a memcpy. 
+ */ +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *ign) +{ + memcpy(gdb_regs, &trap_registers, sizeof(trap_registers)); +} + +/* + * On SH we save: r1 (prev->thread.sp) r2 (prev->thread.pc) r4 (prev) r5 (next) + * r6 (next->thread.sp) r7 (next->thread.pc) + */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + int count; + + for (count = 0; count < 16; count++) + *(gdb_regs++) = 0; + *(gdb_regs++) = p->thread.pc; + *(gdb_regs++) = 0; + *(gdb_regs++) = 0; + *(gdb_regs++) = 0; + *(gdb_regs++) = 0; + *(gdb_regs++) = 0; + *(gdb_regs++) = 0; +} + +/* + * Translate the registers values that GDB has given us back into the + * format of the system. See the comment above about memcpy. + */ +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *ign) +{ + memcpy(&trap_registers, gdb_regs, sizeof(trap_registers)); +} + +/* Calculate the new address for after a step */ +static short *get_step_address(void) +{ + short op = *(short *)trap_registers.pc; + long addr; + + /* BT */ + if (OPCODE_BT(op)) { + if (trap_registers.sr & SR_T_BIT_MASK) + addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); + else + addr = trap_registers.pc + 2; + } + + /* BTS */ + else if (OPCODE_BTS(op)) { + if (trap_registers.sr & SR_T_BIT_MASK) + addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); + else + addr = trap_registers.pc + 4; /* Not in delay slot */ + } + + /* BF */ + else if (OPCODE_BF(op)) { + if (!(trap_registers.sr & SR_T_BIT_MASK)) + addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); + else + addr = trap_registers.pc + 2; + } + + /* BFS */ + else if (OPCODE_BFS(op)) { + if (!(trap_registers.sr & SR_T_BIT_MASK)) + addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); + else + addr = trap_registers.pc + 4; /* Not in delay slot */ + } + + /* BRA */ + else if (OPCODE_BRA(op)) + addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op); + + /* BRAF */ + else if (OPCODE_BRAF(op)) + addr = trap_registers.pc + 4 + + 
trap_registers.regs[OPCODE_BRAF_REG(op)]; + + /* BSR */ + else if (OPCODE_BSR(op)) + addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op); + + /* BSRF */ + else if (OPCODE_BSRF(op)) + addr = trap_registers.pc + 4 + + trap_registers.regs[OPCODE_BSRF_REG(op)]; + + /* JMP */ + else if (OPCODE_JMP(op)) + addr = trap_registers.regs[OPCODE_JMP_REG(op)]; + + /* JSR */ + else if (OPCODE_JSR(op)) + addr = trap_registers.regs[OPCODE_JSR_REG(op)]; + + /* RTS */ + else if (OPCODE_RTS(op)) + addr = trap_registers.pr; + + /* RTE */ + else if (OPCODE_RTE(op)) + addr = trap_registers.regs[15]; + + /* Other */ + else + addr = trap_registers.pc + 2; + + kgdb_flush_icache_range(addr, addr + 2); + return (short *)addr; +} + +/* The command loop, read and act on requests */ +int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *ign) +{ + unsigned long addr; + char *ptr = &remcom_in_buffer[1]; + + /* Examine first char of buffer to see what we need to do */ + switch (remcom_in_buffer[0]) { + case 'c': /* Continue at address AA..AA (optional) */ + case 's': /* Step one instruction from AA..AA */ + /* Try to read optional parameter, PC unchanged if none */ + if (kgdb_hex2long(&ptr, &addr)) + trap_registers.pc = addr; + + atomic_set(&cpu_doing_single_step, -1); + if (remcom_in_buffer[0] == 's') { + /* Replace the instruction immediately after the + * current instruction (i.e. next in the expected + * flow of control) with a trap instruction, so that + * returning will cause only a single instruction to + * be executed. Note that this model is slightly + * broken for instructions with delay slots + * (e.g. B[TF]S, BSR, BRA etc), where both the branch + * and the instruction in the delay slot will be + * executed. 
+ */ + /* Determine where the target instruction will send + * us to */ + unsigned short *next_addr = get_step_address(); + stepped_address = (int)next_addr; + + /* Replace it */ + stepped_opcode = *(short *)next_addr; + *next_addr = STEP_OPCODE; + + /* Flush and return */ + kgdb_flush_icache_range((long)next_addr, + (long)next_addr + 2); + if (kgdb_contthread) + atomic_set(&cpu_doing_single_step, + smp_processor_id()); + } + return 0; + } + return -1; +} + +/* + * When an exception has occurred, we are called. We need to set things + * up so that we can call kgdb_handle_exception to handle requests from + * the remote GDB. + */ +void kgdb_exception_handler(struct pt_regs *regs) +{ + int excep_code, vbr_val; + int count; + + /* Copy kernel regs (from stack) */ + for (count = 0; count < 16; count++) + trap_registers.regs[count] = regs->regs[count]; + trap_registers.pc = regs->pc; + trap_registers.pr = regs->pr; + trap_registers.sr = regs->sr; + trap_registers.gbr = regs->gbr; + trap_registers.mach = regs->mach; + trap_registers.macl = regs->macl; + + __asm__ __volatile__("stc vbr, %0":"=r"(vbr_val)); + trap_registers.vbr = vbr_val; + + /* Get the exception code. */ + __asm__ __volatile__("stc r2_bank, %0":"=r"(excep_code)); + + excep_code >>= 5; + + /* If we got an NMI, and KGDB is not yet initialized, call + * breakpoint() to try and initialize everything for us. */ + if (excep_code == NMI_VEC && !kgdb_initialized) { + breakpoint(); + return; + } + + /* TRAP_VEC exception indicates a software trap inserted in place of + * code by GDB so back up PC by one instruction, as this instruction + * will later be replaced by its original one. Do NOT do this for + * trap 0xff, since that indicates a compiled-in breakpoint which + * will not be replaced (and we would retake the trap forever) */ + if (excep_code == TRAP_VEC && + (*(volatile unsigned long *)TRA != (0xff << 2))) + trap_registers.pc -= 2; + + /* If we have been single-stepping, put back the old instruction.
+ * We use stepped_address in case we have stopped more than one + * instruction away. */ + if (stepped_opcode != 0) { + *(short *)stepped_address = stepped_opcode; + kgdb_flush_icache_range(stepped_address, stepped_address + 2); + } + stepped_opcode = 0; + + /* Call the stub to do the processing. Note that not everything we + * need to send back and forth lives in pt_regs. */ + kgdb_handle_exception(excep_code, compute_signal(excep_code), 0, regs); + + /* Copy back the (maybe modified) registers */ + for (count = 0; count < 16; count++) + regs->regs[count] = trap_registers.regs[count]; + regs->pc = trap_registers.pc; + regs->pr = trap_registers.pr; + regs->sr = trap_registers.sr; + regs->gbr = trap_registers.gbr; + regs->mach = trap_registers.mach; + regs->macl = trap_registers.macl; + + vbr_val = trap_registers.vbr; + __asm__ __volatile__("ldc %0, vbr": :"r"(vbr_val)); +} + +int __init kgdb_arch_init(void) +{ + per_cpu_trap_init(); + + return 0; +} + +struct kgdb_arch arch_kgdb_ops = { +#ifdef CONFIG_CPU_LITTLE_ENDIAN + .gdb_bpt_instr = {0xff, 0xc3}, +#else + .gdb_bpt_instr = {0xc3, 0xff}, +#endif +}; diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S linux-2.6.22-591/arch/sh/kernel/kgdb_jmp.S --- linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/kernel/kgdb_jmp.S 1969-12-31 19:00:00.000000000 -0500 @@ -1,33 +0,0 @@ -#include - -ENTRY(setjmp) - add #(9*4), r4 - sts.l pr, @-r4 - mov.l r15, @-r4 - mov.l r14, @-r4 - mov.l r13, @-r4 - mov.l r12, @-r4 - mov.l r11, @-r4 - mov.l r10, @-r4 - mov.l r9, @-r4 - mov.l r8, @-r4 - rts - mov #0, r0 - -ENTRY(longjmp) - mov.l @r4+, r8 - mov.l @r4+, r9 - mov.l @r4+, r10 - mov.l @r4+, r11 - mov.l @r4+, r12 - mov.l @r4+, r13 - mov.l @r4+, r14 - mov.l @r4+, r15 - lds.l @r4+, pr - mov r5, r0 - tst r0, r0 - bf 1f - mov #1, r0 ! 
in case val==0 -1: rts - nop - diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c linux-2.6.22-591/arch/sh/kernel/kgdb_stub.c --- linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/kernel/kgdb_stub.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,1093 +0,0 @@ -/* - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * Contains extracts from code by Glenn Engel, Jim Kingdon, - * David Grothe , Tigran Aivazian , - * Amit S. Kale , William Gatliff , - * Ben Lee, Steve Chamberlain and Benoit Miller . - * - * This version by Henry Bell - * Minor modifications by Jeremy Siegel - * - * Contains low-level support for remote debug using GDB. - * - * To enable debugger support, two things need to happen. A call to - * set_debug_traps() is necessary in order to allow any breakpoints - * or error conditions to be properly intercepted and reported to gdb. - * A breakpoint also needs to be generated to begin communication. This - * is most easily accomplished by a call to breakpoint() which does - * a trapa if the initialisation phase has been successfully completed. - * - * In this case, set_debug_traps() is not used to "take over" exceptions; - * other kernel code is modified instead to enter the kgdb functions here - * when appropriate (see entry.S for breakpoint traps and NMI interrupts, - * see traps.c for kernel error exceptions). 
- * - * The following gdb commands are supported: - * - * Command Function Return value - * - * g return the value of the CPU registers hex data or ENN - * G set the value of the CPU registers OK or ENN - * - * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN - * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN - * XAA..AA,LLLL: Same, but data is binary (not hex) OK or ENN - * - * c Resume at current address SNN ( signal NN) - * cAA..AA Continue at address AA..AA SNN - * CNN; Resume at current address with signal SNN - * CNN;AA..AA Resume at address AA..AA with signal SNN - * - * s Step one instruction SNN - * sAA..AA Step one instruction from AA..AA SNN - * SNN; Step one instruction with signal SNN - * SNNAA..AA Step one instruction from AA..AA w/NN SNN - * - * k kill (Detach GDB) - * - * d Toggle debug flag - * D Detach GDB - * - * Hct Set thread t for operations, OK or ENN - * c = 'c' (step, cont), c = 'g' (other - * operations) - * - * qC Query current thread ID QCpid - * qfThreadInfo Get list of current threads (first) m - * qsThreadInfo " " " " " (subsequent) - * qOffsets Get section offsets Text=x;Data=y;Bss=z - * - * TXX Find if thread XX is alive OK or ENN - * ? What was the last sigval ? SNN (signal NN) - * O Output to GDB console - * - * Remote communication protocol. - * - * A debug packet whose contents are is encapsulated for - * transmission in the form: - * - * $ # CSUM1 CSUM2 - * - * must be ASCII alphanumeric and cannot include characters - * '$' or '#'. If starts with two characters followed by - * ':', then the existing stubs interpret this as a sequence number. - * - * CSUM1 and CSUM2 are ascii hex representation of an 8-bit - * checksum of , the most significant nibble is sent first. - * the hex digits 0-9,a-f are used. - * - * Receiver responds with: - * - * + - if CSUM is correct and ready for next packet - * - - if CSUM is incorrect - * - * Responses can be run-length encoded to save space. 
A '*' means that - * the next character is an ASCII encoding giving a repeat count which - * stands for that many repetitions of the character preceding the '*'. - * The encoding is n+29, yielding a printable character where n >=3 - * (which is where RLE starts to win). Don't use an n > 126. - * - * So "0* " means the same as "0000". - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Function pointers for linkage */ -kgdb_debug_hook_t *kgdb_debug_hook; -kgdb_bus_error_hook_t *kgdb_bus_err_hook; - -int (*kgdb_getchar)(void); -void (*kgdb_putchar)(int); - -static void put_debug_char(int c) -{ - if (!kgdb_putchar) - return; - (*kgdb_putchar)(c); -} -static int get_debug_char(void) -{ - if (!kgdb_getchar) - return -1; - return (*kgdb_getchar)(); -} - -/* Num chars in in/out bound buffers, register packets need NUMREGBYTES * 2 */ -#define BUFMAX 1024 -#define NUMREGBYTES (MAXREG*4) -#define OUTBUFMAX (NUMREGBYTES*2+512) - -enum regs { - R0 = 0, R1, R2, R3, R4, R5, R6, R7, - R8, R9, R10, R11, R12, R13, R14, R15, - PC, PR, GBR, VBR, MACH, MACL, SR, - /* */ - MAXREG -}; - -static unsigned int registers[MAXREG]; -struct kgdb_regs trap_registers; - -char kgdb_in_gdb_mode; -char in_nmi; /* Set during NMI to prevent reentry */ -int kgdb_nofault; /* Boolean to ignore bus errs (i.e. 
in GDB) */ -int kgdb_enabled = 1; /* Default to enabled, cmdline can disable */ - -/* Exposed for user access */ -struct task_struct *kgdb_current; -unsigned int kgdb_g_imask; -int kgdb_trapa_val; -int kgdb_excode; - -/* Default values for SCI (can override via kernel args in setup.c) */ -#ifndef CONFIG_KGDB_DEFPORT -#define CONFIG_KGDB_DEFPORT 1 -#endif - -#ifndef CONFIG_KGDB_DEFBAUD -#define CONFIG_KGDB_DEFBAUD 115200 -#endif - -#if defined(CONFIG_KGDB_DEFPARITY_E) -#define CONFIG_KGDB_DEFPARITY 'E' -#elif defined(CONFIG_KGDB_DEFPARITY_O) -#define CONFIG_KGDB_DEFPARITY 'O' -#else /* CONFIG_KGDB_DEFPARITY_N */ -#define CONFIG_KGDB_DEFPARITY 'N' -#endif - -#ifdef CONFIG_KGDB_DEFBITS_7 -#define CONFIG_KGDB_DEFBITS '7' -#else /* CONFIG_KGDB_DEFBITS_8 */ -#define CONFIG_KGDB_DEFBITS '8' -#endif - -/* SCI/UART settings, used in kgdb_console_setup() */ -int kgdb_portnum = CONFIG_KGDB_DEFPORT; -int kgdb_baud = CONFIG_KGDB_DEFBAUD; -char kgdb_parity = CONFIG_KGDB_DEFPARITY; -char kgdb_bits = CONFIG_KGDB_DEFBITS; - -/* Jump buffer for setjmp/longjmp */ -static jmp_buf rem_com_env; - -/* TRA differs sh3/4 */ -#if defined(CONFIG_CPU_SH3) -#define TRA 0xffffffd0 -#elif defined(CONFIG_CPU_SH4) -#define TRA 0xff000020 -#endif - -/* Macros for single step instruction identification */ -#define OPCODE_BT(op) (((op) & 0xff00) == 0x8900) -#define OPCODE_BF(op) (((op) & 0xff00) == 0x8b00) -#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \ - (((op) & 0x7f ) << 1)) -#define OPCODE_BFS(op) (((op) & 0xff00) == 0x8f00) -#define OPCODE_BTS(op) (((op) & 0xff00) == 0x8d00) -#define OPCODE_BRA(op) (((op) & 0xf000) == 0xa000) -#define OPCODE_BRA_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \ - (((op) & 0x7ff) << 1)) -#define OPCODE_BRAF(op) (((op) & 0xf0ff) == 0x0023) -#define OPCODE_BRAF_REG(op) (((op) & 0x0f00) >> 8) -#define OPCODE_BSR(op) (((op) & 0xf000) == 0xb000) -#define OPCODE_BSR_DISP(op) (((op) & 0x800) ? 
(((op) | 0xfffff800) << 1) : \ - (((op) & 0x7ff) << 1)) -#define OPCODE_BSRF(op) (((op) & 0xf0ff) == 0x0003) -#define OPCODE_BSRF_REG(op) (((op) >> 8) & 0xf) -#define OPCODE_JMP(op) (((op) & 0xf0ff) == 0x402b) -#define OPCODE_JMP_REG(op) (((op) >> 8) & 0xf) -#define OPCODE_JSR(op) (((op) & 0xf0ff) == 0x400b) -#define OPCODE_JSR_REG(op) (((op) >> 8) & 0xf) -#define OPCODE_RTS(op) ((op) == 0xb) -#define OPCODE_RTE(op) ((op) == 0x2b) - -#define SR_T_BIT_MASK 0x1 -#define STEP_OPCODE 0xc320 -#define BIOS_CALL_TRAP 0x3f - -/* Exception codes as per SH-4 core manual */ -#define ADDRESS_ERROR_LOAD_VEC 7 -#define ADDRESS_ERROR_STORE_VEC 8 -#define TRAP_VEC 11 -#define INVALID_INSN_VEC 12 -#define INVALID_SLOT_VEC 13 -#define NMI_VEC 14 -#define USER_BREAK_VEC 15 -#define SERIAL_BREAK_VEC 58 - -/* Misc static */ -static int stepped_address; -static short stepped_opcode; -static char in_buffer[BUFMAX]; -static char out_buffer[OUTBUFMAX]; - -static void kgdb_to_gdb(const char *s); - -/* Convert ch to hex */ -static int hex(const char ch) -{ - if ((ch >= 'a') && (ch <= 'f')) - return (ch - 'a' + 10); - if ((ch >= '0') && (ch <= '9')) - return (ch - '0'); - if ((ch >= 'A') && (ch <= 'F')) - return (ch - 'A' + 10); - return (-1); -} - -/* Convert the memory pointed to by mem into hex, placing result in buf. - Returns a pointer to the last char put in buf (null) */ -static char *mem_to_hex(const char *mem, char *buf, const int count) -{ - int i; - int ch; - unsigned short s_val; - unsigned long l_val; - - /* Check for 16 or 32 */ - if (count == 2 && ((long) mem & 1) == 0) { - s_val = *(unsigned short *) mem; - mem = (char *) &s_val; - } else if (count == 4 && ((long) mem & 3) == 0) { - l_val = *(unsigned long *) mem; - mem = (char *) &l_val; - } - for (i = 0; i < count; i++) { - ch = *mem++; - *buf++ = highhex(ch); - *buf++ = lowhex(ch); - } - *buf = 0; - return (buf); -} - -/* Convert the hex array pointed to by buf into binary, to be placed in mem. 
- Return a pointer to the character after the last byte written */ -static char *hex_to_mem(const char *buf, char *mem, const int count) -{ - int i; - unsigned char ch; - - for (i = 0; i < count; i++) { - ch = hex(*buf++) << 4; - ch = ch + hex(*buf++); - *mem++ = ch; - } - return (mem); -} - -/* While finding valid hex chars, convert to an integer, then return it */ -static int hex_to_int(char **ptr, int *int_value) -{ - int num_chars = 0; - int hex_value; - - *int_value = 0; - - while (**ptr) { - hex_value = hex(**ptr); - if (hex_value >= 0) { - *int_value = (*int_value << 4) | hex_value; - num_chars++; - } else - break; - (*ptr)++; - } - return num_chars; -} - -/* Copy the binary array pointed to by buf into mem. Fix $, #, - and 0x7d escaped with 0x7d. Return a pointer to the character - after the last byte written. */ -static char *ebin_to_mem(const char *buf, char *mem, int count) -{ - for (; count > 0; count--, buf++) { - if (*buf == 0x7d) - *mem++ = *(++buf) ^ 0x20; - else - *mem++ = *buf; - } - return mem; -} - -/* Pack a hex byte */ -static char *pack_hex_byte(char *pkt, int byte) -{ - *pkt++ = hexchars[(byte >> 4) & 0xf]; - *pkt++ = hexchars[(byte & 0xf)]; - return pkt; -} - -/* Scan for the start char '$', read the packet and check the checksum */ -static void get_packet(char *buffer, int buflen) -{ - unsigned char checksum; - unsigned char xmitcsum; - int i; - int count; - char ch; - - do { - /* Ignore everything until the start character */ - while ((ch = get_debug_char()) != '$'); - - checksum = 0; - xmitcsum = -1; - count = 0; - - /* Now, read until a # or end of buffer is found */ - while (count < (buflen - 1)) { - ch = get_debug_char(); - - if (ch == '#') - break; - - checksum = checksum + ch; - buffer[count] = ch; - count = count + 1; - } - - buffer[count] = 0; - - /* Continue to read checksum following # */ - if (ch == '#') { - xmitcsum = hex(get_debug_char()) << 4; - xmitcsum += hex(get_debug_char()); - - /* Checksum */ - if (checksum != 
xmitcsum) - put_debug_char('-'); /* Failed checksum */ - else { - /* Ack successful transfer */ - put_debug_char('+'); - - /* If a sequence char is present, reply - the sequence ID */ - if (buffer[2] == ':') { - put_debug_char(buffer[0]); - put_debug_char(buffer[1]); - - /* Remove sequence chars from buffer */ - count = strlen(buffer); - for (i = 3; i <= count; i++) - buffer[i - 3] = buffer[i]; - } - } - } - } - while (checksum != xmitcsum); /* Keep trying while we fail */ -} - -/* Send the packet in the buffer with run-length encoding */ -static void put_packet(char *buffer) -{ - int checksum; - char *src; - int runlen; - int encode; - - do { - src = buffer; - put_debug_char('$'); - checksum = 0; - - /* Continue while we still have chars left */ - while (*src) { - /* Check for runs up to 99 chars long */ - for (runlen = 1; runlen < 99; runlen++) { - if (src[0] != src[runlen]) - break; - } - - if (runlen > 3) { - /* Got a useful amount, send encoding */ - encode = runlen + ' ' - 4; - put_debug_char(*src); checksum += *src; - put_debug_char('*'); checksum += '*'; - put_debug_char(encode); checksum += encode; - src += runlen; - } else { - /* Otherwise just send the current char */ - put_debug_char(*src); checksum += *src; - src += 1; - } - } - - /* '#' Separator, put high and low components of checksum */ - put_debug_char('#'); - put_debug_char(highhex(checksum)); - put_debug_char(lowhex(checksum)); - } - while ((get_debug_char()) != '+'); /* While no ack */ -} - -/* A bus error has occurred - perform a longjmp to return execution and - allow handling of the error */ -static void kgdb_handle_bus_error(void) -{ - longjmp(rem_com_env, 1); -} - -/* Translate SH-3/4 exception numbers to unix-like signal values */ -static int compute_signal(const int excep_code) -{ - int sigval; - - switch (excep_code) { - - case INVALID_INSN_VEC: - case INVALID_SLOT_VEC: - sigval = SIGILL; - break; - case ADDRESS_ERROR_LOAD_VEC: - case ADDRESS_ERROR_STORE_VEC: - sigval = SIGSEGV; - 
break; - - case SERIAL_BREAK_VEC: - case NMI_VEC: - sigval = SIGINT; - break; - - case USER_BREAK_VEC: - case TRAP_VEC: - sigval = SIGTRAP; - break; - - default: - sigval = SIGBUS; /* "software generated" */ - break; - } - - return (sigval); -} - -/* Make a local copy of the registers passed into the handler (bletch) */ -static void kgdb_regs_to_gdb_regs(const struct kgdb_regs *regs, - int *gdb_regs) -{ - gdb_regs[R0] = regs->regs[R0]; - gdb_regs[R1] = regs->regs[R1]; - gdb_regs[R2] = regs->regs[R2]; - gdb_regs[R3] = regs->regs[R3]; - gdb_regs[R4] = regs->regs[R4]; - gdb_regs[R5] = regs->regs[R5]; - gdb_regs[R6] = regs->regs[R6]; - gdb_regs[R7] = regs->regs[R7]; - gdb_regs[R8] = regs->regs[R8]; - gdb_regs[R9] = regs->regs[R9]; - gdb_regs[R10] = regs->regs[R10]; - gdb_regs[R11] = regs->regs[R11]; - gdb_regs[R12] = regs->regs[R12]; - gdb_regs[R13] = regs->regs[R13]; - gdb_regs[R14] = regs->regs[R14]; - gdb_regs[R15] = regs->regs[R15]; - gdb_regs[PC] = regs->pc; - gdb_regs[PR] = regs->pr; - gdb_regs[GBR] = regs->gbr; - gdb_regs[MACH] = regs->mach; - gdb_regs[MACL] = regs->macl; - gdb_regs[SR] = regs->sr; - gdb_regs[VBR] = regs->vbr; -} - -/* Copy local gdb registers back to kgdb regs, for later copy to kernel */ -static void gdb_regs_to_kgdb_regs(const int *gdb_regs, - struct kgdb_regs *regs) -{ - regs->regs[R0] = gdb_regs[R0]; - regs->regs[R1] = gdb_regs[R1]; - regs->regs[R2] = gdb_regs[R2]; - regs->regs[R3] = gdb_regs[R3]; - regs->regs[R4] = gdb_regs[R4]; - regs->regs[R5] = gdb_regs[R5]; - regs->regs[R6] = gdb_regs[R6]; - regs->regs[R7] = gdb_regs[R7]; - regs->regs[R8] = gdb_regs[R8]; - regs->regs[R9] = gdb_regs[R9]; - regs->regs[R10] = gdb_regs[R10]; - regs->regs[R11] = gdb_regs[R11]; - regs->regs[R12] = gdb_regs[R12]; - regs->regs[R13] = gdb_regs[R13]; - regs->regs[R14] = gdb_regs[R14]; - regs->regs[R15] = gdb_regs[R15]; - regs->pc = gdb_regs[PC]; - regs->pr = gdb_regs[PR]; - regs->gbr = gdb_regs[GBR]; - regs->mach = gdb_regs[MACH]; - regs->macl = gdb_regs[MACL]; 
- regs->sr = gdb_regs[SR]; - regs->vbr = gdb_regs[VBR]; -} - -/* Calculate the new address for after a step */ -static short *get_step_address(void) -{ - short op = *(short *) trap_registers.pc; - long addr; - - /* BT */ - if (OPCODE_BT(op)) { - if (trap_registers.sr & SR_T_BIT_MASK) - addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); - else - addr = trap_registers.pc + 2; - } - - /* BTS */ - else if (OPCODE_BTS(op)) { - if (trap_registers.sr & SR_T_BIT_MASK) - addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); - else - addr = trap_registers.pc + 4; /* Not in delay slot */ - } - - /* BF */ - else if (OPCODE_BF(op)) { - if (!(trap_registers.sr & SR_T_BIT_MASK)) - addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); - else - addr = trap_registers.pc + 2; - } - - /* BFS */ - else if (OPCODE_BFS(op)) { - if (!(trap_registers.sr & SR_T_BIT_MASK)) - addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); - else - addr = trap_registers.pc + 4; /* Not in delay slot */ - } - - /* BRA */ - else if (OPCODE_BRA(op)) - addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op); - - /* BRAF */ - else if (OPCODE_BRAF(op)) - addr = trap_registers.pc + 4 - + trap_registers.regs[OPCODE_BRAF_REG(op)]; - - /* BSR */ - else if (OPCODE_BSR(op)) - addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op); - - /* BSRF */ - else if (OPCODE_BSRF(op)) - addr = trap_registers.pc + 4 - + trap_registers.regs[OPCODE_BSRF_REG(op)]; - - /* JMP */ - else if (OPCODE_JMP(op)) - addr = trap_registers.regs[OPCODE_JMP_REG(op)]; - - /* JSR */ - else if (OPCODE_JSR(op)) - addr = trap_registers.regs[OPCODE_JSR_REG(op)]; - - /* RTS */ - else if (OPCODE_RTS(op)) - addr = trap_registers.pr; - - /* RTE */ - else if (OPCODE_RTE(op)) - addr = trap_registers.regs[15]; - - /* Other */ - else - addr = trap_registers.pc + 2; - - kgdb_flush_icache_range(addr, addr + 2); - return (short *) addr; -} - -/* Set up a single-step. Replace the instruction immediately after the - current instruction (i.e. 
next in the expected flow of control) with a - trap instruction, so that returning will cause only a single instruction - to be executed. Note that this model is slightly broken for instructions - with delay slots (e.g. B[TF]S, BSR, BRA etc), where both the branch - and the instruction in the delay slot will be executed. */ -static void do_single_step(void) -{ - unsigned short *addr = 0; - - /* Determine where the target instruction will send us to */ - addr = get_step_address(); - stepped_address = (int)addr; - - /* Replace it */ - stepped_opcode = *(short *)addr; - *addr = STEP_OPCODE; - - /* Flush and return */ - kgdb_flush_icache_range((long) addr, (long) addr + 2); - return; -} - -/* Undo a single step */ -static void undo_single_step(void) -{ - /* If we have stepped, put back the old instruction */ - /* Use stepped_address in case we stopped elsewhere */ - if (stepped_opcode != 0) { - *(short*)stepped_address = stepped_opcode; - kgdb_flush_icache_range(stepped_address, stepped_address + 2); - } - stepped_opcode = 0; -} - -/* Send a signal message */ -static void send_signal_msg(const int signum) -{ - out_buffer[0] = 'S'; - out_buffer[1] = highhex(signum); - out_buffer[2] = lowhex(signum); - out_buffer[3] = 0; - put_packet(out_buffer); -} - -/* Reply that all was well */ -static void send_ok_msg(void) -{ - strcpy(out_buffer, "OK"); - put_packet(out_buffer); -} - -/* Reply that an error occurred */ -static void send_err_msg(void) -{ - strcpy(out_buffer, "E01"); - put_packet(out_buffer); -} - -/* Empty message indicates unrecognised command */ -static void send_empty_msg(void) -{ - put_packet(""); -} - -/* Read memory due to 'm' message */ -static void read_mem_msg(void) -{ - char *ptr; - int addr; - int length; - - /* Jmp, disable bus error handler */ - if (setjmp(rem_com_env) == 0) { - - kgdb_nofault = 1; - - /* Walk through, have m, */ - ptr = &in_buffer[1]; - if (hex_to_int(&ptr, &addr) && (*ptr++ == ',')) - if (hex_to_int(&ptr, &length)) { - ptr = 0; - if 
(length * 2 > OUTBUFMAX) - length = OUTBUFMAX / 2; - mem_to_hex((char *) addr, out_buffer, length); - } - if (ptr) - send_err_msg(); - else - put_packet(out_buffer); - } else - send_err_msg(); - - /* Restore bus error handler */ - kgdb_nofault = 0; -} - -/* Write memory due to 'M' or 'X' message */ -static void write_mem_msg(int binary) -{ - char *ptr; - int addr; - int length; - - if (setjmp(rem_com_env) == 0) { - - kgdb_nofault = 1; - - /* Walk through, have M,: */ - ptr = &in_buffer[1]; - if (hex_to_int(&ptr, &addr) && (*ptr++ == ',')) - if (hex_to_int(&ptr, &length) && (*ptr++ == ':')) { - if (binary) - ebin_to_mem(ptr, (char*)addr, length); - else - hex_to_mem(ptr, (char*)addr, length); - kgdb_flush_icache_range(addr, addr + length); - ptr = 0; - send_ok_msg(); - } - if (ptr) - send_err_msg(); - } else - send_err_msg(); - - /* Restore bus error handler */ - kgdb_nofault = 0; -} - -/* Continue message */ -static void continue_msg(void) -{ - /* Try to read optional parameter, PC unchanged if none */ - char *ptr = &in_buffer[1]; - int addr; - - if (hex_to_int(&ptr, &addr)) - trap_registers.pc = addr; -} - -/* Continue message with signal */ -static void continue_with_sig_msg(void) -{ - int signal; - char *ptr = &in_buffer[1]; - int addr; - - /* Report limitation */ - kgdb_to_gdb("Cannot force signal in kgdb, continuing anyway.\n"); - - /* Signal */ - hex_to_int(&ptr, &signal); - if (*ptr == ';') - ptr++; - - /* Optional address */ - if (hex_to_int(&ptr, &addr)) - trap_registers.pc = addr; -} - -/* Step message */ -static void step_msg(void) -{ - continue_msg(); - do_single_step(); -} - -/* Step message with signal */ -static void step_with_sig_msg(void) -{ - continue_with_sig_msg(); - do_single_step(); -} - -/* Send register contents */ -static void send_regs_msg(void) -{ - kgdb_regs_to_gdb_regs(&trap_registers, registers); - mem_to_hex((char *) registers, out_buffer, NUMREGBYTES); - put_packet(out_buffer); -} - -/* Set register contents - currently can't set 
other thread's registers */ -static void set_regs_msg(void) -{ - kgdb_regs_to_gdb_regs(&trap_registers, registers); - hex_to_mem(&in_buffer[1], (char *) registers, NUMREGBYTES); - gdb_regs_to_kgdb_regs(registers, &trap_registers); - send_ok_msg(); -} - -#ifdef CONFIG_SH_KGDB_CONSOLE -/* - * Bring up the ports.. - */ -static int kgdb_serial_setup(void) -{ - extern int kgdb_console_setup(struct console *co, char *options); - struct console dummy; - - kgdb_console_setup(&dummy, 0); - - return 0; -} -#else -#define kgdb_serial_setup() 0 -#endif - -/* The command loop, read and act on requests */ -static void kgdb_command_loop(const int excep_code, const int trapa_value) -{ - int sigval; - - if (excep_code == NMI_VEC) { -#ifndef CONFIG_KGDB_NMI - printk(KERN_NOTICE "KGDB: Ignoring unexpected NMI?\n"); - return; -#else /* CONFIG_KGDB_NMI */ - if (!kgdb_enabled) { - kgdb_enabled = 1; - kgdb_init(); - } -#endif /* CONFIG_KGDB_NMI */ - } - - /* Ignore if we're disabled */ - if (!kgdb_enabled) - return; - - /* Enter GDB mode (e.g. after detach) */ - if (!kgdb_in_gdb_mode) { - /* Do serial setup, notify user, issue preemptive ack */ - printk(KERN_NOTICE "KGDB: Waiting for GDB\n"); - kgdb_in_gdb_mode = 1; - put_debug_char('+'); - } - - /* Reply to host that an exception has occurred */ - sigval = compute_signal(excep_code); - send_signal_msg(sigval); - - /* TRAP_VEC exception indicates a software trap inserted in place of - code by GDB so back up PC by one instruction, as this instruction - will later be replaced by its original one. 
Do NOT do this for - trap 0xff, since that indicates a compiled-in breakpoint which - will not be replaced (and we would retake the trap forever) */ - if ((excep_code == TRAP_VEC) && (trapa_value != (0x3c << 2))) - trap_registers.pc -= 2; - - /* Undo any stepping we may have done */ - undo_single_step(); - - while (1) { - out_buffer[0] = 0; - get_packet(in_buffer, BUFMAX); - - /* Examine first char of buffer to see what we need to do */ - switch (in_buffer[0]) { - case '?': /* Send which signal we've received */ - send_signal_msg(sigval); - break; - - case 'g': /* Return the values of the CPU registers */ - send_regs_msg(); - break; - - case 'G': /* Set the value of the CPU registers */ - set_regs_msg(); - break; - - case 'm': /* Read LLLL bytes address AA..AA */ - read_mem_msg(); - break; - - case 'M': /* Write LLLL bytes address AA..AA, ret OK */ - write_mem_msg(0); /* 0 = data in hex */ - break; - - case 'X': /* Write LLLL bytes esc bin address AA..AA */ - if (kgdb_bits == '8') - write_mem_msg(1); /* 1 = data in binary */ - else - send_empty_msg(); - break; - - case 'C': /* Continue, signum included, we ignore it */ - continue_with_sig_msg(); - return; - - case 'c': /* Continue at address AA..AA (optional) */ - continue_msg(); - return; - - case 'S': /* Step, signum included, we ignore it */ - step_with_sig_msg(); - return; - - case 's': /* Step one instruction from AA..AA */ - step_msg(); - return; - - case 'k': /* 'Kill the program' with a kernel ? */ - break; - - case 'D': /* Detach from program, send reply OK */ - kgdb_in_gdb_mode = 0; - send_ok_msg(); - get_debug_char(); - return; - - default: - send_empty_msg(); - break; - } - } -} - -/* There has been an exception, most likely a breakpoint. 
*/ -static void handle_exception(struct pt_regs *regs) -{ - int excep_code, vbr_val; - int count; - int trapa_value = ctrl_inl(TRA); - - /* Copy kernel regs (from stack) */ - for (count = 0; count < 16; count++) - trap_registers.regs[count] = regs->regs[count]; - trap_registers.pc = regs->pc; - trap_registers.pr = regs->pr; - trap_registers.sr = regs->sr; - trap_registers.gbr = regs->gbr; - trap_registers.mach = regs->mach; - trap_registers.macl = regs->macl; - - asm("stc vbr, %0":"=r"(vbr_val)); - trap_registers.vbr = vbr_val; - - /* Get excode for command loop call, user access */ - asm("stc r2_bank, %0":"=r"(excep_code)); - kgdb_excode = excep_code; - - /* Other interesting environment items for reference */ - asm("stc r6_bank, %0":"=r"(kgdb_g_imask)); - kgdb_current = current; - kgdb_trapa_val = trapa_value; - - /* Act on the exception */ - kgdb_command_loop(excep_code, trapa_value); - - kgdb_current = NULL; - - /* Copy back the (maybe modified) registers */ - for (count = 0; count < 16; count++) - regs->regs[count] = trap_registers.regs[count]; - regs->pc = trap_registers.pc; - regs->pr = trap_registers.pr; - regs->sr = trap_registers.sr; - regs->gbr = trap_registers.gbr; - regs->mach = trap_registers.mach; - regs->macl = trap_registers.macl; - - vbr_val = trap_registers.vbr; - asm("ldc %0, vbr": :"r"(vbr_val)); -} - -asmlinkage void kgdb_handle_exception(unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7, - struct pt_regs __regs) -{ - struct pt_regs *regs = RELOC_HIDE(&__regs, 0); - handle_exception(regs); -} - -/* Initialise the KGDB data structures and serial configuration */ -int kgdb_init(void) -{ - if (!kgdb_enabled) - return 1; - - in_nmi = 0; - kgdb_nofault = 0; - stepped_opcode = 0; - kgdb_in_gdb_mode = 0; - - if (kgdb_serial_setup() != 0) { - printk(KERN_NOTICE "KGDB: serial setup error\n"); - return -1; - } - - /* Init ptr to exception handler */ - kgdb_debug_hook = handle_exception; - kgdb_bus_err_hook = kgdb_handle_bus_error; 
- - /* Enter kgdb now if requested, or just report init done */ - printk(KERN_NOTICE "KGDB: stub is initialized.\n"); - - return 0; -} - -/* Make function available for "user messages"; console will use it too. */ - -char gdbmsgbuf[BUFMAX]; -#define MAXOUT ((BUFMAX-2)/2) - -static void kgdb_msg_write(const char *s, unsigned count) -{ - int i; - int wcount; - char *bufptr; - - /* 'O'utput */ - gdbmsgbuf[0] = 'O'; - - /* Fill and send buffers... */ - while (count > 0) { - bufptr = gdbmsgbuf + 1; - - /* Calculate how many this time */ - wcount = (count > MAXOUT) ? MAXOUT : count; - - /* Pack in hex chars */ - for (i = 0; i < wcount; i++) - bufptr = pack_hex_byte(bufptr, s[i]); - *bufptr = '\0'; - - /* Move up */ - s += wcount; - count -= wcount; - - /* Write packet */ - put_packet(gdbmsgbuf); - } -} - -static void kgdb_to_gdb(const char *s) -{ - kgdb_msg_write(s, strlen(s)); -} - -#ifdef CONFIG_SH_KGDB_CONSOLE -void kgdb_console_write(struct console *co, const char *s, unsigned count) -{ - /* Bail if we're not talking to GDB */ - if (!kgdb_in_gdb_mode) - return; - - kgdb_msg_write(s, count); -} -#endif - -#ifdef CONFIG_KGDB_SYSRQ -static void sysrq_handle_gdb(int key, struct tty_struct *tty) -{ - printk("Entering GDB stub\n"); - breakpoint(); -} - -static struct sysrq_key_op sysrq_gdb_op = { - .handler = sysrq_handle_gdb, - .help_msg = "Gdb", - .action_msg = "GDB", -}; - -static int gdb_register_sysrq(void) -{ - printk("Registering GDB sysrq handler\n"); - register_sysrq_key('g', &sysrq_gdb_op); - return 0; -} -module_init(gdb_register_sysrq); -#endif diff -Nurb linux-2.6.22-570/arch/sh/kernel/time.c linux-2.6.22-591/arch/sh/kernel/time.c --- linux-2.6.22-570/arch/sh/kernel/time.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/kernel/time.c 2007-12-21 15:36:11.000000000 -0500 @@ -259,11 +259,4 @@ ((sh_hpt_frequency + 500) / 1000) / 1000, ((sh_hpt_frequency + 500) / 1000) % 1000); -#if defined(CONFIG_SH_KGDB) - /* - * Set up kgdb as requested. 
We do it here because the serial - * init uses the timer vars we just set up for figuring baud. - */ - kgdb_init(); -#endif } diff -Nurb linux-2.6.22-570/arch/sh/kernel/traps.c linux-2.6.22-591/arch/sh/kernel/traps.c --- linux-2.6.22-570/arch/sh/kernel/traps.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 @@ -25,16 +25,10 @@ #include #include #include +#include -#ifdef CONFIG_SH_KGDB -#include -#define CHK_REMOTE_DEBUG(regs) \ -{ \ - if (kgdb_debug_hook && !user_mode(regs))\ - (*kgdb_debug_hook)(regs); \ -} -#else -#define CHK_REMOTE_DEBUG(regs) +#ifndef CONFIG_KGDB +#define kgdb_handle_exception(t, s, e, r) #endif #ifdef CONFIG_CPU_SH2 @@ -91,7 +85,9 @@ printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); - CHK_REMOTE_DEBUG(regs); +#ifdef CONFIG_KGDB + kgdb_handle_exception(1, SIGTRAP, err, regs); +#endif print_modules(); show_regs(regs); @@ -700,7 +696,9 @@ lookup_exception_vector(error_code); local_irq_enable(); - CHK_REMOTE_DEBUG(regs); +#ifdef CONFIG_KGDB + kgdb_handle_exception(1, SIGILL, err, regs); +#endif force_sig(SIGILL, tsk); die_if_no_fixup("reserved instruction", regs, error_code); } @@ -771,7 +769,9 @@ lookup_exception_vector(error_code); local_irq_enable(); - CHK_REMOTE_DEBUG(regs); +#ifdef CONFIG_KGDB + kgdb_handle_exception(1, SIGILL, err, regs); +#endif force_sig(SIGILL, tsk); die_if_no_fixup("illegal slot instruction", regs, error_code); } diff -Nurb linux-2.6.22-570/arch/sh/mm/extable.c linux-2.6.22-591/arch/sh/mm/extable.c --- linux-2.6.22-570/arch/sh/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 @@ -5,6 +5,7 @@ */ #include +#include #include int fixup_exception(struct pt_regs *regs) @@ -16,6 +17,12 @@ regs->pc = fixup->fixup; return 1; } +#ifdef CONFIG_KGDB + if (atomic_read(&debugger_active) && kgdb_may_fault) + /* Restore our previous state. 
*/ + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + /* Never reached. */ +#endif return 0; } diff -Nurb linux-2.6.22-570/arch/sh/mm/fault-nommu.c linux-2.6.22-591/arch/sh/mm/fault-nommu.c --- linux-2.6.22-570/arch/sh/mm/fault-nommu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sh/mm/fault-nommu.c 2007-12-21 15:36:11.000000000 -0500 @@ -28,10 +28,6 @@ #include #include -#if defined(CONFIG_SH_KGDB) -#include -#endif - extern void die(const char *,struct pt_regs *,long); /* @@ -42,11 +38,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, unsigned long address) { -#if defined(CONFIG_SH_KGDB) - if (kgdb_nofault && kgdb_bus_err_hook) - kgdb_bus_err_hook(); -#endif - /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. @@ -68,11 +59,6 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess, unsigned long address) { -#if defined(CONFIG_SH_KGDB) - if (kgdb_nofault && kgdb_bus_err_hook) - kgdb_bus_err_hook(); -#endif - if (address >= TASK_SIZE) return 1; diff -Nurb linux-2.6.22-570/arch/sh/mm/fault.c linux-2.6.22-591/arch/sh/mm/fault.c --- linux-2.6.22-570/arch/sh/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/sh/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 @@ -18,7 +18,6 @@ #include #include #include -#include /* * This routine handles page faults. 
It determines the address, @@ -39,11 +38,6 @@ trace_hardirqs_on(); local_irq_enable(); -#ifdef CONFIG_SH_KGDB - if (kgdb_nofault && kgdb_bus_err_hook) - kgdb_bus_err_hook(); -#endif - tsk = current; mm = tsk->mm; si_code = SEGV_MAPERR; @@ -189,6 +183,7 @@ } die("Oops", regs, writeaccess); do_exit(SIGKILL); + dump_stack(); /* * We ran out of memory, or some other thing happened to us that made @@ -252,11 +247,6 @@ spinlock_t *ptl = NULL; int ret = 1; -#ifdef CONFIG_SH_KGDB - if (kgdb_nofault && kgdb_bus_err_hook) - kgdb_bus_err_hook(); -#endif - /* * We don't take page faults for P1, P2, and parts of P4, these * are always mapped, whether it be due to legacy behaviour in diff -Nurb linux-2.6.22-570/arch/sparc64/kernel/power.c linux-2.6.22-591/arch/sparc64/kernel/power.c --- linux-2.6.22-570/arch/sparc64/kernel/power.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sparc64/kernel/power.c 2007-12-21 15:36:11.000000000 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -33,14 +34,13 @@ #include static void __iomem *power_reg; -static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); static int button_pressed; static irqreturn_t power_handler(int irq, void *dev_id) { if (button_pressed == 0) { button_pressed = 1; - wake_up(&powerd_wait); + orderly_poweroff(true); } /* FIXME: Check registers for status... */ @@ -77,36 +77,6 @@ EXPORT_SYMBOL(pm_power_off); #ifdef CONFIG_PCI -static int powerd(void *__unused) -{ - static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; - DECLARE_WAITQUEUE(wait, current); - - daemonize("powerd"); - - add_wait_queue(&powerd_wait, &wait); -again: - for (;;) { - set_task_state(current, TASK_INTERRUPTIBLE); - if (button_pressed) - break; - flush_signals(current); - schedule(); - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&powerd_wait, &wait); - - /* Ok, down we go... 
*/ - button_pressed = 0; - if (kernel_execve("/sbin/shutdown", argv, envp) < 0) { - printk("powerd: shutdown execution failed\n"); - add_wait_queue(&powerd_wait, &wait); - goto again; - } - return 0; -} - static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) { if (irq == PCI_IRQ_NONE) @@ -130,12 +100,6 @@ poweroff_method = machine_halt; /* able to use the standard halt */ if (has_button_interrupt(irq, op->node)) { - if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { - printk("Failed to start power daemon.\n"); - return 0; - } - printk("powerd running.\n"); - if (request_irq(irq, power_handler, 0, "power", NULL) < 0) printk("power: Error, cannot register IRQ handler.\n"); diff -Nurb linux-2.6.22-570/arch/sparc64/solaris/ioctl.c linux-2.6.22-591/arch/sparc64/solaris/ioctl.c --- linux-2.6.22-570/arch/sparc64/solaris/ioctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/sparc64/solaris/ioctl.c 2007-12-21 15:36:14.000000000 -0500 @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -686,7 +687,7 @@ int i = 0; read_lock_bh(&dev_base_lock); - for_each_netdev(d) + for_each_netdev(&init_net, d) i++; read_unlock_bh(&dev_base_lock); diff -Nurb linux-2.6.22-570/arch/um/Kconfig.debug linux-2.6.22-591/arch/um/Kconfig.debug --- linux-2.6.22-570/arch/um/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/um/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 @@ -47,4 +47,13 @@ If you're involved in UML kernel development and want to use gcov, say Y. If you're unsure, say N. +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + default N + help + Track the maximum kernel stack usage - this will look at each + kernel stack at process exit and log it if it's the deepest + stack seen so far. + + This option will slow down process creation and destruction somewhat. 
endmenu diff -Nurb linux-2.6.22-570/arch/um/defconfig linux-2.6.22-591/arch/um/defconfig --- linux-2.6.22-570/arch/um/defconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/um/defconfig 2007-12-21 15:36:11.000000000 -0500 @@ -527,3 +527,4 @@ # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_GPROF is not set # CONFIG_GCOV is not set +# CONFIG_DEBUG_STACK_USAGE is not set diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig linux-2.6.22-591/arch/x86_64/Kconfig --- linux-2.6.22-570/arch/x86_64/Kconfig 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -698,6 +698,8 @@ source "arch/x86_64/kernel/cpufreq/Kconfig" +source "drivers/cpuidle/Kconfig" + endmenu menu "Bus options (PCI etc.)" diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig.debug linux-2.6.22-591/arch/x86_64/Kconfig.debug --- linux-2.6.22-570/arch/x86_64/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/x86_64/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 @@ -55,7 +55,4 @@ This option will slow down process creation somewhat. -#config X86_REMOTE_DEBUG -# bool "kgdb debugging stub" - endmenu diff -Nurb linux-2.6.22-570/arch/x86_64/Makefile linux-2.6.22-591/arch/x86_64/Makefile --- linux-2.6.22-570/arch/x86_64/Makefile 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -41,7 +41,9 @@ cflags-y += -mcmodel=kernel cflags-y += -pipe cflags-y += -Wno-sign-compare +ifneq ($(CONFIG_UNWIND_INFO),y) cflags-y += -fno-asynchronous-unwind-tables +endif ifneq ($(CONFIG_DEBUG_INFO),y) # -fweb shrinks the kernel a bit, but the difference is very small # it also messes up debugging, so don't use it for now. 
diff -Nurb linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S linux-2.6.22-591/arch/x86_64/ia32/ia32entry.S --- linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/ia32/ia32entry.S 2007-12-21 15:36:11.000000000 -0500 @@ -731,4 +731,7 @@ .quad compat_sys_signalfd .quad compat_sys_timerfd .quad sys_eventfd + .quad sys_revokeat + .quad sys_frevoke /* 325 */ + .quad sys_fallocate ia32_syscall_end: diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/Makefile linux-2.6.22-591/arch/x86_64/kernel/Makefile --- linux-2.6.22-570/arch/x86_64/kernel/Makefile 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -33,10 +33,12 @@ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o obj-$(CONFIG_X86_VSMP) += vsmp.o obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_AUDIT) += audit.o +obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_PCI) += early-quirks.o diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S linux-2.6.22-591/arch/x86_64/kernel/kgdb-jmp.S --- linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,65 @@ +/* + * arch/x86_64/kernel/kgdb-jmp.S + * + * Save and restore system registers so that within a limited frame we + * may have a fault and "jump back" to a known safe location. + * + * Author: Tom Rini + * + * Cribbed from glibc, which carries the following: + * Copyright (C) 2001, 2003, 2004 Free Software Foundation, Inc. + * Copyright (C) 2005 by MontaVista Software. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. 
This program as licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#include + +#define JB_RBX 0 +#define JB_RBP 1 +#define JB_R12 2 +#define JB_R13 3 +#define JB_R14 4 +#define JB_R15 5 +#define JB_RSP 6 +#define JB_PC 7 + + .code64 + +/* This must be called prior to kgdb_fault_longjmp and + * kgdb_fault_longjmp must not be called outside of the context of the + * last call to kgdb_fault_setjmp. + */ +ENTRY(kgdb_fault_setjmp) + /* Save registers. */ + movq %rbx, (JB_RBX*8)(%rdi) + movq %rbp, (JB_RBP*8)(%rdi) + movq %r12, (JB_R12*8)(%rdi) + movq %r13, (JB_R13*8)(%rdi) + movq %r14, (JB_R14*8)(%rdi) + movq %r15, (JB_R15*8)(%rdi) + leaq 8(%rsp), %rdx /* Save SP as it will be after we return. */ + movq %rdx, (JB_RSP*8)(%rdi) + movq (%rsp), %rax /* Save PC we are returning to now. */ + movq %rax, (JB_PC*8)(%rdi) + /* Set return value for setjmp. */ + mov $0,%eax + movq (JB_PC*8)(%rdi),%rdx + movq (JB_RSP*8)(%rdi),%rsp + jmpq *%rdx + +ENTRY(kgdb_fault_longjmp) + /* Restore registers. */ + movq (JB_RBX*8)(%rdi),%rbx + movq (JB_RBP*8)(%rdi),%rbp + movq (JB_R12*8)(%rdi),%r12 + movq (JB_R13*8)(%rdi),%r13 + movq (JB_R14*8)(%rdi),%r14 + movq (JB_R15*8)(%rdi),%r15 + /* Set return value for setjmp. */ + movq (JB_PC*8)(%rdi),%rdx + movq (JB_RSP*8)(%rdi),%rsp + mov $1,%eax + jmpq *%rdx diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb.c linux-2.6.22-591/arch/x86_64/kernel/kgdb.c --- linux-2.6.22-570/arch/x86_64/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,461 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (C) 2004 Amit S. Kale + * Copyright (C) 2000-2001 VERITAS Software Corporation. + * Copyright (C) 2002 Andi Kleen, SuSE Labs + * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd. + * Copyright (C) 2007 Jason Wessel, Wind River Systems, Inc. + */ +/**************************************************************************** + * Contributor: Lake Stevens Instrument Division$ + * Written by: Glenn Engel $ + * Updated by: Amit Kale + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Origianl kgdb, compatibility with 2.1.xx kernel by + * David Grothe + * Integrated into 2.2.5 kernel by Tigran Aivazian + * X86_64 changes from Andi Kleen's patch merged by Jim Houston + */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include + +/* Put the error code here just in case the user cares. */ +int gdb_x86_64errcode; +/* Likewise, the vector number here (since GDB only gets the signal + number through the usual means, and that's not very specific). 
*/ +int gdb_x86_64vector = -1; + +extern atomic_t cpu_doing_single_step; + +void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_RAX] = regs->rax; + gdb_regs[_RBX] = regs->rbx; + gdb_regs[_RCX] = regs->rcx; + gdb_regs[_RDX] = regs->rdx; + gdb_regs[_RSI] = regs->rsi; + gdb_regs[_RDI] = regs->rdi; + gdb_regs[_RBP] = regs->rbp; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_PC] = regs->rip; + gdb_regs[_R8] = regs->r8; + gdb_regs[_R9] = regs->r9; + gdb_regs[_R10] = regs->r10; + gdb_regs[_R11] = regs->r11; + gdb_regs[_R12] = regs->r12; + gdb_regs[_R13] = regs->r13; + gdb_regs[_R14] = regs->r14; + gdb_regs[_R15] = regs->r15; + gdb_regs[_RSP] = regs->rsp; +} + +extern void thread_return(void); +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + gdb_regs[_RAX] = 0; + gdb_regs[_RBX] = 0; + gdb_regs[_RCX] = 0; + gdb_regs[_RDX] = 0; + gdb_regs[_RSI] = 0; + gdb_regs[_RDI] = 0; + gdb_regs[_RBP] = *(unsigned long *)p->thread.rsp; + gdb_regs[_PS] = *(unsigned long *)(p->thread.rsp + 8); + gdb_regs[_PC] = (unsigned long)&thread_return; + gdb_regs[_R8] = 0; + gdb_regs[_R9] = 0; + gdb_regs[_R10] = 0; + gdb_regs[_R11] = 0; + gdb_regs[_R12] = 0; + gdb_regs[_R13] = 0; + gdb_regs[_R14] = 0; + gdb_regs[_R15] = 0; + gdb_regs[_RSP] = p->thread.rsp; +} + +void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + regs->rax = gdb_regs[_RAX]; + regs->rbx = gdb_regs[_RBX]; + regs->rcx = gdb_regs[_RCX]; + regs->rdx = gdb_regs[_RDX]; + regs->rsi = gdb_regs[_RSI]; + regs->rdi = gdb_regs[_RDI]; + regs->rbp = gdb_regs[_RBP]; + regs->eflags = gdb_regs[_PS]; + regs->rip = gdb_regs[_PC]; + regs->r8 = gdb_regs[_R8]; + regs->r9 = gdb_regs[_R9]; + regs->r10 = gdb_regs[_R10]; + regs->r11 = gdb_regs[_R11]; + regs->r12 = gdb_regs[_R12]; + regs->r13 = gdb_regs[_R13]; + regs->r14 = gdb_regs[_R14]; + regs->r15 = gdb_regs[_R15]; +#if 0 /* can't change these */ + regs->rsp = gdb_regs[_RSP]; + regs->ss = gdb_regs[_SS]; + regs->fs = 
gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ + +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned long addr; +} breakinfo[4] = { { +enabled:0}, { +enabled:0}, { +enabled:0}, { +enabled:0}}; + +static void kgdb_correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned long dr7; + + get_debugreg(dr7, 7); + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + set_debugreg(breakinfo[breakno].addr, 0); + break; + + case 1: + set_debugreg(breakinfo[breakno].addr, 1); + break; + + case 2: + set_debugreg(breakinfo[breakno].addr, 2); + break; + + case 3: + set_debugreg(breakinfo[breakno].addr, 3); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) + set_debugreg(dr7, 7); +} + +static int kgdb_remove_hw_break(unsigned long addr, int len, + enum kgdb_bptype bptype) +{ + int i, idx = -1; + for (i = 0; i < 4; i++) { + if (breakinfo[i].addr == addr && breakinfo[i].enabled) { + idx = i; + break; + } + } + if (idx == -1) + return -1; + + breakinfo[idx].enabled = 0; + return 0; +} + +static void kgdb_remove_all_hw_break(void) +{ + int i; + + for (i = 0; i < 4; i++) { + memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); + } +} + +static int kgdb_set_hw_break(unsigned long addr, int len, + enum kgdb_bptype bptype) +{ + int i, idx = -1; + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) { + idx = i; + break; + } + } + if (idx == -1) + return -1; + if (bptype == bp_hardware_breakpoint) { + breakinfo[idx].type = 0; + breakinfo[idx].len = 0; + } else if 
(bptype == bp_write_watchpoint) { + breakinfo[idx].type = 1; + if (len == 1 || len == 2 || len == 4) + breakinfo[idx].len = len - 1; + else + return -1; + } else if (bptype == bp_access_watchpoint) { + breakinfo[idx].type = 3; + if (len == 1 || len == 2 || len == 4) + breakinfo[idx].len = len - 1; + else + return -1; + } else + return -1; + breakinfo[idx].enabled = 1; + breakinfo[idx].addr = addr; + return 0; +} + +void kgdb_disable_hw_debug(struct pt_regs *regs) +{ + /* Disable hardware debugging while we are in kgdb */ + set_debugreg(0UL, 7); +} + +void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code) +{ + /* Master processor is completely in the debugger */ + gdb_x86_64vector = e_vector; + gdb_x86_64errcode = err_code; +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + send_IPI_allbutself(APIC_DM_NMI); +} + +int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, + char *remcomInBuffer, char *remcomOutBuffer, + struct pt_regs *linux_regs) +{ + unsigned long addr; + unsigned long breakno; + char *ptr; + int newPC; + unsigned long dr6; + + switch (remcomInBuffer[0]) { + case 'c': + case 's': + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + linux_regs->rip = addr; + newPC = linux_regs->rip; + + /* clear the trace bit */ + linux_regs->eflags &= ~TF_MASK; + + atomic_set(&cpu_doing_single_step, -1); + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') { + linux_regs->eflags |= TF_MASK; + debugger_step = 1; + if (kgdb_contthread) + atomic_set(&cpu_doing_single_step, + raw_smp_processor_id()); + + } + + get_debugreg(dr6, 6); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno)) { + if (breakinfo[breakno].type == 0) { + /* Set restore flag */ + linux_regs->eflags |= + X86_EFLAGS_RF; + break; + } + } + } + } + set_debugreg(0UL, 6); + kgdb_correct_hw_break(); + + return (0); + } /* switch 
*/ + return -1; +} + +static struct pt_regs *in_interrupt_stack(unsigned long rsp, int cpu) +{ + struct pt_regs *regs; + unsigned long end = (unsigned long)cpu_pda(cpu)->irqstackptr; + if (rsp <= end && rsp >= end - IRQSTACKSIZE + 8) { + regs = *(((struct pt_regs **)end) - 1); + return regs; + } + return NULL; +} + +static struct pt_regs *in_exception_stack(unsigned long rsp, int cpu) +{ + int i; + struct tss_struct *init_tss = &__get_cpu_var(init_tss); + for (i = 0; i < N_EXCEPTION_STACKS; i++) + if (rsp >= init_tss[cpu].ist[i] && + rsp <= init_tss[cpu].ist[i] + EXCEPTION_STKSZ) { + struct pt_regs *r = + (void *)init_tss[cpu].ist[i] + EXCEPTION_STKSZ; + return r - 1; + } + return NULL; +} + +void kgdb_shadowinfo(struct pt_regs *regs, char *buffer, unsigned threadid) +{ + static char intr_desc[] = "Stack at interrupt entrypoint"; + static char exc_desc[] = "Stack at exception entrypoint"; + struct pt_regs *stregs; + int cpu = raw_smp_processor_id(); + + if ((stregs = in_interrupt_stack(regs->rsp, cpu))) + kgdb_mem2hex(intr_desc, buffer, strlen(intr_desc)); + else if ((stregs = in_exception_stack(regs->rsp, cpu))) + kgdb_mem2hex(exc_desc, buffer, strlen(exc_desc)); +} + +struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs, int threadid) +{ + struct pt_regs *stregs; + int cpu = raw_smp_processor_id(); + + if ((stregs = in_interrupt_stack(regs->rsp, cpu))) + return current; + else if ((stregs = in_exception_stack(regs->rsp, cpu))) + return current; + + return NULL; +} + +struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid) +{ + struct pt_regs *stregs; + int cpu = raw_smp_processor_id(); + + if ((stregs = in_interrupt_stack(regs->rsp, cpu))) + return stregs; + else if ((stregs = in_exception_stack(regs->rsp, cpu))) + return stregs; + + return NULL; +} + +/* Register KGDB with the die_chain so that we hook into all of the right + * spots. 
*/ +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = ptr; + struct pt_regs *regs = args->regs; + + if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active) + && kgdb_may_fault) { + kgdb_fault_longjmp(kgdb_fault_jmp_regs); + return NOTIFY_STOP; + /* CPU roundup? */ + } else if (atomic_read(&debugger_active) && cmd == DIE_NMI_IPI) { + kgdb_nmihook(raw_smp_processor_id(), regs); + return NOTIFY_STOP; + /* See if KGDB is interested. */ + } else if (cmd == DIE_DEBUG + && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id() + && user_mode(regs)) { + /* single step exception from kernel space to user space so + * eat the exception and continue the process + */ + printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n"); + kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs); + return NOTIFY_STOP; + } else if (cmd == DIE_PAGE_FAULT || user_mode(regs) || + cmd == DIE_NMI_IPI || (cmd == DIE_DEBUG && + atomic_read(&debugger_active))) + /* Userpace events, normal watchdog event, or spurious + * debug exception. Ignore. */ + return NOTIFY_DONE; + + kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); + + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + .priority = 0x7fffffff, /* we need to notified first */ +}; + +int kgdb_arch_init(void) +{ + register_die_notifier(&kgdb_notifier); + return 0; +} +/* + * Skip an int3 exception when it occurs after a breakpoint has been + * removed. Backtrack eip by 1 since the int3 would have caused it to + * increment by 1. 
+ */ + +int kgdb_skipexception(int exception, struct pt_regs *regs) +{ + if (exception == 3 && kgdb_isremovedbreak(regs->rip - 1)) { + regs->rip -= 1; + return 1; + } + return 0; +} + +unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) +{ + if (exception == 3) { + return instruction_pointer(regs) - 1; + } + return instruction_pointer(regs); +} + +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0xcc}, + .flags = KGDB_HW_BREAKPOINT, + .shadowth = 1, + .set_hw_breakpoint = kgdb_set_hw_break, + .remove_hw_breakpoint = kgdb_remove_hw_break, + .remove_all_hw_break = kgdb_remove_all_hw_break, + .correct_hw_break = kgdb_correct_hw_break, +}; diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/mce.c linux-2.6.22-591/arch/x86_64/kernel/mce.c --- linux-2.6.22-570/arch/x86_64/kernel/mce.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/x86_64/kernel/mce.c 2007-12-21 15:36:11.000000000 -0500 @@ -174,7 +174,7 @@ if (events != atomic_read(&mce_logged) && trigger[0]) { /* Small race window, but should be harmless. 
*/ atomic_set(&mce_logged, events); - call_usermodehelper(trigger, trigger_argv, NULL, -1); + call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); } } diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/signal.c linux-2.6.22-591/arch/x86_64/kernel/signal.c --- linux-2.6.22-570/arch/x86_64/kernel/signal.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/x86_64/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500 @@ -480,7 +480,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; - if (exception_trace) + if (show_unhandled_signals && printk_ratelimit()) printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n", me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax); diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/traps.c linux-2.6.22-591/arch/x86_64/kernel/traps.c --- linux-2.6.22-570/arch/x86_64/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 @@ -96,6 +96,11 @@ } int kstack_depth_to_print = 12; +#ifdef CONFIG_STACK_UNWIND +static int call_trace = 1; +#else +#define call_trace (-1) +#endif #ifdef CONFIG_KALLSYMS void printk_address(unsigned long address) @@ -198,6 +203,33 @@ return NULL; } +struct ops_and_data { + struct stacktrace_ops *ops; + void *data; +}; + +static int dump_trace_unwind(struct unwind_frame_info *info, void *context) +{ + struct ops_and_data *oad = (struct ops_and_data *)context; + int n = 0; + unsigned long sp = UNW_SP(info); + + if (arch_unw_user_mode(info)) + return -1; + while (unwind(info) == 0 && UNW_PC(info)) { + n++; + oad->ops->address(oad->data, UNW_PC(info)); + if (arch_unw_user_mode(info)) + break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); + touch_nmi_watchdog(); + } + return n; +} + #define MSG(txt) ops->warning(data, txt) /* @@ -225,6 +257,40 @@ if (!tsk) tsk = current; + if 
(call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + struct ops_and_data oad = { .ops = ops, .data = data }; + + if (regs) { + if (unwind_init_frame_info(&info, tsk, regs) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } else if (tsk == current) + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); + else { + if (unwind_init_blocked(&info, tsk) == 0) + unw_ret = dump_trace_unwind(&info, &oad); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s", + UNW_PC(&info)); + if ((long)UNW_SP(&info) < 0) { + MSG("Leftover inexact backtrace:"); + stack = (unsigned long *)UNW_SP(&info); + if (!stack) + goto out; + } else + MSG("Full inexact backtrace again:"); + } else if (call_trace >= 1) + goto out; + else + MSG("Full inexact backtrace again:"); + } else + MSG("Inexact backtrace:"); + } if (!stack) { unsigned long dummy; stack = &dummy; @@ -308,6 +374,7 @@ tinfo = task_thread_info(tsk); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK +out: put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -585,7 +652,8 @@ tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; - if (exception_trace && unhandled_signal(tsk, signr)) + if (show_unhandled_signals && unhandled_signal(tsk, signr) && + printk_ratelimit()) printk(KERN_INFO "%s[%d:#%u] trap %s rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, tsk->xid, str, @@ -689,7 +757,8 @@ tsk->thread.error_code = error_code; tsk->thread.trap_no = 13; - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) printk(KERN_INFO "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, tsk->xid, @@ -1128,3 +1197,21 @@ return 0; } early_param("kstack", kstack_setup); + +#ifdef CONFIG_STACK_UNWIND +static int __init call_trace_setup(char *s) +{ + if (!s) + return -EINVAL; + if (strcmp(s, "old") == 
0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "newfallback") == 0) + call_trace = 1; + else if (strcmp(s, "new") == 0) + call_trace = 2; + return 0; +} +early_param("call_trace", call_trace_setup); +#endif diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/unwind.S linux-2.6.22-591/arch/x86_64/kernel/unwind.S --- linux-2.6.22-570/arch/x86_64/kernel/unwind.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/kernel/unwind.S 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,38 @@ +/* Assembler support for dwarf2 unwinder */ +#include +#include +#include +#include +#include + +ENTRY(arch_unwind_init_running) + CFI_STARTPROC + movq %r15, R15(%rdi) + movq %r14, R14(%rdi) + xchgq %rsi, %rdx + movq %r13, R13(%rdi) + movq %r12, R12(%rdi) + xorl %eax, %eax + movq %rbp, RBP(%rdi) + movq %rbx, RBX(%rdi) + movq (%rsp), %rcx + movq %rax, R11(%rdi) + movq %rax, R10(%rdi) + movq %rax, R9(%rdi) + movq %rax, R8(%rdi) + movq %rax, RAX(%rdi) + movq %rax, RCX(%rdi) + movq %rax, RDX(%rdi) + movq %rax, RSI(%rdi) + movq %rax, RDI(%rdi) + movq %rax, ORIG_RAX(%rdi) + movq %rcx, RIP(%rdi) + leaq 8(%rsp), %rcx + movq $__KERNEL_CS, CS(%rdi) + movq %rax, EFLAGS(%rdi) + movq %rcx, RSP(%rdi) + movq $__KERNEL_DS, SS(%rdi) + jmpq *%rdx + CFI_ENDPROC +ENDPROC(arch_unwind_init_running) + diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.22-591/arch/x86_64/kernel/vmlinux.lds.S --- linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/arch/x86_64/kernel/vmlinux.lds.S 2007-12-21 15:36:11.000000000 -0500 @@ -219,7 +219,9 @@ /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) +#ifndef CONFIG_UNWIND_INFO *(.eh_frame) +#endif } STABS_DEBUG diff -Nurb linux-2.6.22-570/arch/x86_64/mm/fault.c linux-2.6.22-591/arch/x86_64/mm/fault.c --- linux-2.6.22-570/arch/x86_64/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/mm/fault.c 2007-12-21 
15:36:11.000000000 -0500 @@ -221,16 +221,6 @@ return 0; } -int unhandled_signal(struct task_struct *tsk, int sig) -{ - if (is_init(tsk)) - return 1; - if (tsk->ptrace & PT_PTRACED) - return 0; - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); -} - static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { @@ -302,7 +292,7 @@ } int page_fault_trace = 0; -int exception_trace = 1; +int show_unhandled_signals = 1; /* * This routine handles page faults. It determines the address, @@ -534,6 +524,10 @@ if (is_errata93(regs, address)) return; + if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs, + error_code, 14, SIGSEGV) == NOTIFY_STOP) + return; + /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. diff -Nurb linux-2.6.22-570/arch/x86_64/mm/init.c linux-2.6.22-591/arch/x86_64/mm/init.c --- linux-2.6.22-570/arch/x86_64/mm/init.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/arch/x86_64/mm/init.c 2007-12-21 15:36:11.000000000 -0500 @@ -697,41 +697,6 @@ return pfn_valid(pte_pfn(*pte)); } -#ifdef CONFIG_SYSCTL -#include - -extern int exception_trace, page_fault_trace; - -static ctl_table debug_table2[] = { - { - .ctl_name = 99, - .procname = "exception-trace", - .data = &exception_trace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - {} -}; - -static ctl_table debug_root_table2[] = { - { - .ctl_name = CTL_DEBUG, - .procname = "debug", - .mode = 0555, - .child = debug_table2 - }, - {} -}; - -static __init int x8664_sysctl_init(void) -{ - register_sysctl_table(debug_root_table2); - return 0; -} -__initcall(x8664_sysctl_init); -#endif - /* A pseudo VMA to allow ptrace access for the vsyscall page. This only covers the 64bit vsyscall page now. 32bit has a real VMA now and does not need special handling anymore. 
*/ diff -Nurb linux-2.6.22-570/creatinst.sh linux-2.6.22-591/creatinst.sh --- linux-2.6.22-570/creatinst.sh 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/creatinst.sh 2007-12-23 02:56:35.000000000 -0500 @@ -0,0 +1,12 @@ +rm -fR inst +mkdir inst +make install INSTALL_PATH=inst +make modules_install INSTALL_MOD_PATH=inst +tar cfz inst.tar.gz inst +scp -i ~/newvici inst.tar.gz root@vici-03:/tmp +ssh -i ~/newvici root@vici-03 "cd /tmp;tar xvfz inst.tar.gz" +ssh -i ~/newvici root@vici-03 "wget www/~sapanb/vgup;sh vgup" +ssh -i ~/newvici root@vici-03 "cp -R /tmp/inst/lib/* /mnt/lib/" +ssh -i ~/newvici root@vici-03 "rm -fR /tmp/inst/lib; mv /tmp/inst/* /mnt/boot" +sleep 5 +ssh -i ~/newvici root@vici-03 reboot diff -Nurb linux-2.6.22-570/creatinst.sh.orig linux-2.6.22-591/creatinst.sh.orig --- linux-2.6.22-570/creatinst.sh.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/creatinst.sh.orig 2007-12-22 19:17:36.000000000 -0500 @@ -0,0 +1,5 @@ +rm -fR inst +mkdir inst +make install INSTALL_PATH=inst +make modules_install INSTALL_MOD_PATH=inst +tar cfz inst.tar.gz inst diff -Nurb linux-2.6.22-570/crypto/Kconfig linux-2.6.22-591/crypto/Kconfig --- linux-2.6.22-570/crypto/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/crypto/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -1,7 +1,17 @@ # -# Cryptographic API Configuration +# Generic algorithms support +# +config XOR_BLOCKS + tristate + # +# async_tx api: hardware offloaded memory transfer/transform support +# +source "crypto/async_tx/Kconfig" +# +# Cryptographic API Configuration +# menu "Cryptographic options" config CRYPTO diff -Nurb linux-2.6.22-570/crypto/Makefile linux-2.6.22-591/crypto/Makefile --- linux-2.6.22-570/crypto/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/crypto/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -50,3 +50,9 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o + +# +# generic algorithms and the async_tx api +# 
+obj-$(CONFIG_XOR_BLOCKS) += xor.o +obj-$(CONFIG_ASYNC_CORE) += async_tx/ diff -Nurb linux-2.6.22-570/crypto/async_tx/Kconfig linux-2.6.22-591/crypto/async_tx/Kconfig --- linux-2.6.22-570/crypto/async_tx/Kconfig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/async_tx/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,16 @@ +config ASYNC_CORE + tristate + +config ASYNC_MEMCPY + tristate + select ASYNC_CORE + +config ASYNC_XOR + tristate + select ASYNC_CORE + select XOR_BLOCKS + +config ASYNC_MEMSET + tristate + select ASYNC_CORE + diff -Nurb linux-2.6.22-570/crypto/async_tx/Makefile linux-2.6.22-591/crypto/async_tx/Makefile --- linux-2.6.22-570/crypto/async_tx/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/async_tx/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,4 @@ +obj-$(CONFIG_ASYNC_CORE) += async_tx.o +obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o +obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o +obj-$(CONFIG_ASYNC_XOR) += async_xor.o diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memcpy.c linux-2.6.22-591/crypto/async_tx/async_memcpy.c --- linux-2.6.22-570/crypto/async_tx/async_memcpy.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/async_tx/async_memcpy.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,131 @@ +/* + * copy offload engine support + * + * Copyright © 2006, Intel Corporation. + * + * Dan Williams + * + * with architecture considerations by: + * Neil Brown + * Jeff Garzik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +/** + * async_memcpy - attempt to copy memory with a dma engine. + * @dest: destination page + * @src: src page + * @dest_offset, @src_offset: byte offsets into @dest and @src at which to start + * @len: length in bytes + * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, + * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST + * @depend_tx: memcpy depends on the result of this transaction + * @cb_fn: function to call when the memcpy completes + * @cb_param: parameter to pass to the callback routine + */ +struct dma_async_tx_descriptor * +async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, + unsigned int src_offset, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY); + struct dma_device *device = chan ? chan->device : NULL; + int int_en = cb_fn ? 1 : 0; + struct dma_async_tx_descriptor *tx = device ? + device->device_prep_dma_memcpy(chan, len, + int_en) : NULL; + + if (tx) { /* run the memcpy asynchronously */ + dma_addr_t addr; + enum dma_data_direction dir; + + pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); + + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_TO_DEVICE; + + addr = dma_map_page(device->dev, src, src_offset, len, dir); + tx->tx_set_src(addr, tx, 0); + + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + } else { /* run the memcpy synchronously; tx is NULL and is what gets returned */ + void *dest_buf, *src_buf; + pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); + + /* wait for any prerequisite operations */ + if (depend_tx) { + /* if ack is already set then we cannot be sure + * we are referring to the correct operation + */ + BUG_ON(depend_tx->ack); + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) + panic("%s: DMA_ERROR waiting for depend_tx\n", + __FUNCTION__); + } + + if (flags & ASYNC_TX_KMAP_DST) + dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; + else + dest_buf = page_address(dest) + dest_offset; + + if (flags & ASYNC_TX_KMAP_SRC) + src_buf = kmap_atomic(src, KM_USER1) + src_offset; /* own slot: KM_USER0 may still hold dest */ + else + src_buf = page_address(src) + src_offset; + + memcpy(dest_buf, src_buf, len); + + if (flags & ASYNC_TX_KMAP_DST) + kunmap_atomic(dest_buf, KM_USER0); + + if (flags & ASYNC_TX_KMAP_SRC) + kunmap_atomic(src_buf, KM_USER1); /* must match the slot used to map src */ + + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + } + + return tx; +} +EXPORT_SYMBOL_GPL(async_memcpy); + +static int __init async_memcpy_init(void) +{ + return 0; +} + +static void __exit async_memcpy_exit(void) +{ + do { } while (0); +} + +module_init(async_memcpy_init); +module_exit(async_memcpy_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("asynchronous memcpy api"); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memset.c linux-2.6.22-591/crypto/async_tx/async_memset.c --- linux-2.6.22-570/crypto/async_tx/async_memset.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/async_tx/async_memset.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,109 @@ +/* + * memory fill offload engine support + * + * Copyright © 2006, Intel Corporation.
+ * + * Dan Williams + * + * with architecture considerations by: + * Neil Brown + * Jeff Garzik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +/** + * async_memset - attempt to fill memory with a dma engine. + * @dest: destination page + * @val: fill value + * @offset: byte offset into @dest at which to start the transaction + * @len: length in bytes + * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @depend_tx: memset depends on the result of this transaction + * @cb_fn: function to call when the memset completes + * @cb_param: parameter to pass to the callback routine + */ +struct dma_async_tx_descriptor * +async_memset(struct page *dest, int val, unsigned int offset, + size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET); + struct dma_device *device = chan ? chan->device : NULL; + int int_en = cb_fn ? 1 : 0; + struct dma_async_tx_descriptor *tx = device ? + device->device_prep_dma_memset(chan, val, len, + int_en) : NULL; + + if (tx) { /* run the memset asynchronously */ + dma_addr_t dma_addr; + enum dma_data_direction dir; + + pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
+ DMA_NONE : DMA_FROM_DEVICE; + + dma_addr = dma_map_page(device->dev, dest, offset, len, dir); + tx->tx_set_dest(dma_addr, tx, 0); + + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + } else { /* run the memset synchronously */ + void *dest_buf; + pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); + + dest_buf = (void *) (((char *) page_address(dest)) + offset); + + /* wait for any prerequisite operations */ + if (depend_tx) { + /* if ack is already set then we cannot be sure + * we are referring to the correct operation + */ + BUG_ON(depend_tx->ack); + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) + panic("%s: DMA_ERROR waiting for depend_tx\n", + __FUNCTION__); + } + + memset(dest_buf, val, len); + + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + } + + return tx; +} +EXPORT_SYMBOL_GPL(async_memset); + +static int __init async_memset_init(void) +{ + return 0; +} + +static void __exit async_memset_exit(void) +{ + do { } while (0); +} + +module_init(async_memset_init); +module_exit(async_memset_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("asynchronous memset api"); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/crypto/async_tx/async_tx.c linux-2.6.22-591/crypto/async_tx/async_tx.c --- linux-2.6.22-570/crypto/async_tx/async_tx.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/async_tx/async_tx.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,497 @@ +/* + * core routines for the asynchronous memory transfer/transform api + * + * Copyright © 2006, Intel Corporation. + * + * Dan Williams + * + * with architecture considerations by: + * Neil Brown + * Jeff Garzik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include + +#ifdef CONFIG_DMA_ENGINE +static enum dma_state_client +dma_channel_add_remove(struct dma_client *client, + struct dma_chan *chan, enum dma_state state); + +static struct dma_client async_tx_dma = { + .event_callback = dma_channel_add_remove, + /* .cap_mask == 0 defaults to all channels */ +}; + +/** + * dma_cap_mask_all - enable iteration over all operation types + */ +static dma_cap_mask_t dma_cap_mask_all; + +/** + * chan_ref_percpu - tracks channel allocations per core/operation + */ +struct chan_ref_percpu { + struct dma_chan_ref *ref; +}; + +static int channel_table_initialized; +static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END]; + +/** + * async_tx_lock - protect modification of async_tx_master_list and serialize + * rebalance operations + */ +static spinlock_t async_tx_lock; + +static struct list_head +async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list); + +/* async_tx_issue_pending_all - start all transactions on all channels */ +void async_tx_issue_pending_all(void) +{ + struct dma_chan_ref *ref; + + rcu_read_lock(); + list_for_each_entry_rcu(ref, &async_tx_master_list, node) + ref->chan->device->device_issue_pending(ref->chan); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(async_tx_issue_pending_all); + +/* dma_wait_for_async_tx - spin wait for a transaction to complete + * @tx: transaction to wait on + */ +enum dma_status +dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + enum dma_status status; + struct dma_async_tx_descriptor *iter; +
+ if (!tx) + return DMA_SUCCESS; + + /* poll through the dependency chain, return when tx is complete */ + do { + iter = tx; + while (iter->cookie == -EBUSY) + iter = iter->parent; + + status = dma_sync_wait(iter->chan, iter->cookie); + } while (status == DMA_IN_PROGRESS || (iter != tx)); + + return status; +} +EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); + +/* async_tx_run_dependencies - helper routine for dma drivers to process + * (start) dependent operations on their target channel + * @tx: transaction with dependencies + */ +void +async_tx_run_dependencies(struct dma_async_tx_descriptor *tx) +{ + struct dma_async_tx_descriptor *dep_tx, *_dep_tx; + struct dma_device *dev; + struct dma_chan *chan; + + list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list, + depend_node) { + chan = dep_tx->chan; + dev = chan->device; + /* we can't depend on ourselves */ + BUG_ON(chan == tx->chan); + list_del(&dep_tx->depend_node); + tx->tx_submit(dep_tx); + + /* we need to poke the engine as client code does not + * know about dependency submission events + */ + dev->device_issue_pending(chan); + } +} +EXPORT_SYMBOL_GPL(async_tx_run_dependencies); + +static void +free_dma_chan_ref(struct rcu_head *rcu) +{ + struct dma_chan_ref *ref; + ref = container_of(rcu, struct dma_chan_ref, rcu); + kfree(ref); +} + +static void +init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan) +{ + INIT_LIST_HEAD(&ref->node); + INIT_RCU_HEAD(&ref->rcu); + ref->chan = chan; + atomic_set(&ref->count, 0); +} + +/** + * get_chan_ref_by_cap - returns the nth channel of the given capability + * defaults to returning the channel with the desired capability and the + * lowest reference count if the index can not be satisfied + * @cap: capability to match + * @index: nth channel desired, passing -1 has the effect of forcing the + * default return value + */ +static struct dma_chan_ref * +get_chan_ref_by_cap(enum dma_transaction_type cap, int index) +{ + struct dma_chan_ref *ret_ref = NULL, 
*min_ref = NULL, *ref; + + rcu_read_lock(); + list_for_each_entry_rcu(ref, &async_tx_master_list, node) + if (dma_has_cap(cap, ref->chan->device->cap_mask)) { + if (!min_ref) + min_ref = ref; + else if (atomic_read(&ref->count) < + atomic_read(&min_ref->count)) + min_ref = ref; + + if (index-- == 0) { + ret_ref = ref; + break; + } + } + rcu_read_unlock(); + + if (!ret_ref) + ret_ref = min_ref; + + if (ret_ref) + atomic_inc(&ret_ref->count); + + return ret_ref; +} + +/** + * async_tx_rebalance - redistribute the available channels, optimize + * for cpu isolation in the SMP case, and operation isolation in the + * uniprocessor case + */ +static void async_tx_rebalance(void) +{ + int cpu, cap, cpu_idx = 0; + unsigned long flags; + + if (!channel_table_initialized) + return; + + spin_lock_irqsave(&async_tx_lock, flags); + + /* undo the last distribution */ + for_each_dma_cap_mask(cap, dma_cap_mask_all) + for_each_possible_cpu(cpu) { + struct dma_chan_ref *ref = + per_cpu_ptr(channel_table[cap], cpu)->ref; + if (ref) { + atomic_set(&ref->count, 0); + per_cpu_ptr(channel_table[cap], cpu)->ref = + NULL; + } + } + + for_each_dma_cap_mask(cap, dma_cap_mask_all) + for_each_online_cpu(cpu) { + struct dma_chan_ref *new; + if (NR_CPUS > 1) + new = get_chan_ref_by_cap(cap, cpu_idx++); + else + new = get_chan_ref_by_cap(cap, -1); + + per_cpu_ptr(channel_table[cap], cpu)->ref = new; + } + + spin_unlock_irqrestore(&async_tx_lock, flags); +} + +static enum dma_state_client +dma_channel_add_remove(struct dma_client *client, + struct dma_chan *chan, enum dma_state state) +{ + unsigned long found, flags; + struct dma_chan_ref *master_ref, *ref; + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + switch (state) { + case DMA_RESOURCE_AVAILABLE: + found = 0; + rcu_read_lock(); + list_for_each_entry_rcu(ref, &async_tx_master_list, node) + if (ref->chan == chan) { + found = 1; + break; + } + rcu_read_unlock(); + + pr_debug("async_tx: dma resource available [%s]\n", +
found ? "old" : "new"); + + if (!found) + ack = DMA_ACK; + else + break; + + /* add the channel to the generic management list */ + master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL); + if (master_ref) { + /* keep a reference until async_tx is unloaded */ + dma_chan_get(chan); + init_dma_chan_ref(master_ref, chan); + spin_lock_irqsave(&async_tx_lock, flags); + list_add_tail_rcu(&master_ref->node, + &async_tx_master_list); + spin_unlock_irqrestore(&async_tx_lock, + flags); + } else { + printk(KERN_WARNING "async_tx: unable to create" + " new master entry in response to" + " a DMA_RESOURCE_ADDED event" + " (-ENOMEM)\n"); + return 0; + } + + async_tx_rebalance(); + break; + case DMA_RESOURCE_REMOVED: + found = 0; + spin_lock_irqsave(&async_tx_lock, flags); + list_for_each_entry_rcu(ref, &async_tx_master_list, node) + if (ref->chan == chan) { + /* permit backing devices to go away */ + dma_chan_put(ref->chan); + list_del_rcu(&ref->node); + call_rcu(&ref->rcu, free_dma_chan_ref); + found = 1; + break; + } + spin_unlock_irqrestore(&async_tx_lock, flags); + + pr_debug("async_tx: dma resource removed [%s]\n", + found ? "ours" : "not ours"); + + if (found) + ack = DMA_ACK; + else + break; + + async_tx_rebalance(); + break; + case DMA_RESOURCE_SUSPEND: + case DMA_RESOURCE_RESUME: + printk(KERN_WARNING "async_tx: does not support dma channel" + " suspend/resume\n"); + break; + default: + BUG(); + } + + return ack; +} + +static int __init +async_tx_init(void) +{ + enum dma_transaction_type cap; + + spin_lock_init(&async_tx_lock); + bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); + + /* an interrupt will never be an explicit operation type.
+ * clearing this bit prevents allocation to a slot in 'channel_table' + */ + clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); + + for_each_dma_cap_mask(cap, dma_cap_mask_all) { + channel_table[cap] = alloc_percpu(struct chan_ref_percpu); + if (!channel_table[cap]) + goto err; + } + + channel_table_initialized = 1; + dma_async_client_register(&async_tx_dma); + dma_async_client_chan_request(&async_tx_dma); + + printk(KERN_INFO "async_tx: api initialized (async)\n"); + + return 0; +err: + printk(KERN_ERR "async_tx: initialization failure\n"); + + while (cap-- > 0) /* frees slots cap-1..0; also terminates if the enum type is unsigned */ + free_percpu(channel_table[cap]); + + return -ENOMEM; /* negative errno so the failed init is not treated as loaded */ +} + +static void __exit async_tx_exit(void) +{ + enum dma_transaction_type cap; + + channel_table_initialized = 0; + + for_each_dma_cap_mask(cap, dma_cap_mask_all) + if (channel_table[cap]) + free_percpu(channel_table[cap]); + + dma_async_client_unregister(&async_tx_dma); +} + +/** + * async_tx_find_channel - find a channel to carry out the operation or let + * the transaction execute synchronously + * @depend_tx: transaction dependency + * @tx_type: transaction type + */ +struct dma_chan * +async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, + enum dma_transaction_type tx_type) +{ + /* see if we can keep the chain on one channel */ + if (depend_tx && + dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) + return depend_tx->chan; + else if (likely(channel_table_initialized)) { + struct dma_chan_ref *ref; + int cpu = get_cpu(); + ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref; + put_cpu(); + return ref ?
ref->chan : NULL; + } else + return NULL; +} +EXPORT_SYMBOL_GPL(async_tx_find_channel); +#else +static int __init async_tx_init(void) +{ + printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); + return 0; +} + +static void __exit async_tx_exit(void) +{ + do { } while (0); +} +#endif + +void +async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + tx->callback = cb_fn; + tx->callback_param = cb_param; + + /* set this new tx to run after depend_tx if: + * 1/ a dependency exists (depend_tx is !NULL) + * 2/ the tx can not be submitted to the current channel + */ + if (depend_tx && depend_tx->chan != chan) { + /* if ack is already set then we cannot be sure + * we are referring to the correct operation + */ + BUG_ON(depend_tx->ack); + + tx->parent = depend_tx; + spin_lock_bh(&depend_tx->lock); + list_add_tail(&tx->depend_node, &depend_tx->depend_list); + if (depend_tx->cookie == 0) { + struct dma_chan *dep_chan = depend_tx->chan; + struct dma_device *dep_dev = dep_chan->device; + dep_dev->device_dependency_added(dep_chan); + } + spin_unlock_bh(&depend_tx->lock); + + /* schedule an interrupt to trigger the channel switch */ + async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL); + } else { + tx->parent = NULL; + tx->tx_submit(tx); + } + + if (flags & ASYNC_TX_ACK) + async_tx_ack(tx); + + if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + async_tx_ack(depend_tx); +} +EXPORT_SYMBOL_GPL(async_tx_submit); + +/** + * async_trigger_callback - schedules the callback function to be run after + * any dependent operations have been completed. 
+ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @depend_tx: 'callback' requires the completion of this transaction + * @cb_fn: function to call after depend_tx completes + * @cb_param: parameter to pass to the callback routine + */ +struct dma_async_tx_descriptor * +async_trigger_callback(enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + struct dma_chan *chan; + struct dma_device *device; + struct dma_async_tx_descriptor *tx; + + if (depend_tx) { + chan = depend_tx->chan; + device = chan->device; + + /* see if we can schedule an interrupt + * otherwise poll for completion + */ + if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + device = NULL; + + tx = device ? device->device_prep_dma_interrupt(chan) : NULL; + } else + tx = NULL; + + if (tx) { + pr_debug("%s: (async)\n", __FUNCTION__); + + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + } else { + pr_debug("%s: (sync)\n", __FUNCTION__); + + /* wait for any prerequisite operations */ + if (depend_tx) { + /* if ack is already set then we cannot be sure + * we are referring to the correct operation + */ + BUG_ON(depend_tx->ack); + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) + panic("%s: DMA_ERROR waiting for depend_tx\n", + __FUNCTION__); + } + + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + } + + return tx; +} +EXPORT_SYMBOL_GPL(async_trigger_callback); + +module_init(async_tx_init); +module_exit(async_tx_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/crypto/async_tx/async_xor.c linux-2.6.22-591/crypto/async_tx/async_xor.c --- linux-2.6.22-570/crypto/async_tx/async_xor.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/async_tx/async_xor.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,327 @@ +/* + * xor offload engine api + * + * Copyright © 2006, Intel Corporation. 
+ * + * Dan Williams + * + * with architecture considerations by: + * Neil Brown + * Jeff Garzik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include +#include + +static void +do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device, + struct dma_chan *chan, struct page *dest, struct page **src_list, + unsigned int offset, unsigned int src_cnt, size_t len, + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + dma_addr_t dma_addr; + enum dma_data_direction dir; + int i; + + pr_debug("%s: len: %zu\n", __FUNCTION__, len); + + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? + DMA_NONE : DMA_FROM_DEVICE; + + dma_addr = dma_map_page(device->dev, dest, offset, len, dir); + tx->tx_set_dest(dma_addr, tx, 0); + + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 
+ DMA_NONE : DMA_TO_DEVICE; + + for (i = 0; i < src_cnt; i++) { + dma_addr = dma_map_page(device->dev, src_list[i], + offset, len, dir); + tx->tx_set_src(dma_addr, tx, i); + } + + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); +} + +static void +do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, + unsigned int src_cnt, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + void *_dest; + int i; + + pr_debug("%s: len: %zu\n", __FUNCTION__, len); + + /* reuse the 'src_list' array to convert to buffer pointers */ + for (i = 0; i < src_cnt; i++) + src_list[i] = (struct page *) + (page_address(src_list[i]) + offset); + + /* set destination address */ + _dest = page_address(dest) + offset; + + if (flags & ASYNC_TX_XOR_ZERO_DST) + memset(_dest, 0, len); + + xor_blocks(src_cnt, len, _dest, + (void **) src_list); + + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); +} + +/** + * async_xor - attempt to xor a set of blocks with a dma engine. + * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST + * flag must be set to not include dest data in the calculation. The + * assumption with dma engines is that they only use the destination + * buffer as a source when it is explicitly specified in the source list. + * @dest: destination page + * @src_list: array of source pages (if the dest is also a source it must be + * at index zero). The contents of this array may be overwritten. + * @offset: offset in pages to start transaction + * @src_cnt: number of source pages + * @len: length in bytes + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST, + * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @depend_tx: xor depends on the result of this transaction.
+ * @cb_fn: function to call when the xor completes + * @cb_param: parameter to pass to the callback routine + */ +struct dma_async_tx_descriptor * +async_xor(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx = NULL; + dma_async_tx_callback _cb_fn; + void *_cb_param; + unsigned long local_flags; + int xor_src_cnt; + int i = 0, src_off = 0, int_en; + + BUG_ON(src_cnt <= 1); + + while (src_cnt) { + local_flags = flags; + if (device) { /* run the xor asynchronously */ + xor_src_cnt = min(src_cnt, device->max_xor); + /* if we are submitting additional xors + * only set the callback on the last transaction + */ + if (src_cnt > xor_src_cnt) { + local_flags &= ~ASYNC_TX_ACK; + _cb_fn = NULL; + _cb_param = NULL; + } else { + _cb_fn = cb_fn; + _cb_param = cb_param; + } + + int_en = _cb_fn ? 
1 : 0; + + tx = device->device_prep_dma_xor( + chan, xor_src_cnt, len, int_en); + + if (tx) { + do_async_xor(tx, device, chan, dest, + &src_list[src_off], offset, xor_src_cnt, len, + local_flags, depend_tx, _cb_fn, + _cb_param); + } else /* fall through */ + goto xor_sync; + } else { /* run the xor synchronously */ +xor_sync: + /* in the sync case the dest is an implied source + * (assumes the dest is at the src_off index) + */ + if (flags & ASYNC_TX_XOR_DROP_DST) { + src_cnt--; + src_off++; + } + + /* process up to 'MAX_XOR_BLOCKS' sources */ + xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); + + /* if we are submitting additional xors + * only set the callback on the last transaction + */ + if (src_cnt > xor_src_cnt) { + local_flags &= ~ASYNC_TX_ACK; + _cb_fn = NULL; + _cb_param = NULL; + } else { + _cb_fn = cb_fn; + _cb_param = cb_param; + } + + /* wait for any prerequisite operations */ + if (depend_tx) { + /* if ack is already set then we cannot be sure + * we are referring to the correct operation + */ + BUG_ON(depend_tx->ack); + if (dma_wait_for_async_tx(depend_tx) == + DMA_ERROR) + panic("%s: DMA_ERROR waiting for " + "depend_tx\n", + __FUNCTION__); + } + + do_sync_xor(dest, &src_list[src_off], offset, + xor_src_cnt, len, local_flags, depend_tx, + _cb_fn, _cb_param); + } + + /* the previous tx is hidden from the client, + * so ack it + */ + if (i && depend_tx) + async_tx_ack(depend_tx); + + depend_tx = tx; + + if (src_cnt > xor_src_cnt) { + /* drop completed sources */ + src_cnt -= xor_src_cnt; + src_off += xor_src_cnt; + + /* unconditionally preserve the destination */ + flags &= ~ASYNC_TX_XOR_ZERO_DST; + + /* use the intermediate result a source, but remember + * it's dropped, because it's implied, in the sync case + */ + src_list[--src_off] = dest; + src_cnt++; + flags |= ASYNC_TX_XOR_DROP_DST; + } else + src_cnt = 0; + i++; + } + + return tx; +} +EXPORT_SYMBOL_GPL(async_xor); + +static int page_is_zero(struct page *p, unsigned int offset, size_t len) +{ + 
char *a = page_address(p) + offset; + return ((*(u32 *) a) == 0 && + memcmp(a, a + 4, len - 4) == 0); +} + +/** + * async_xor_zero_sum - attempt a xor parity check with a dma engine. + * @dest: destination page used if the xor is performed synchronously + * @src_list: array of source pages. The dest page must be listed as a source + * at index zero. The contents of this array may be overwritten. + * @offset: offset in pages to start transaction + * @src_cnt: number of source pages + * @len: length in bytes + * @result: 0 if sum == 0 else non-zero + * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @depend_tx: xor depends on the result of this transaction. + * @cb_fn: function to call when the xor completes + * @cb_param: parameter to pass to the callback routine + */ +struct dma_async_tx_descriptor * +async_xor_zero_sum(struct page *dest, struct page **src_list, + unsigned int offset, int src_cnt, size_t len, + u32 *result, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM); + struct dma_device *device = chan ? chan->device : NULL; + int int_en = cb_fn ? 1 : 0; + struct dma_async_tx_descriptor *tx = device ? + device->device_prep_dma_zero_sum(chan, src_cnt, len, result, + int_en) : NULL; + int i; + + BUG_ON(src_cnt <= 1); + + if (tx) { + dma_addr_t dma_addr; + enum dma_data_direction dir; + + pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); + + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 
+ DMA_NONE : DMA_TO_DEVICE; + + for (i = 0; i < src_cnt; i++) { + dma_addr = dma_map_page(device->dev, src_list[i], + offset, len, dir); + tx->tx_set_src(dma_addr, tx, i); + } + + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + } else { + unsigned long xor_flags = flags; + + pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); + + xor_flags |= ASYNC_TX_XOR_DROP_DST; + xor_flags &= ~ASYNC_TX_ACK; + + tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, + depend_tx, NULL, NULL); + + if (tx) { + if (dma_wait_for_async_tx(tx) == DMA_ERROR) + panic("%s: DMA_ERROR waiting for tx\n", + __FUNCTION__); + async_tx_ack(tx); + } + + *result = page_is_zero(dest, offset, len) ? 0 : 1; + + tx = NULL; + + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + } + + return tx; +} +EXPORT_SYMBOL_GPL(async_xor_zero_sum); + +static int __init async_xor_init(void) +{ + return 0; +} + +static void __exit async_xor_exit(void) +{ + do { } while (0); +} + +module_init(async_xor_init); +module_exit(async_xor_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/crypto/xor.c linux-2.6.22-591/crypto/xor.c --- linux-2.6.22-570/crypto/xor.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/crypto/xor.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,155 @@ +/* + * xor.c : Multiple Devices driver for Linux + * + * Copyright (C) 1996, 1997, 1998, 1999, 2000, + * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. + * + * Dispatch optimized RAID-5 checksumming functions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define BH_TRACE 0 +#include +#include +#include +#include + +/* The xor routines to use. */ +static struct xor_block_template *active_template; + +void +xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) +{ + unsigned long *p1, *p2, *p3, *p4; + + p1 = (unsigned long *) srcs[0]; + if (src_count == 1) { + active_template->do_2(bytes, dest, p1); + return; + } + + p2 = (unsigned long *) srcs[1]; + if (src_count == 2) { + active_template->do_3(bytes, dest, p1, p2); + return; + } + + p3 = (unsigned long *) srcs[2]; + if (src_count == 3) { + active_template->do_4(bytes, dest, p1, p2, p3); + return; + } + + p4 = (unsigned long *) srcs[3]; + active_template->do_5(bytes, dest, p1, p2, p3, p4); +} +EXPORT_SYMBOL(xor_blocks); + +/* Set of all registered templates. */ +static struct xor_block_template *template_list; + +#define BENCH_SIZE (PAGE_SIZE) + +static void +do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) +{ + int speed; + unsigned long now; + int i, count, max; + + tmpl->next = template_list; + template_list = tmpl; + + /* + * Count the number of XORs done during a whole jiffy, and use + * this to calculate the speed of checksumming. We use a 2-page + * allocation to have guaranteed color L1-cache layout. 
+ */ + max = 0; + for (i = 0; i < 5; i++) { + now = jiffies; + count = 0; + while (jiffies == now) { + mb(); /* prevent loop optimization */ + tmpl->do_2(BENCH_SIZE, b1, b2); + mb(); + count++; + mb(); + } + if (count > max) + max = count; + } + + speed = max * (HZ * BENCH_SIZE / 1024); + tmpl->speed = speed; + + printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name, + speed / 1000, speed % 1000); +} + +static int __init +calibrate_xor_blocks(void) +{ + void *b1, *b2; + struct xor_block_template *f, *fastest; + + b1 = (void *) __get_free_pages(GFP_KERNEL, 2); + if (!b1) { + printk(KERN_WARNING "xor: Yikes! No memory available.\n"); + return -ENOMEM; + } + b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; + + /* + * If this arch/cpu has a short-circuited selection, don't loop through + * all the possible functions, just test the best one + */ + + fastest = NULL; + +#ifdef XOR_SELECT_TEMPLATE + fastest = XOR_SELECT_TEMPLATE(fastest); +#endif + +#define xor_speed(templ) do_xor_speed((templ), b1, b2) + + if (fastest) { + printk(KERN_INFO "xor: automatically using best " + "checksumming function: %s\n", + fastest->name); + xor_speed(fastest); + } else { + printk(KERN_INFO "xor: measuring software checksum speed\n"); + XOR_TRY_TEMPLATES; + fastest = template_list; + for (f = fastest; f; f = f->next) + if (f->speed > fastest->speed) + fastest = f; + } + + printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n", + fastest->name, fastest->speed / 1000, fastest->speed % 1000); + +#undef xor_speed + + free_pages((unsigned long)b1, 2); + + active_template = fastest; + return 0; +} + +static __exit void xor_exit(void) { } + +MODULE_LICENSE("GPL"); + +/* when built-in xor.o must initialize before drivers/md/md.o */ +core_initcall(calibrate_xor_blocks); +module_exit(xor_exit); diff -Nurb linux-2.6.22-570/drivers/Makefile linux-2.6.22-591/drivers/Makefile --- linux-2.6.22-570/drivers/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/Makefile 2007-12-21 
15:36:11.000000000 -0500 @@ -70,6 +70,7 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_CPU_IDLE) += cpuidle/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ diff -Nurb linux-2.6.22-570/drivers/acpi/Kconfig linux-2.6.22-591/drivers/acpi/Kconfig --- linux-2.6.22-570/drivers/acpi/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -124,7 +124,7 @@ config ACPI_VIDEO tristate "Video" - depends on X86 && BACKLIGHT_CLASS_DEVICE + depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL help This driver implement the ACPI Extensions For Display Adapters for integrated graphics devices on motherboard, as specified in @@ -280,6 +280,14 @@ of verbosity. Saying Y enables these statements. This will increase your kernel size by around 50K. +config ACPI_DEBUG_FUNC_TRACE + bool "Additionally enable ACPI function tracing" + default n + depends on ACPI_DEBUG + help + ACPI Debug Statements slow down ACPI processing. Function trace + is about half of the penalty and is rarely useful. 
+ config ACPI_EC bool default y diff -Nurb linux-2.6.22-570/drivers/acpi/battery.c linux-2.6.22-591/drivers/acpi/battery.c --- linux-2.6.22-570/drivers/acpi/battery.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/battery.c 2007-12-21 15:36:11.000000000 -0500 @@ -43,21 +43,30 @@ #define ACPI_BATTERY_CLASS "battery" #define ACPI_BATTERY_HID "PNP0C0A" #define ACPI_BATTERY_DEVICE_NAME "Battery" -#define ACPI_BATTERY_FILE_INFO "info" -#define ACPI_BATTERY_FILE_STATUS "state" -#define ACPI_BATTERY_FILE_ALARM "alarm" #define ACPI_BATTERY_NOTIFY_STATUS 0x80 #define ACPI_BATTERY_NOTIFY_INFO 0x81 #define ACPI_BATTERY_UNITS_WATTS "mW" #define ACPI_BATTERY_UNITS_AMPS "mA" #define _COMPONENT ACPI_BATTERY_COMPONENT + +#define ACPI_BATTERY_UPDATE_TIME 0 + +#define ACPI_BATTERY_NONE_UPDATE 0 +#define ACPI_BATTERY_EASY_UPDATE 1 +#define ACPI_BATTERY_INIT_UPDATE 2 + ACPI_MODULE_NAME("battery"); MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Battery Driver"); MODULE_LICENSE("GPL"); +static unsigned int update_time = ACPI_BATTERY_UPDATE_TIME; + +/* 0 - every time, > 0 - by update_time */ +module_param(update_time, uint, 0644); + extern struct proc_dir_entry *acpi_lock_battery_dir(void); extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir); @@ -76,7 +85,7 @@ }, }; -struct acpi_battery_status { +struct acpi_battery_state { acpi_integer state; acpi_integer present_rate; acpi_integer remaining_capacity; @@ -99,33 +108,111 @@ acpi_string oem_info; }; -struct acpi_battery_flags { - u8 present:1; /* Bay occupied? */ - u8 power_unit:1; /* 0=watts, 1=apms */ - u8 alarm:1; /* _BTP present? 
*/ - u8 reserved:5; +enum acpi_battery_files{ + ACPI_BATTERY_INFO = 0, + ACPI_BATTERY_STATE, + ACPI_BATTERY_ALARM, + ACPI_BATTERY_NUMFILES, }; -struct acpi_battery_trips { - unsigned long warning; - unsigned long low; +struct acpi_battery_flags { + u8 battery_present_prev; + u8 alarm_present; + u8 init_update; + u8 update[ACPI_BATTERY_NUMFILES]; + u8 power_unit; }; struct acpi_battery { - struct acpi_device * device; + struct mutex mutex; + struct acpi_device *device; struct acpi_battery_flags flags; - struct acpi_battery_trips trips; + struct acpi_buffer bif_data; + struct acpi_buffer bst_data; unsigned long alarm; - struct acpi_battery_info *info; + unsigned long update_time[ACPI_BATTERY_NUMFILES]; }; +inline int acpi_battery_present(struct acpi_battery *battery) +{ + return battery->device->status.battery_present; +} +inline char *acpi_battery_power_units(struct acpi_battery *battery) +{ + if (battery->flags.power_unit) + return ACPI_BATTERY_UNITS_AMPS; + else + return ACPI_BATTERY_UNITS_WATTS; +} + +inline acpi_handle acpi_battery_handle(struct acpi_battery *battery) +{ + return battery->device->handle; +} + /* -------------------------------------------------------------------------- Battery Management -------------------------------------------------------------------------- */ -static int -acpi_battery_get_info(struct acpi_battery *battery, - struct acpi_battery_info **bif) +static void acpi_battery_check_result(struct acpi_battery *battery, int result) +{ + if (!battery) + return; + + if (result) { + battery->flags.init_update = 1; + } +} + +static int acpi_battery_extract_package(struct acpi_battery *battery, + union acpi_object *package, + struct acpi_buffer *format, + struct acpi_buffer *data, + char *package_name) +{ + acpi_status status = AE_OK; + struct acpi_buffer data_null = { 0, NULL }; + + status = acpi_extract_package(package, format, &data_null); + if (status != AE_BUFFER_OVERFLOW) { + ACPI_EXCEPTION((AE_INFO, status, "Extracting size %s", + 
package_name)); + return -ENODEV; + } + + if (data_null.length != data->length) { + kfree(data->pointer); + data->pointer = kzalloc(data_null.length, GFP_KERNEL); + if (!data->pointer) { + ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "kzalloc()")); + return -ENOMEM; + } + data->length = data_null.length; + } + + status = acpi_extract_package(package, format, data); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Extracting %s", + package_name)); + return -ENODEV; + } + + return 0; +} + +static int acpi_battery_get_status(struct acpi_battery *battery) +{ + int result = 0; + + result = acpi_bus_get_status(battery->device); + if (result) { + ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Evaluating _STA")); + return -ENODEV; + } + return result; +} + +static int acpi_battery_get_info(struct acpi_battery *battery) { int result = 0; acpi_status status = 0; @@ -133,16 +220,20 @@ struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BIF), ACPI_BATTERY_FORMAT_BIF }; - struct acpi_buffer data = { 0, NULL }; union acpi_object *package = NULL; + struct acpi_buffer *data = NULL; + struct acpi_battery_info *bif = NULL; + battery->update_time[ACPI_BATTERY_INFO] = get_seconds(); - if (!battery || !bif) - return -EINVAL; + if (!acpi_battery_present(battery)) + return 0; - /* Evalute _BIF */ + /* Evaluate _BIF */ - status = acpi_evaluate_object(battery->device->handle, "_BIF", NULL, &buffer); + status = + acpi_evaluate_object(acpi_battery_handle(battery), "_BIF", NULL, + &buffer); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF")); return -ENODEV; @@ -150,41 +241,29 @@ package = buffer.pointer; - /* Extract Package Data */ - - status = acpi_extract_package(package, &format, &data); - if (status != AE_BUFFER_OVERFLOW) { - ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF")); - result = -ENODEV; - goto end; - } + data = &battery->bif_data; - data.pointer = kzalloc(data.length, GFP_KERNEL); - if (!data.pointer) { - result = -ENOMEM; - goto end; - } + 
/* Extract Package Data */ - status = acpi_extract_package(package, &format, &data); - if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF")); - kfree(data.pointer); - result = -ENODEV; + result = + acpi_battery_extract_package(battery, package, &format, data, + "_BIF"); + if (result) goto end; - } end: + kfree(buffer.pointer); - if (!result) - (*bif) = data.pointer; + if (!result) { + bif = data->pointer; + battery->flags.power_unit = bif->power_unit; + } return result; } -static int -acpi_battery_get_status(struct acpi_battery *battery, - struct acpi_battery_status **bst) +static int acpi_battery_get_state(struct acpi_battery *battery) { int result = 0; acpi_status status = 0; @@ -192,16 +271,19 @@ struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BST), ACPI_BATTERY_FORMAT_BST }; - struct acpi_buffer data = { 0, NULL }; union acpi_object *package = NULL; + struct acpi_buffer *data = NULL; + battery->update_time[ACPI_BATTERY_STATE] = get_seconds(); - if (!battery || !bst) - return -EINVAL; + if (!acpi_battery_present(battery)) + return 0; - /* Evalute _BST */ + /* Evaluate _BST */ - status = acpi_evaluate_object(battery->device->handle, "_BST", NULL, &buffer); + status = + acpi_evaluate_object(acpi_battery_handle(battery), "_BST", NULL, + &buffer); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST")); return -ENODEV; @@ -209,55 +291,49 @@ package = buffer.pointer; - /* Extract Package Data */ + data = &battery->bst_data; - status = acpi_extract_package(package, &format, &data); - if (status != AE_BUFFER_OVERFLOW) { - ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST")); - result = -ENODEV; - goto end; - } - - data.pointer = kzalloc(data.length, GFP_KERNEL); - if (!data.pointer) { - result = -ENOMEM; - goto end; - } + /* Extract Package Data */ - status = acpi_extract_package(package, &format, &data); - if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST")); - 
kfree(data.pointer); - result = -ENODEV; + result = + acpi_battery_extract_package(battery, package, &format, data, + "_BST"); + if (result) goto end; - } end: kfree(buffer.pointer); - if (!result) - (*bst) = data.pointer; - return result; } -static int -acpi_battery_set_alarm(struct acpi_battery *battery, unsigned long alarm) +static int acpi_battery_get_alarm(struct acpi_battery *battery) +{ + battery->update_time[ACPI_BATTERY_ALARM] = get_seconds(); + + return 0; +} + +static int acpi_battery_set_alarm(struct acpi_battery *battery, + unsigned long alarm) { acpi_status status = 0; union acpi_object arg0 = { ACPI_TYPE_INTEGER }; struct acpi_object_list arg_list = { 1, &arg0 }; + battery->update_time[ACPI_BATTERY_ALARM] = get_seconds(); - if (!battery) - return -EINVAL; + if (!acpi_battery_present(battery)) + return -ENODEV; - if (!battery->flags.alarm) + if (!battery->flags.alarm_present) return -ENODEV; arg0.integer.value = alarm; - status = acpi_evaluate_object(battery->device->handle, "_BTP", &arg_list, NULL); + status = + acpi_evaluate_object(acpi_battery_handle(battery), "_BTP", + &arg_list, NULL); if (ACPI_FAILURE(status)) return -ENODEV; @@ -268,65 +344,114 @@ return 0; } -static int acpi_battery_check(struct acpi_battery *battery) +static int acpi_battery_init_alarm(struct acpi_battery *battery) { int result = 0; acpi_status status = AE_OK; acpi_handle handle = NULL; - struct acpi_device *device = NULL; - struct acpi_battery_info *bif = NULL; + struct acpi_battery_info *bif = battery->bif_data.pointer; + unsigned long alarm = battery->alarm; + /* See if alarms are supported, and if so, set default */ - if (!battery) - return -EINVAL; + status = acpi_get_handle(acpi_battery_handle(battery), "_BTP", &handle); + if (ACPI_SUCCESS(status)) { + battery->flags.alarm_present = 1; + if (!alarm && bif) { + alarm = bif->design_capacity_warning; + } + result = acpi_battery_set_alarm(battery, alarm); + if (result) + goto end; + } else { + battery->flags.alarm_present = 
0; + } - device = battery->device; + end: - result = acpi_bus_get_status(device); - if (result) return result; +} - /* Insertion? */ - - if (!battery->flags.present && device->status.battery_present) { +static int acpi_battery_init_update(struct acpi_battery *battery) +{ + int result = 0; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery inserted\n")); + result = acpi_battery_get_status(battery); + if (result) + return result; - /* Evalute _BIF to get certain static information */ + battery->flags.battery_present_prev = acpi_battery_present(battery); - result = acpi_battery_get_info(battery, &bif); + if (acpi_battery_present(battery)) { + result = acpi_battery_get_info(battery); + if (result) + return result; + result = acpi_battery_get_state(battery); if (result) return result; - battery->flags.power_unit = bif->power_unit; - battery->trips.warning = bif->design_capacity_warning; - battery->trips.low = bif->design_capacity_low; - kfree(bif); + acpi_battery_init_alarm(battery); + } - /* See if alarms are supported, and if so, set default */ + return result; +} - status = acpi_get_handle(battery->device->handle, "_BTP", &handle); - if (ACPI_SUCCESS(status)) { - battery->flags.alarm = 1; - acpi_battery_set_alarm(battery, battery->trips.warning); +static int acpi_battery_update(struct acpi_battery *battery, + int update, int *update_result_ptr) +{ + int result = 0; + int update_result = ACPI_BATTERY_NONE_UPDATE; + + if (!acpi_battery_present(battery)) { + update = 1; + } + + if (battery->flags.init_update) { + result = acpi_battery_init_update(battery); + if (result) + goto end; + update_result = ACPI_BATTERY_INIT_UPDATE; + } else if (update) { + result = acpi_battery_get_status(battery); + if (result) + goto end; + if ((!battery->flags.battery_present_prev & acpi_battery_present(battery)) + || (battery->flags.battery_present_prev & !acpi_battery_present(battery))) { + result = acpi_battery_init_update(battery); + if (result) + goto end; + update_result = 
ACPI_BATTERY_INIT_UPDATE; + } else { + update_result = ACPI_BATTERY_EASY_UPDATE; } } - /* Removal? */ + end: - else if (battery->flags.present && !device->status.battery_present) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery removed\n")); - } + battery->flags.init_update = (result != 0); - battery->flags.present = device->status.battery_present; + *update_result_ptr = update_result; return result; } -static void acpi_battery_check_present(struct acpi_battery *battery) +static void acpi_battery_notify_update(struct acpi_battery *battery) { - if (!battery->flags.present) { - acpi_battery_check(battery); + acpi_battery_get_status(battery); + + if (battery->flags.init_update) { + return; + } + + if ((!battery->flags.battery_present_prev & + acpi_battery_present(battery)) || + (battery->flags.battery_present_prev & + !acpi_battery_present(battery))) { + battery->flags.init_update = 1; + } else { + battery->flags.update[ACPI_BATTERY_INFO] = 1; + battery->flags.update[ACPI_BATTERY_STATE] = 1; + battery->flags.update[ACPI_BATTERY_ALARM] = 1; } } @@ -335,37 +460,33 @@ -------------------------------------------------------------------------- */ static struct proc_dir_entry *acpi_battery_dir; -static int acpi_battery_read_info(struct seq_file *seq, void *offset) + +static int acpi_battery_print_info(struct seq_file *seq, int result) { - int result = 0; struct acpi_battery *battery = seq->private; struct acpi_battery_info *bif = NULL; char *units = "?"; - - if (!battery) + if (result) goto end; - acpi_battery_check_present(battery); - - if (battery->flags.present) + if (acpi_battery_present(battery)) seq_printf(seq, "present: yes\n"); else { seq_printf(seq, "present: no\n"); goto end; } - /* Battery Info (_BIF) */ - - result = acpi_battery_get_info(battery, &bif); - if (result || !bif) { - seq_printf(seq, "ERROR: Unable to read battery information\n"); + bif = battery->bif_data.pointer; + if (!bif) { + ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BIF buffer is NULL")); + result = 
-ENODEV; goto end; } - units = - bif-> - power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS; + /* Battery Units */ + + units = acpi_battery_power_units(battery); if (bif->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN) seq_printf(seq, "design capacity: unknown\n"); @@ -396,7 +517,6 @@ else seq_printf(seq, "design voltage: %d mV\n", (u32) bif->design_voltage); - seq_printf(seq, "design capacity warning: %d %sh\n", (u32) bif->design_capacity_warning, units); seq_printf(seq, "design capacity low: %d %sh\n", @@ -411,50 +531,40 @@ seq_printf(seq, "OEM info: %s\n", bif->oem_info); end: - kfree(bif); - return 0; -} + if (result) + seq_printf(seq, "ERROR: Unable to read battery info\n"); -static int acpi_battery_info_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, acpi_battery_read_info, PDE(inode)->data); + return result; } -static int acpi_battery_read_state(struct seq_file *seq, void *offset) +static int acpi_battery_print_state(struct seq_file *seq, int result) { - int result = 0; struct acpi_battery *battery = seq->private; - struct acpi_battery_status *bst = NULL; + struct acpi_battery_state *bst = NULL; char *units = "?"; - - if (!battery) + if (result) goto end; - acpi_battery_check_present(battery); - - if (battery->flags.present) + if (acpi_battery_present(battery)) seq_printf(seq, "present: yes\n"); else { seq_printf(seq, "present: no\n"); goto end; } - /* Battery Units */ - - units = - battery->flags. - power_unit ? 
ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS; - - /* Battery Status (_BST) */ - - result = acpi_battery_get_status(battery, &bst); - if (result || !bst) { - seq_printf(seq, "ERROR: Unable to read battery status\n"); + bst = battery->bst_data.pointer; + if (!bst) { + ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BST buffer is NULL")); + result = -ENODEV; goto end; } + /* Battery Units */ + + units = acpi_battery_power_units(battery); + if (!(bst->state & 0x04)) seq_printf(seq, "capacity state: ok\n"); else @@ -490,48 +600,43 @@ (u32) bst->present_voltage); end: - kfree(bst); - return 0; -} + if (result) { + seq_printf(seq, "ERROR: Unable to read battery state\n"); + } -static int acpi_battery_state_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, acpi_battery_read_state, PDE(inode)->data); + return result; } -static int acpi_battery_read_alarm(struct seq_file *seq, void *offset) +static int acpi_battery_print_alarm(struct seq_file *seq, int result) { struct acpi_battery *battery = seq->private; char *units = "?"; - - if (!battery) + if (result) goto end; - acpi_battery_check_present(battery); - - if (!battery->flags.present) { + if (!acpi_battery_present(battery)) { seq_printf(seq, "present: no\n"); goto end; } /* Battery Units */ - units = - battery->flags. - power_unit ? 
ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS; - - /* Battery Alarm */ + units = acpi_battery_power_units(battery); seq_printf(seq, "alarm: "); if (!battery->alarm) seq_printf(seq, "unsupported\n"); else - seq_printf(seq, "%d %sh\n", (u32) battery->alarm, units); + seq_printf(seq, "%lu %sh\n", battery->alarm, units); end: - return 0; + + if (result) + seq_printf(seq, "ERROR: Unable to read battery alarm\n"); + + return result; } static ssize_t @@ -543,27 +648,113 @@ char alarm_string[12] = { '\0' }; struct seq_file *m = file->private_data; struct acpi_battery *battery = m->private; - + int update_result = ACPI_BATTERY_NONE_UPDATE; if (!battery || (count > sizeof(alarm_string) - 1)) return -EINVAL; - acpi_battery_check_present(battery); + mutex_lock(&battery->mutex); - if (!battery->flags.present) - return -ENODEV; + result = acpi_battery_update(battery, 1, &update_result); + if (result) { + result = -ENODEV; + goto end; + } - if (copy_from_user(alarm_string, buffer, count)) - return -EFAULT; + if (!acpi_battery_present(battery)) { + result = -ENODEV; + goto end; + } + + if (copy_from_user(alarm_string, buffer, count)) { + result = -EFAULT; + goto end; + } alarm_string[count] = '\0'; result = acpi_battery_set_alarm(battery, simple_strtoul(alarm_string, NULL, 0)); if (result) + goto end; + + end: + + acpi_battery_check_result(battery, result); + + if (!result) + result = count; + + mutex_unlock(&battery->mutex); + return result; +} + +typedef int(*print_func)(struct seq_file *seq, int result); +typedef int(*get_func)(struct acpi_battery *battery); + +static struct acpi_read_mux { + print_func print; + get_func get; +} acpi_read_funcs[ACPI_BATTERY_NUMFILES] = { + {.get = acpi_battery_get_info, .print = acpi_battery_print_info}, + {.get = acpi_battery_get_state, .print = acpi_battery_print_state}, + {.get = acpi_battery_get_alarm, .print = acpi_battery_print_alarm}, +}; + +static int acpi_battery_read(int fid, struct seq_file *seq) +{ + struct acpi_battery 
*battery = seq->private; + int result = 0; + int update_result = ACPI_BATTERY_NONE_UPDATE; + int update = 0; + + mutex_lock(&battery->mutex); + + update = (get_seconds() - battery->update_time[fid] >= update_time); + update = (update | battery->flags.update[fid]); + + result = acpi_battery_update(battery, update, &update_result); + if (result) + goto end; + + if (update_result == ACPI_BATTERY_EASY_UPDATE) { + result = acpi_read_funcs[fid].get(battery); + if (result) + goto end; + } - return count; + end: + result = acpi_read_funcs[fid].print(seq, result); + acpi_battery_check_result(battery, result); + battery->flags.update[fid] = result; + mutex_unlock(&battery->mutex); + return result; +} + +static int acpi_battery_read_info(struct seq_file *seq, void *offset) +{ + return acpi_battery_read(ACPI_BATTERY_INFO, seq); +} + +static int acpi_battery_read_state(struct seq_file *seq, void *offset) +{ + return acpi_battery_read(ACPI_BATTERY_STATE, seq); +} + +static int acpi_battery_read_alarm(struct seq_file *seq, void *offset) +{ + return acpi_battery_read(ACPI_BATTERY_ALARM, seq); +} + +static int acpi_battery_info_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, acpi_battery_read_info, PDE(inode)->data); +} + +static int acpi_battery_state_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, acpi_battery_read_state, PDE(inode)->data); } static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file) @@ -571,35 +762,51 @@ return single_open(file, acpi_battery_read_alarm, PDE(inode)->data); } -static const struct file_operations acpi_battery_info_ops = { +static struct battery_file { + struct file_operations ops; + mode_t mode; + char *name; +} acpi_battery_file[] = { + { + .name = "info", + .mode = S_IRUGO, + .ops = { .open = acpi_battery_info_open_fs, .read = seq_read, .llseek = seq_lseek, .release = single_release, .owner = THIS_MODULE, -}; - -static const struct file_operations 
acpi_battery_state_ops = { + }, + }, + { + .name = "state", + .mode = S_IRUGO, + .ops = { .open = acpi_battery_state_open_fs, .read = seq_read, .llseek = seq_lseek, .release = single_release, .owner = THIS_MODULE, -}; - -static const struct file_operations acpi_battery_alarm_ops = { + }, + }, + { + .name = "alarm", + .mode = S_IFREG | S_IRUGO | S_IWUSR, + .ops = { .open = acpi_battery_alarm_open_fs, .read = seq_read, .write = acpi_battery_write_alarm, .llseek = seq_lseek, .release = single_release, .owner = THIS_MODULE, + }, + }, }; static int acpi_battery_add_fs(struct acpi_device *device) { struct proc_dir_entry *entry = NULL; - + int i; if (!acpi_device_dir(device)) { acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device), @@ -609,38 +816,16 @@ acpi_device_dir(device)->owner = THIS_MODULE; } - /* 'info' [R] */ - entry = create_proc_entry(ACPI_BATTERY_FILE_INFO, - S_IRUGO, acpi_device_dir(device)); + for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) { + entry = create_proc_entry(acpi_battery_file[i].name, + acpi_battery_file[i].mode, acpi_device_dir(device)); if (!entry) return -ENODEV; else { - entry->proc_fops = &acpi_battery_info_ops; + entry->proc_fops = &acpi_battery_file[i].ops; entry->data = acpi_driver_data(device); entry->owner = THIS_MODULE; } - - /* 'status' [R] */ - entry = create_proc_entry(ACPI_BATTERY_FILE_STATUS, - S_IRUGO, acpi_device_dir(device)); - if (!entry) - return -ENODEV; - else { - entry->proc_fops = &acpi_battery_state_ops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - - /* 'alarm' [R/W] */ - entry = create_proc_entry(ACPI_BATTERY_FILE_ALARM, - S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); - if (!entry) - return -ENODEV; - else { - entry->proc_fops = &acpi_battery_alarm_ops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; } return 0; @@ -648,15 +833,12 @@ static int acpi_battery_remove_fs(struct acpi_device *device) { - + int i; if (acpi_device_dir(device)) { - 
remove_proc_entry(ACPI_BATTERY_FILE_ALARM, + for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) { + remove_proc_entry(acpi_battery_file[i].name, acpi_device_dir(device)); - remove_proc_entry(ACPI_BATTERY_FILE_STATUS, - acpi_device_dir(device)); - remove_proc_entry(ACPI_BATTERY_FILE_INFO, - acpi_device_dir(device)); - + } remove_proc_entry(acpi_device_bid(device), acpi_battery_dir); acpi_device_dir(device) = NULL; } @@ -673,7 +855,6 @@ struct acpi_battery *battery = data; struct acpi_device *device = NULL; - if (!battery) return; @@ -684,8 +865,10 @@ case ACPI_BATTERY_NOTIFY_INFO: case ACPI_NOTIFY_BUS_CHECK: case ACPI_NOTIFY_DEVICE_CHECK: - acpi_battery_check(battery); - acpi_bus_generate_event(device, event, battery->flags.present); + device = battery->device; + acpi_battery_notify_update(battery); + acpi_bus_generate_event(device, event, + acpi_battery_present(battery)); break; default: ACPI_DEBUG_PRINT((ACPI_DB_INFO, @@ -702,7 +885,6 @@ acpi_status status = 0; struct acpi_battery *battery = NULL; - if (!device) return -EINVAL; @@ -710,15 +892,21 @@ if (!battery) return -ENOMEM; + mutex_init(&battery->mutex); + + mutex_lock(&battery->mutex); + battery->device = device; strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS); acpi_driver_data(device) = battery; - result = acpi_battery_check(battery); + result = acpi_battery_get_status(battery); if (result) goto end; + battery->flags.init_update = 1; + result = acpi_battery_add_fs(device); if (result) goto end; @@ -727,6 +915,7 @@ ACPI_ALL_NOTIFY, acpi_battery_notify, battery); if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Installing notify handler")); result = -ENODEV; goto end; } @@ -736,11 +925,14 @@ device->status.battery_present ? 
"present" : "absent"); end: + if (result) { acpi_battery_remove_fs(device); kfree(battery); } + mutex_unlock(&battery->mutex); + return result; } @@ -749,18 +941,27 @@ acpi_status status = 0; struct acpi_battery *battery = NULL; - if (!device || !acpi_driver_data(device)) return -EINVAL; battery = acpi_driver_data(device); + mutex_lock(&battery->mutex); + status = acpi_remove_notify_handler(device->handle, ACPI_ALL_NOTIFY, acpi_battery_notify); acpi_battery_remove_fs(device); + kfree(battery->bif_data.pointer); + + kfree(battery->bst_data.pointer); + + mutex_unlock(&battery->mutex); + + mutex_destroy(&battery->mutex); + kfree(battery); return 0; @@ -775,7 +976,10 @@ return -EINVAL; battery = device->driver_data; - return acpi_battery_check(battery); + + battery->flags.init_update = 1; + + return 0; } static int __init acpi_battery_init(void) @@ -800,7 +1004,6 @@ static void __exit acpi_battery_exit(void) { - acpi_bus_unregister_driver(&acpi_battery_driver); acpi_unlock_battery_dir(acpi_battery_dir); diff -Nurb linux-2.6.22-570/drivers/acpi/bay.c linux-2.6.22-591/drivers/acpi/bay.c --- linux-2.6.22-570/drivers/acpi/bay.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/bay.c 2007-12-21 15:36:11.000000000 -0500 @@ -288,6 +288,11 @@ new_bay->pdev = pdev; platform_set_drvdata(pdev, new_bay); + /* + * we want the bay driver to be able to send uevents + */ + pdev->dev.uevent_suppress = 0; + if (acpi_bay_add_fs(new_bay)) { platform_device_unregister(new_bay->pdev); goto bay_add_err; @@ -328,18 +333,12 @@ { struct bay *bay_dev = (struct bay *)data; struct device *dev = &bay_dev->pdev->dev; + char event_string[12]; + char *envp[] = { event_string, NULL }; bay_dprintk(handle, "Bay event"); - - switch(event) { - case ACPI_NOTIFY_BUS_CHECK: - case ACPI_NOTIFY_DEVICE_CHECK: - case ACPI_NOTIFY_EJECT_REQUEST: - kobject_uevent(&dev->kobj, KOBJ_CHANGE); - break; - default: - printk(KERN_ERR PREFIX "Bay: unknown event %d\n", event); - } + sprintf(event_string, 
"BAY_EVENT=%d\n", event); + kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); } static acpi_status diff -Nurb linux-2.6.22-570/drivers/acpi/dock.c linux-2.6.22-591/drivers/acpi/dock.c --- linux-2.6.22-570/drivers/acpi/dock.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/acpi/dock.c 2007-12-21 15:36:11.000000000 -0500 @@ -40,8 +40,15 @@ MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION); MODULE_LICENSE("GPL"); +static int immediate_undock = 1; +module_param(immediate_undock, bool, 0644); +MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to " + "undock immediately when the undock button is pressed, 0 will cause" + " the driver to wait for userspace to write the undock sysfs file " + " before undocking"); + static struct atomic_notifier_head dock_notifier_list; -static struct platform_device dock_device; +static struct platform_device *dock_device; static char dock_device_name[] = "dock"; struct dock_station { @@ -63,6 +70,7 @@ }; #define DOCK_DOCKING 0x00000001 +#define DOCK_UNDOCKING 0x00000002 #define DOCK_EVENT 3 #define UNDOCK_EVENT 2 @@ -327,12 +335,20 @@ static void dock_event(struct dock_station *ds, u32 event, int num) { - struct device *dev = &dock_device.dev; + struct device *dev = &dock_device->dev; + char event_string[7]; + char *envp[] = { event_string, NULL }; + + if (num == UNDOCK_EVENT) + sprintf(event_string, "UNDOCK"); + else + sprintf(event_string, "DOCK"); + /* * Indicate that the status of the dock station has * changed. 
*/ - kobject_uevent(&dev->kobj, KOBJ_CHANGE); + kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); } /** @@ -420,6 +436,16 @@ ds->last_dock_time = jiffies; } +static inline void begin_undock(struct dock_station *ds) +{ + ds->flags |= DOCK_UNDOCKING; +} + +static inline void complete_undock(struct dock_station *ds) +{ + ds->flags &= ~(DOCK_UNDOCKING); +} + /** * dock_in_progress - see if we are in the middle of handling a dock event * @ds: the dock station @@ -550,7 +576,7 @@ printk(KERN_ERR PREFIX "Unable to undock!\n"); return -EBUSY; } - + complete_undock(ds); return 0; } @@ -594,7 +620,11 @@ * to the driver who wish to hotplug. */ case ACPI_NOTIFY_EJECT_REQUEST: + begin_undock(ds); + if (immediate_undock) handle_eject_request(ds, event); + else + dock_event(ds, event, UNDOCK_EVENT); break; default: printk(KERN_ERR PREFIX "Unknown dock event %d\n", event); @@ -653,6 +683,17 @@ DEVICE_ATTR(docked, S_IRUGO, show_docked, NULL); /* + * show_flags - read method for flags file in sysfs + */ +static ssize_t show_flags(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", dock_station->flags); + +} +DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL); + +/* * write_undock - write method for "undock" file in sysfs */ static ssize_t write_undock(struct device *dev, struct device_attribute *attr, @@ -675,16 +716,15 @@ struct device_attribute *attr, char *buf) { unsigned long lbuf; - acpi_status status = acpi_evaluate_integer(dock_station->handle, "_UID", NULL, &lbuf); - if(ACPI_FAILURE(status)) { + acpi_status status = acpi_evaluate_integer(dock_station->handle, + "_UID", NULL, &lbuf); + if (ACPI_FAILURE(status)) return 0; - } + return snprintf(buf, PAGE_SIZE, "%lx\n", lbuf); } DEVICE_ATTR(uid, S_IRUGO, show_dock_uid, NULL); - - /** * dock_add - add a new dock station * @handle: the dock station handle diff -Nurb linux-2.6.22-570/drivers/acpi/ec.c linux-2.6.22-591/drivers/acpi/ec.c --- linux-2.6.22-570/drivers/acpi/ec.c 
2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/ec.c 2007-12-21 15:36:11.000000000 -0500 @@ -34,25 +34,26 @@ #include #include #include +#include #include #include #include #include -#define _COMPONENT ACPI_EC_COMPONENT -ACPI_MODULE_NAME("ec"); -#define ACPI_EC_COMPONENT 0x00100000 #define ACPI_EC_CLASS "embedded_controller" #define ACPI_EC_HID "PNP0C09" #define ACPI_EC_DEVICE_NAME "Embedded Controller" #define ACPI_EC_FILE_INFO "info" + #undef PREFIX #define PREFIX "ACPI: EC: " + /* EC status register */ #define ACPI_EC_FLAG_OBF 0x01 /* Output buffer full */ #define ACPI_EC_FLAG_IBF 0x02 /* Input buffer full */ #define ACPI_EC_FLAG_BURST 0x10 /* burst mode */ #define ACPI_EC_FLAG_SCI 0x20 /* EC-SCI occurred */ + /* EC commands */ enum ec_command { ACPI_EC_COMMAND_READ = 0x80, @@ -61,6 +62,7 @@ ACPI_EC_BURST_DISABLE = 0x83, ACPI_EC_COMMAND_QUERY = 0x84, }; + /* EC events */ enum ec_event { ACPI_EC_EVENT_OBF_1 = 1, /* Output buffer full */ @@ -94,6 +96,16 @@ /* If we find an EC via the ECDT, we need to keep a ptr to its context */ /* External interfaces use first EC only, so remember */ +typedef int (*acpi_ec_query_func) (void *data); + +struct acpi_ec_query_handler { + struct list_head node; + acpi_ec_query_func func; + acpi_handle handle; + void *data; + u8 query_bit; +}; + static struct acpi_ec { acpi_handle handle; unsigned long gpe; @@ -104,6 +116,7 @@ atomic_t query_pending; atomic_t event_count; wait_queue_head_t wait; + struct list_head list; } *boot_ec, *first_ec; /* -------------------------------------------------------------------------- @@ -245,7 +258,7 @@ status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0, 0, 0); if (status) { - printk(KERN_DEBUG PREFIX + printk(KERN_ERR PREFIX "input buffer is not empty, aborting transaction\n"); goto end; } @@ -394,21 +407,67 @@ /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ 
+int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, + acpi_handle handle, acpi_ec_query_func func, + void *data) +{ + struct acpi_ec_query_handler *handler = + kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL); + if (!handler) + return -ENOMEM; + + handler->query_bit = query_bit; + handler->handle = handle; + handler->func = func; + handler->data = data; + mutex_lock(&ec->lock); + list_add_tail(&handler->node, &ec->list); + mutex_unlock(&ec->lock); + return 0; +} + +EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler); + +void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit) +{ + struct acpi_ec_query_handler *handler; + mutex_lock(&ec->lock); + list_for_each_entry(handler, &ec->list, node) { + if (query_bit == handler->query_bit) { + list_del(&handler->node); + kfree(handler); + break; + } + } + mutex_unlock(&ec->lock); +} + +EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler); static void acpi_ec_gpe_query(void *ec_cxt) { struct acpi_ec *ec = ec_cxt; u8 value = 0; - char object_name[8]; + struct acpi_ec_query_handler *handler, copy; if (!ec || acpi_ec_query(ec, &value)) return; - - snprintf(object_name, 8, "_Q%2.2X", value); - - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name)); - - acpi_evaluate_object(ec->handle, object_name, NULL, NULL); + mutex_lock(&ec->lock); + list_for_each_entry(handler, &ec->list, node) { + if (value == handler->query_bit) { + /* have custom handler for this bit */ + memcpy(©, handler, sizeof(copy)); + mutex_unlock(&ec->lock); + if (copy.func) { + copy.func(copy.data); + } else if (copy.handle) { + acpi_evaluate_object(copy.handle, NULL, NULL, NULL); + } + return; + } + } + mutex_unlock(&ec->lock); + printk(KERN_ERR PREFIX "Handler for query 0x%x is not found!\n", value); } static u32 acpi_ec_gpe_handler(void *data) @@ -427,8 +486,7 @@ if ((value & ACPI_EC_FLAG_SCI) && !atomic_read(&ec->query_pending)) { atomic_set(&ec->query_pending, 1); status = - acpi_os_execute(OSL_EC_BURST_HANDLER, 
acpi_ec_gpe_query, - ec); + acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query, ec); } return status == AE_OK ? @@ -454,57 +512,35 @@ } static acpi_status -acpi_ec_space_handler(u32 function, - acpi_physical_address address, - u32 bit_width, - acpi_integer * value, +acpi_ec_space_handler(u32 function, acpi_physical_address address, + u32 bits, acpi_integer *value, void *handler_context, void *region_context) { - int result = 0; struct acpi_ec *ec = handler_context; - u64 temp = *value; - acpi_integer f_v = 0; - int i = 0; + int result = 0, i = 0; + u8 temp = 0; if ((address > 0xFF) || !value || !handler_context) return AE_BAD_PARAMETER; - if (bit_width != 8 && acpi_strict) { + if (function != ACPI_READ && function != ACPI_WRITE) return AE_BAD_PARAMETER; - } - next_byte: - switch (function) { - case ACPI_READ: - temp = 0; - result = acpi_ec_read(ec, (u8) address, (u8 *) & temp); - break; - case ACPI_WRITE: - result = acpi_ec_write(ec, (u8) address, (u8) temp); - break; - default: - result = -EINVAL; - goto out; - break; - } - - bit_width -= 8; - if (bit_width) { - if (function == ACPI_READ) - f_v |= temp << 8 * i; - if (function == ACPI_WRITE) - temp >>= 8; - i++; - address++; - goto next_byte; - } + if (bits != 8 && acpi_strict) + return AE_BAD_PARAMETER; + while (bits - i > 0) { if (function == ACPI_READ) { - f_v |= temp << 8 * i; - *value = f_v; + result = acpi_ec_read(ec, address, &temp); + (*value) |= ((acpi_integer)temp) << i; + } else { + temp = 0xff & ((*value) >> i); + result = acpi_ec_write(ec, address, temp); + } + i += 8; + ++address; } - out: switch (result) { case -EINVAL: return AE_BAD_PARAMETER; @@ -597,9 +633,6 @@ static acpi_status ec_parse_io_ports(struct acpi_resource *resource, void *context); -static acpi_status -ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval); - static struct acpi_ec *make_acpi_ec(void) { struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL); @@ -610,13 +643,52 @@ 
atomic_set(&ec->event_count, 1); mutex_init(&ec->lock); init_waitqueue_head(&ec->wait); + INIT_LIST_HEAD(&ec->list); return ec; } +static acpi_status +acpi_ec_register_query_methods(acpi_handle handle, u32 level, + void *context, void **return_value) +{ + struct acpi_namespace_node *node = handle; + struct acpi_ec *ec = context; + int value = 0; + if (sscanf(node->name.ascii, "_Q%x", &value) == 1) { + acpi_ec_add_query_handler(ec, value, handle, NULL, NULL); + } + return AE_OK; +} + +static int ec_parse_device(struct acpi_ec *ec, acpi_handle handle) +{ + if (ACPI_FAILURE(acpi_walk_resources(handle, METHOD_NAME__CRS, + ec_parse_io_ports, ec))) + return -EINVAL; + + /* Get GPE bit assignment (EC events). */ + /* TODO: Add support for _GPE returning a package */ + if (ACPI_FAILURE(acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe))) + return -EINVAL; + + /* Use the global lock for all EC transactions? */ + acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock); + + /* Find and register all query methods */ + acpi_walk_namespace(ACPI_TYPE_METHOD, handle, 1, + acpi_ec_register_query_methods, ec, NULL); + + ec->handle = handle; + + printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx", + ec->gpe, ec->command_addr, ec->data_addr); + + return 0; +} + static int acpi_ec_add(struct acpi_device *device) { - acpi_status status = AE_OK; struct acpi_ec *ec = NULL; if (!device) @@ -629,8 +701,7 @@ if (!ec) return -ENOMEM; - status = ec_parse_device(device->handle, 0, ec, NULL); - if (status != AE_CTRL_TERMINATE) { + if (ec_parse_device(ec, device->handle)) { kfree(ec); return -EINVAL; } @@ -641,6 +712,8 @@ /* We might have incorrect info for GL at boot time */ mutex_lock(&boot_ec->lock); boot_ec->global_lock = ec->global_lock; + /* Copy handlers from new ec into boot ec */ + list_splice(&ec->list, &boot_ec->list); mutex_unlock(&boot_ec->lock); kfree(ec); ec = boot_ec; @@ -651,22 +724,24 @@ acpi_driver_data(device) = ec; acpi_ec_add_fs(device); 
- - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s [%s] (gpe %d) interrupt mode.", - acpi_device_name(device), acpi_device_bid(device), - (u32) ec->gpe)); - return 0; } static int acpi_ec_remove(struct acpi_device *device, int type) { struct acpi_ec *ec; + struct acpi_ec_query_handler *handler; if (!device) return -EINVAL; ec = acpi_driver_data(device); + mutex_lock(&ec->lock); + list_for_each_entry(handler, &ec->list, node) { + list_del(&handler->node); + kfree(handler); + } + mutex_unlock(&ec->lock); acpi_ec_remove_fs(device); acpi_driver_data(device) = NULL; if (ec == first_ec) @@ -722,15 +797,13 @@ return -ENODEV; } - /* EC is fully operational, allow queries */ - atomic_set(&ec->query_pending, 0); - return 0; } static int acpi_ec_start(struct acpi_device *device) { struct acpi_ec *ec; + int ret = 0; if (!device) return -EINVAL; @@ -740,14 +813,14 @@ if (!ec) return -EINVAL; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "gpe=0x%02lx, ports=0x%2lx,0x%2lx", - ec->gpe, ec->command_addr, ec->data_addr)); - /* Boot EC is already working */ - if (ec == boot_ec) - return 0; + if (ec != boot_ec) + ret = ec_install_handlers(ec); - return ec_install_handlers(ec); + /* EC is fully operational, allow queries */ + atomic_set(&ec->query_pending, 0); + + return ret; } static int acpi_ec_stop(struct acpi_device *device, int type) @@ -779,34 +852,6 @@ return 0; } -static acpi_status -ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval) -{ - acpi_status status; - - struct acpi_ec *ec = context; - status = acpi_walk_resources(handle, METHOD_NAME__CRS, - ec_parse_io_ports, ec); - if (ACPI_FAILURE(status)) - return status; - - /* Get GPE bit assignment (EC events). */ - /* TODO: Add support for _GPE returning a package */ - status = acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe); - if (ACPI_FAILURE(status)) - return status; - - /* Use the global lock for all EC transactions? 
*/ - acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock); - - ec->handle = handle; - - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "GPE=0x%02lx, ports=0x%2lx, 0x%2lx", - ec->gpe, ec->command_addr, ec->data_addr)); - - return AE_CTRL_TERMINATE; -} - int __init acpi_ec_ecdt_probe(void) { int ret; @@ -825,7 +870,7 @@ if (ACPI_FAILURE(status)) goto error; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found ECDT")); + printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n"); boot_ec->command_addr = ecdt_ptr->control.address; boot_ec->data_addr = ecdt_ptr->data.address; diff -Nurb linux-2.6.22-570/drivers/acpi/osl.c linux-2.6.22-591/drivers/acpi/osl.c --- linux-2.6.22-570/drivers/acpi/osl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/osl.c 2007-12-21 15:36:11.000000000 -0500 @@ -77,13 +77,7 @@ #define OSI_STRING_LENGTH_MAX 64 /* arbitrary */ static char osi_additional_string[OSI_STRING_LENGTH_MAX]; -#define OSI_LINUX_ENABLED -#ifdef OSI_LINUX_ENABLED -int osi_linux = 1; /* enable _OSI(Linux) by default */ -#else int osi_linux; /* disable _OSI(Linux) by default */ -#endif - #ifdef CONFIG_DMI static struct __initdata dmi_system_id acpi_osl_dmi_table[]; @@ -1056,6 +1050,17 @@ EXPORT_SYMBOL(max_cstate); +void (*acpi_do_set_cstate_limit)(void); +EXPORT_SYMBOL(acpi_do_set_cstate_limit); + +void acpi_set_cstate_limit(unsigned int new_limit) +{ + max_cstate = new_limit; + if (acpi_do_set_cstate_limit) + acpi_do_set_cstate_limit(); +} +EXPORT_SYMBOL(acpi_set_cstate_limit); + /* * Acquire a spinlock. 
* @@ -1183,17 +1188,10 @@ if (!strcmp("Linux", interface)) { printk(KERN_WARNING PREFIX "System BIOS is requesting _OSI(Linux)\n"); -#ifdef OSI_LINUX_ENABLED - printk(KERN_WARNING PREFIX - "Please test with \"acpi_osi=!Linux\"\n" - "Please send dmidecode " - "to linux-acpi@vger.kernel.org\n"); -#else printk(KERN_WARNING PREFIX "If \"acpi_osi=Linux\" works better,\n" "Please send dmidecode " "to linux-acpi@vger.kernel.org\n"); -#endif if(osi_linux) return AE_OK; } @@ -1227,36 +1225,14 @@ } #ifdef CONFIG_DMI -#ifdef OSI_LINUX_ENABLED -static int dmi_osi_not_linux(struct dmi_system_id *d) -{ - printk(KERN_NOTICE "%s detected: requires not _OSI(Linux)\n", d->ident); - enable_osi_linux(0); - return 0; -} -#else static int dmi_osi_linux(struct dmi_system_id *d) { - printk(KERN_NOTICE "%s detected: requires _OSI(Linux)\n", d->ident); + printk(KERN_NOTICE "%s detected: enabling _OSI(Linux)\n", d->ident); enable_osi_linux(1); return 0; } -#endif static struct dmi_system_id acpi_osl_dmi_table[] __initdata = { -#ifdef OSI_LINUX_ENABLED - /* - * Boxes that need NOT _OSI(Linux) - */ - { - .callback = dmi_osi_not_linux, - .ident = "Toshiba Satellite P100", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_BOARD_NAME, "Satellite P100"), - }, - }, -#else /* * Boxes that need _OSI(Linux) */ @@ -1268,7 +1244,6 @@ DMI_MATCH(DMI_BOARD_NAME, "MPAD-MSAE Customer Reference Boards"), }, }, -#endif {} }; #endif /* CONFIG_DMI */ diff -Nurb linux-2.6.22-570/drivers/acpi/processor_core.c linux-2.6.22-591/drivers/acpi/processor_core.c --- linux-2.6.22-570/drivers/acpi/processor_core.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/acpi/processor_core.c 2007-12-21 15:36:11.000000000 -0500 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -66,6 +67,7 @@ #define ACPI_PROCESSOR_FILE_LIMIT "limit" #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 +#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 
#define ACPI_PROCESSOR_LIMIT_USER 0 #define ACPI_PROCESSOR_LIMIT_THERMAL 1 @@ -84,6 +86,8 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data); static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); static int acpi_processor_handle_eject(struct acpi_processor *pr); +extern int acpi_processor_tstate_has_changed(struct acpi_processor *pr); + static struct acpi_driver acpi_processor_driver = { .name = "processor", @@ -701,6 +705,9 @@ acpi_processor_cst_has_changed(pr); acpi_bus_generate_event(device, event, 0); break; + case ACPI_PROCESSOR_NOTIFY_THROTTLING: + acpi_processor_tstate_has_changed(pr); + acpi_bus_generate_event(device, event, 0); default: ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unsupported event [0x%x]\n", event)); @@ -1024,11 +1031,15 @@ acpi_processor_ppc_init(); + cpuidle_register_driver(&acpi_idle_driver); + acpi_do_set_cstate_limit = acpi_max_cstate_changed; return 0; } static void __exit acpi_processor_exit(void) { + acpi_do_set_cstate_limit = NULL; + cpuidle_unregister_driver(&acpi_idle_driver); acpi_processor_ppc_exit(); diff -Nurb linux-2.6.22-570/drivers/acpi/processor_idle.c linux-2.6.22-591/drivers/acpi/processor_idle.c --- linux-2.6.22-570/drivers/acpi/processor_idle.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/acpi/processor_idle.c 2007-12-21 15:36:11.000000000 -0500 @@ -40,6 +40,7 @@ #include /* need_resched() */ #include #include +#include /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -62,25 +63,34 @@ #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); #define ACPI_PROCESSOR_FILE_POWER "power" -#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) -#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ -#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ -static void (*pm_idle_save) (void) __read_mostly; -module_param(max_cstate, uint, 0644); +#define PM_TIMER_TICKS_TO_US(p) (((p) * 
1000)/(PM_TIMER_FREQUENCY/1000)) +#define C2_OVERHEAD 1 /* 1us */ +#define C3_OVERHEAD 1 /* 1us */ + +void acpi_max_cstate_changed(void) +{ + /* Driver will reset devices' max cstate limit */ + cpuidle_force_redetect_devices(&acpi_idle_driver); +} + +static int change_max_cstate(const char *val, struct kernel_param *kp) +{ + int max; + + max = simple_strtol(val, NULL, 0); + if (!max) + return -EINVAL; + max_cstate = max; + if (acpi_do_set_cstate_limit) + acpi_do_set_cstate_limit(); + return 0; +} + +module_param_call(max_cstate, change_max_cstate, param_get_uint, &max_cstate, 0644); static unsigned int nocst __read_mostly; module_param(nocst, uint, 0000); -/* - * bm_history -- bit-mask with a bit per jiffy of bus-master activity - * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms - * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms - * 100 HZ: 0x0000000F: 4 jiffies = 40ms - * reduce history for more aggressive entry into C3 - */ -static unsigned int bm_history __read_mostly = - (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); -module_param(bm_history, uint, 0644); /* -------------------------------------------------------------------------- Power Management -------------------------------------------------------------------------- */ @@ -166,88 +176,6 @@ {}, }; -static inline u32 ticks_elapsed(u32 t1, u32 t2) -{ - if (t2 >= t1) - return (t2 - t1); - else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER)) - return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); - else - return ((0xFFFFFFFF - t1) + t2); -} - -static void -acpi_processor_power_activate(struct acpi_processor *pr, - struct acpi_processor_cx *new) -{ - struct acpi_processor_cx *old; - - if (!pr || !new) - return; - - old = pr->power.state; - - if (old) - old->promotion.count = 0; - new->demotion.count = 0; - - /* Cleanup from old state. 
*/ - if (old) { - switch (old->type) { - case ACPI_STATE_C3: - /* Disable bus master reload */ - if (new->type != ACPI_STATE_C3 && pr->flags.bm_check) - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - break; - } - } - - /* Prepare to use new state. */ - switch (new->type) { - case ACPI_STATE_C3: - /* Enable bus master reload */ - if (old->type != ACPI_STATE_C3 && pr->flags.bm_check) - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); - break; - } - - pr->power.state = new; - - return; -} - -static void acpi_safe_halt(void) -{ - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - if (!need_resched()) - safe_halt(); - current_thread_info()->status |= TS_POLLING; -} - -static atomic_t c3_cpu_count; - -/* Common C-state entry for C2, C3, .. */ -static void acpi_cstate_enter(struct acpi_processor_cx *cstate) -{ - if (cstate->space_id == ACPI_CSTATE_FFH) { - /* Call into architectural FFH based C-state */ - acpi_processor_ffh_cstate_enter(cstate); - } else { - int unused; - /* IO port based C-state */ - inb(cstate->address); - /* Dummy wait op - must do something useless after P_LVL2 read - because chipsets cannot guarantee that STPCLK# signal - gets asserted in time to freeze execution properly. 
*/ - unused = inl(acpi_gbl_FADT.xpm_timer_block.address); - } -} - #ifdef ARCH_APICTIMER_STOPS_ON_C3 /* @@ -341,6 +269,7 @@ return 0; } +<<<<<<< HEAD/drivers/acpi/processor_idle.c static void acpi_processor_idle(void) { struct acpi_processor *pr = NULL; @@ -712,6 +641,8 @@ return 0; } +======= +>>>>>>> /drivers/acpi/processor_idle.c static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr) { @@ -929,7 +860,7 @@ * Normalize the C2 latency to expidite policy */ cx->valid = 1; - cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); + cx->latency_ticks = cx->latency; return; } @@ -1003,7 +934,7 @@ * use this in our C3 policy */ cx->valid = 1; - cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); + cx->latency_ticks = cx->latency; return; } @@ -1069,18 +1000,6 @@ pr->power.count = acpi_processor_power_verify(pr); /* - * Set Default Policy - * ------------------ - * Now that we know which states are supported, set the default - * policy. Note that this policy can be changed dynamically - * (e.g. encourage deeper sleeps to conserve battery life when - * not on AC). - */ - result = acpi_processor_set_power_policy(pr); - if (result) - return result; - - /* * if one state of type C2 or C3 is available, mark this * CPU as being "idle manageable" */ @@ -1097,9 +1016,6 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) { - int result = 0; - - if (!pr) return -EINVAL; @@ -1110,16 +1026,9 @@ if (!pr->flags.power_setup_done) return -ENODEV; - /* Fall back to the default idle loop */ - pm_idle = pm_idle_save; - synchronize_sched(); /* Relies on interrupts forcing exit from idle. 
*/ - - pr->flags.power = 0; - result = acpi_processor_get_power_info(pr); - if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) - pm_idle = acpi_processor_idle; - - return result; + acpi_processor_get_power_info(pr); + return cpuidle_force_redetect(per_cpu(cpuidle_devices, pr->id), + &acpi_idle_driver); } /* proc interface */ @@ -1205,30 +1114,6 @@ .release = single_release, }; -#ifdef CONFIG_SMP -static void smp_callback(void *v) -{ - /* we already woke the CPU up, nothing more to do */ -} - -/* - * This function gets called when a part of the kernel has a new latency - * requirement. This means we need to get all processors out of their C-state, - * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that - * wakes them all right up. - */ -static int acpi_processor_latency_notify(struct notifier_block *b, - unsigned long l, void *v) -{ - smp_call_function(smp_callback, NULL, 0, 1); - return NOTIFY_OK; -} - -static struct notifier_block acpi_processor_latency_notifier = { - .notifier_call = acpi_processor_latency_notify, -}; -#endif - int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) { @@ -1245,9 +1130,6 @@ "ACPI: processor limited to max C-state %d\n", max_cstate); first_run++; -#ifdef CONFIG_SMP - register_latency_notifier(&acpi_processor_latency_notifier); -#endif } if (!pr) @@ -1264,6 +1146,7 @@ acpi_processor_get_power_info(pr); + /* * Install the idle handler if processor power management is supported. 
* Note that we use previously set idle handler will be used on @@ -1276,11 +1159,6 @@ printk(" C%d[C%d]", i, pr->power.states[i].type); printk(")\n"); - - if (pr->id == 0) { - pm_idle_save = pm_idle; - pm_idle = acpi_processor_idle; - } } /* 'power' [R] */ @@ -1308,21 +1186,332 @@ if (acpi_device_dir(device)) remove_proc_entry(ACPI_PROCESSOR_FILE_POWER, acpi_device_dir(device)); + return 0; +} + +/** + * ticks_elapsed - a helper function that determines how many ticks (in US) + * have elapsed between two PM Timer timestamps + * @t1: the start time + * @t2: the end time + */ +static inline u32 ticks_elapsed(u32 t1, u32 t2) +{ + if (t2 >= t1) + return PM_TIMER_TICKS_TO_US(t2 - t1); + else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER)) + return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); + else + return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2); +} - /* Unregister the idle handler when processor #0 is removed. */ - if (pr->id == 0) { - pm_idle = pm_idle_save; +/** + * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state + * @pr: the processor + * @target: the new target state + */ +static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr, + struct acpi_processor_cx *target) +{ + if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) { + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); + pr->flags.bm_rld_set = 0; + } + if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) { + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); + pr->flags.bm_rld_set = 1; + } +} + +/** + * acpi_idle_do_entry - a helper function that does C2 and C3 type entry + * @cx: cstate data + */ +static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) +{ + if (cx->space_id == ACPI_CSTATE_FFH) { + /* Call into architectural FFH based C-state */ + acpi_processor_ffh_cstate_enter(cx); + } else { + int unused; + /* IO port based C-state */ + inb(cx->address); + /* Dummy wait op - must do something useless after P_LVL2 read 
+ because chipsets cannot guarantee that STPCLK# signal + gets asserted in time to freeze execution properly. */ + unused = inl(acpi_gbl_FADT.xpm_timer_block.address); + } +} + +/** + * acpi_idle_enter_c1 - enters an ACPI C1 state-type + * @dev: the target CPU + * @state: the state data + * + * This is equivalent to the HALT instruction. + */ +static int acpi_idle_enter_c1(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + struct acpi_processor *pr; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state); + pr = processors[smp_processor_id()]; + + if (unlikely(!pr)) + return 0; + + if (pr->flags.bm_check) + acpi_idle_update_bm_rld(pr, cx); + + current_thread_info()->status &= ~TS_POLLING; /* - * We are about to unload the current idle thread pm callback - * (pm_idle), Wait for all processors to update cached/local - * copies of pm_idle before proceeding. - */ - cpu_idle_wait(); -#ifdef CONFIG_SMP - unregister_latency_notifier(&acpi_processor_latency_notifier); + * TS_POLLING-cleared state must be visible before we test + * NEED_RESCHED: + */ + smp_mb(); + if (!need_resched()) + safe_halt(); + current_thread_info()->status |= TS_POLLING; + + cx->usage++; + + return 0; +} + +/** + * acpi_idle_enter_c2 - enters an ACPI C2 state-type + * @dev: the target CPU + * @state: the state data + */ +static int acpi_idle_enter_c2(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + struct acpi_processor *pr; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state); + u32 t1, t2; + pr = processors[smp_processor_id()]; + + if (unlikely(!pr)) + return 0; + + if (pr->flags.bm_check) + acpi_idle_update_bm_rld(pr, cx); + + local_irq_disable(); + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we test + * NEED_RESCHED: + */ + smp_mb(); + + if (unlikely(need_resched())) { + current_thread_info()->status |= TS_POLLING; + local_irq_enable(); + return 0; + } + + t1 = 
inl(acpi_gbl_FADT.xpm_timer_block.address); + acpi_state_timer_broadcast(pr, cx, 1); + acpi_idle_do_entry(cx); + t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); + +#ifdef CONFIG_GENERIC_TIME + /* TSC halts in C2, so notify users */ + mark_tsc_unstable("possible TSC halt in C2"); #endif + + local_irq_enable(); + current_thread_info()->status |= TS_POLLING; + + cx->usage++; + + acpi_state_timer_broadcast(pr, cx, 0); + return ticks_elapsed(t1, t2); +} + +static int c3_cpu_count; +static DEFINE_SPINLOCK(c3_lock); + +/** + * acpi_idle_enter_c3 - enters an ACPI C3 state-type + * @dev: the target CPU + * @state: the state data + * + * Similar to C2 entry, except special bus master handling is needed. + */ +static int acpi_idle_enter_c3(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + struct acpi_processor *pr; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state); + u32 t1, t2; + pr = processors[smp_processor_id()]; + + if (unlikely(!pr)) + return 0; + + if (pr->flags.bm_check) + acpi_idle_update_bm_rld(pr, cx); + + local_irq_disable(); + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we test + * NEED_RESCHED: + */ + smp_mb(); + + if (unlikely(need_resched())) { + current_thread_info()->status |= TS_POLLING; + local_irq_enable(); + return 0; + } + + /* disable bus master */ + if (pr->flags.bm_check) { + spin_lock(&c3_lock); + c3_cpu_count++; + if (c3_cpu_count == num_online_cpus()) { + /* + * All CPUs are trying to go to C3 + * Disable bus master arbitration + */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); + } + spin_unlock(&c3_lock); + } else { + /* SMP with no shared cache... 
Invalidate cache */ + ACPI_FLUSH_CPU_CACHE(); + } + + /* Get start time (ticks) */ + t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); + acpi_state_timer_broadcast(pr, cx, 1); + acpi_idle_do_entry(cx); + t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); + + if (pr->flags.bm_check) { + spin_lock(&c3_lock); + /* Enable bus master arbitration */ + if (c3_cpu_count == num_online_cpus()) + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); + c3_cpu_count--; + spin_unlock(&c3_lock); } +#ifdef CONFIG_GENERIC_TIME + /* TSC halts in C3, so notify users */ + mark_tsc_unstable("TSC halts in C3"); +#endif + + local_irq_enable(); + current_thread_info()->status |= TS_POLLING; + + cx->usage++; + + acpi_state_timer_broadcast(pr, cx, 0); + return ticks_elapsed(t1, t2); +} + +/** + * acpi_idle_bm_check - checks if bus master activity was detected + */ +static int acpi_idle_bm_check(void) +{ + u32 bm_status = 0; + + acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); + if (bm_status) + acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); + /* + * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect + * the true state of bus mastering activity; forcing us to + * manually check the BMIDEA bit of each IDE channel. 
+ */ + else if (errata.piix4.bmisx) { + if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01) + || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01)) + bm_status = 1; + } + return bm_status; +} + +/** + * acpi_idle_init - attaches the driver to a CPU + * @dev: the CPU + */ +static int acpi_idle_init(struct cpuidle_device *dev) +{ + int cpu = dev->cpu; + int i, count = 0; + struct acpi_processor_cx *cx; + struct cpuidle_state *state; + + struct acpi_processor *pr = processors[cpu]; + + if (!pr->flags.power_setup_done) + return -EINVAL; + + if (pr->flags.power == 0) { + return -EINVAL; + } + + for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { + cx = &pr->power.states[i]; + state = &dev->states[count]; + + if (!cx->valid) + continue; + +#ifdef CONFIG_HOTPLUG_CPU + if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && + !pr->flags.has_cst && + !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) + continue; +#endif + cpuidle_set_statedata(state, cx); + + state->exit_latency = cx->latency; + state->target_residency = cx->latency * 6; + state->power_usage = cx->power; + + state->flags = 0; + switch (cx->type) { + case ACPI_STATE_C1: + state->flags |= CPUIDLE_FLAG_SHALLOW; + state->enter = acpi_idle_enter_c1; + break; + + case ACPI_STATE_C2: + state->flags |= CPUIDLE_FLAG_BALANCED; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = acpi_idle_enter_c2; + break; + + case ACPI_STATE_C3: + state->flags |= CPUIDLE_FLAG_DEEP; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->flags |= CPUIDLE_FLAG_CHECK_BM; + state->enter = acpi_idle_enter_c3; + break; + } + + count++; + } + + if (!count) + return -EINVAL; + + dev->state_count = count; return 0; } + +struct cpuidle_driver acpi_idle_driver = { + .name = "acpi_idle", + .init = acpi_idle_init, + .redetect = acpi_idle_init, + .bm_check = acpi_idle_bm_check, + .owner = THIS_MODULE, +}; diff -Nurb linux-2.6.22-570/drivers/acpi/processor_throttling.c linux-2.6.22-591/drivers/acpi/processor_throttling.c --- 
linux-2.6.22-570/drivers/acpi/processor_throttling.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/processor_throttling.c 2007-12-21 15:36:11.000000000 -0500 @@ -44,17 +44,231 @@ #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_throttling"); +static int acpi_processor_get_throttling(struct acpi_processor *pr); +int acpi_processor_set_throttling(struct acpi_processor *pr, int state); + +static int acpi_processor_get_platform_limit(struct acpi_processor *pr) +{ + acpi_status status = 0; + unsigned long tpc = 0; + + if (!pr) + return -EINVAL; + status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC")); + return -ENODEV; + } + pr->throttling_platform_limit = (int)tpc; + return 0; +} + +int acpi_processor_tstate_has_changed(struct acpi_processor *pr) +{ + return acpi_processor_get_platform_limit(pr); +} + +/* -------------------------------------------------------------------------- + _PTC, _TSS, _TSD support + -------------------------------------------------------------------------- */ +static int acpi_processor_get_throttling_control(struct acpi_processor *pr) +{ + int result = 0; + acpi_status status = 0; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *ptc = NULL; + union acpi_object obj = { 0 }; + + status = acpi_evaluate_object(pr->handle, "_PTC", NULL, &buffer); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTC")); + return -ENODEV; + } + + ptc = (union acpi_object *)buffer.pointer; + if (!ptc || (ptc->type != ACPI_TYPE_PACKAGE) + || (ptc->package.count != 2)) { + printk(KERN_ERR PREFIX "Invalid _PTC data\n"); + result = -EFAULT; + goto end; + } + + /* + * control_register + */ + + obj = ptc->package.elements[0]; + + if ((obj.type != ACPI_TYPE_BUFFER) + || (obj.buffer.length < sizeof(struct acpi_ptc_register)) + || (obj.buffer.pointer 
== NULL)) { + printk(KERN_ERR PREFIX + "Invalid _PTC data (control_register)\n"); + result = -EFAULT; + goto end; + } + memcpy(&pr->throttling.control_register, obj.buffer.pointer, + sizeof(struct acpi_ptc_register)); + + /* + * status_register + */ + + obj = ptc->package.elements[1]; + + if ((obj.type != ACPI_TYPE_BUFFER) + || (obj.buffer.length < sizeof(struct acpi_ptc_register)) + || (obj.buffer.pointer == NULL)) { + printk(KERN_ERR PREFIX "Invalid _PTC data (status_register)\n"); + result = -EFAULT; + goto end; + } + + memcpy(&pr->throttling.status_register, obj.buffer.pointer, + sizeof(struct acpi_ptc_register)); + + end: + kfree(buffer.pointer); + + return result; +} +static int acpi_processor_get_throttling_states(struct acpi_processor *pr) +{ + int result = 0; + acpi_status status = AE_OK; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" }; + struct acpi_buffer state = { 0, NULL }; + union acpi_object *tss = NULL; + int i; + + status = acpi_evaluate_object(pr->handle, "_TSS", NULL, &buffer); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSS")); + return -ENODEV; + } + + tss = buffer.pointer; + if (!tss || (tss->type != ACPI_TYPE_PACKAGE)) { + printk(KERN_ERR PREFIX "Invalid _TSS data\n"); + result = -EFAULT; + goto end; + } + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d throttling states\n", + tss->package.count)); + + pr->throttling.state_count = tss->package.count; + pr->throttling.states_tss = + kmalloc(sizeof(struct acpi_processor_tx_tss) * tss->package.count, + GFP_KERNEL); + if (!pr->throttling.states_tss) { + result = -ENOMEM; + goto end; + } + + for (i = 0; i < pr->throttling.state_count; i++) { + + struct acpi_processor_tx_tss *tx = + (struct acpi_processor_tx_tss *)&(pr->throttling. 
+ states_tss[i]); + + state.length = sizeof(struct acpi_processor_tx_tss); + state.pointer = tx; + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Extracting state %d\n", i)); + + status = acpi_extract_package(&(tss->package.elements[i]), + &format, &state); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Invalid _TSS data")); + result = -EFAULT; + kfree(pr->throttling.states_tss); + goto end; + } + + if (!tx->freqpercentage) { + printk(KERN_ERR PREFIX + "Invalid _TSS data: freq is zero\n"); + result = -EFAULT; + kfree(pr->throttling.states_tss); + goto end; + } + } + + end: + kfree(buffer.pointer); + + return result; +} +static int acpi_processor_get_tsd(struct acpi_processor *pr) +{ + int result = 0; + acpi_status status = AE_OK; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" }; + struct acpi_buffer state = { 0, NULL }; + union acpi_object *tsd = NULL; + struct acpi_tsd_package *pdomain; + + status = acpi_evaluate_object(pr->handle, "_TSD", NULL, &buffer); + if (ACPI_FAILURE(status)) { + return -ENODEV; + } + + tsd = buffer.pointer; + if (!tsd || (tsd->type != ACPI_TYPE_PACKAGE)) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); + result = -EFAULT; + goto end; + } + + if (tsd->package.count != 1) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); + result = -EFAULT; + goto end; + } + + pdomain = &(pr->throttling.domain_info); + + state.length = sizeof(struct acpi_tsd_package); + state.pointer = pdomain; + + status = acpi_extract_package(&(tsd->package.elements[0]), + &format, &state); + if (ACPI_FAILURE(status)) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); + result = -EFAULT; + goto end; + } + + if (pdomain->num_entries != ACPI_TSD_REV0_ENTRIES) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:num_entries\n")); + result = -EFAULT; + goto end; + } + + if (pdomain->revision != ACPI_TSD_REV0_REVISION) { + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown 
_TSD:revision\n")); + result = -EFAULT; + goto end; + } + + end: + kfree(buffer.pointer); + return result; +} + /* -------------------------------------------------------------------------- Throttling Control -------------------------------------------------------------------------- */ -static int acpi_processor_get_throttling(struct acpi_processor *pr) +static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr) { int state = 0; u32 value = 0; u32 duty_mask = 0; u32 duty_value = 0; - if (!pr) return -EINVAL; @@ -94,13 +308,114 @@ return 0; } -int acpi_processor_set_throttling(struct acpi_processor *pr, int state) +static int acpi_read_throttling_status(struct acpi_processor_throttling + *throttling) +{ + int value = -1; + switch (throttling->status_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + acpi_os_read_port((acpi_io_address) throttling->status_register. + address, &value, + (u32) throttling->status_register.bit_width * + 8); + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + printk(KERN_ERR PREFIX + "HARDWARE addr space,NOT supported yet\n"); + break; + default: + printk(KERN_ERR PREFIX "Unknown addr space %d\n", + (u32) (throttling->status_register.space_id)); + } + return value; +} + +static int acpi_write_throttling_state(struct acpi_processor_throttling + *throttling, int value) +{ + int ret = -1; + + switch (throttling->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + acpi_os_write_port((acpi_io_address) throttling-> + control_register.address, value, + (u32) throttling->control_register. 
+ bit_width * 8); + ret = 0; + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + printk(KERN_ERR PREFIX + "HARDWARE addr space,NOT supported yet\n"); + break; + default: + printk(KERN_ERR PREFIX "Unknown addr space %d\n", + (u32) (throttling->control_register.space_id)); + } + return ret; +} + +static int acpi_get_throttling_state(struct acpi_processor *pr, int value) +{ + int i; + + for (i = 0; i < pr->throttling.state_count; i++) { + struct acpi_processor_tx_tss *tx = + (struct acpi_processor_tx_tss *)&(pr->throttling. + states_tss[i]); + if (tx->control == value) + break; + } + if (i > pr->throttling.state_count) + i = -1; + return i; +} + +static int acpi_get_throttling_value(struct acpi_processor *pr, int state) +{ + int value = -1; + if (state >= 0 && state <= pr->throttling.state_count) { + struct acpi_processor_tx_tss *tx = + (struct acpi_processor_tx_tss *)&(pr->throttling. + states_tss[state]); + value = tx->control; + } + return value; +} + +static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) +{ + int state = 0; + u32 value = 0; + + if (!pr) + return -EINVAL; + + if (!pr->flags.throttling) + return -ENODEV; + + pr->throttling.state = 0; + local_irq_disable(); + value = acpi_read_throttling_status(&pr->throttling); + if (value >= 0) { + state = acpi_get_throttling_state(pr, value); + pr->throttling.state = state; + } + local_irq_enable(); + + return 0; +} + +static int acpi_processor_get_throttling(struct acpi_processor *pr) +{ + return pr->throttling.acpi_processor_get_throttling(pr); +} + +int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, int state) { u32 value = 0; u32 duty_mask = 0; u32 duty_value = 0; - if (!pr) return -EINVAL; @@ -113,6 +428,8 @@ if (state == pr->throttling.state) return 0; + if (state < pr->throttling_platform_limit) + return -EPERM; /* * Calculate the duty_value and duty_mask. 
*/ @@ -165,12 +482,50 @@ return 0; } +int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int state) +{ + u32 value = 0; + + if (!pr) + return -EINVAL; + + if ((state < 0) || (state > (pr->throttling.state_count - 1))) + return -EINVAL; + + if (!pr->flags.throttling) + return -ENODEV; + + if (state == pr->throttling.state) + return 0; + + if (state < pr->throttling_platform_limit) + return -EPERM; + + local_irq_disable(); + + value = acpi_get_throttling_value(pr, state); + if (value >= 0) { + acpi_write_throttling_state(&pr->throttling, value); + pr->throttling.state = state; + } + local_irq_enable(); + + return 0; +} + +int acpi_processor_set_throttling(struct acpi_processor *pr, int state) +{ + return pr->throttling.acpi_processor_set_throttling(pr, state); +} + int acpi_processor_get_throttling_info(struct acpi_processor *pr) { int result = 0; int step = 0; int i = 0; - + int no_ptc = 0; + int no_tss = 0; + int no_tsd = 0; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "pblk_address[0x%08x] duty_offset[%d] duty_width[%d]\n", @@ -182,6 +537,21 @@ return -EINVAL; /* TBD: Support ACPI 2.0 objects */ + no_ptc = acpi_processor_get_throttling_control(pr); + no_tss = acpi_processor_get_throttling_states(pr); + no_tsd = acpi_processor_get_tsd(pr); + + if (no_ptc || no_tss) { + pr->throttling.acpi_processor_get_throttling = + &acpi_processor_get_throttling_fadt; + pr->throttling.acpi_processor_set_throttling = + &acpi_processor_set_throttling_fadt; + } else { + pr->throttling.acpi_processor_get_throttling = + &acpi_processor_get_throttling_ptc; + pr->throttling.acpi_processor_set_throttling = + &acpi_processor_set_throttling_ptc; + } if (!pr->throttling.address) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No throttling register\n")); @@ -262,7 +632,6 @@ int i = 0; int result = 0; - if (!pr) goto end; @@ -280,15 +649,27 @@ } seq_printf(seq, "state count: %d\n" - "active state: T%d\n", - pr->throttling.state_count, pr->throttling.state); + "active state: T%d\n" + "state available: 
T%d to T%d\n", + pr->throttling.state_count, pr->throttling.state, + pr->throttling_platform_limit, + pr->throttling.state_count - 1); seq_puts(seq, "states:\n"); + if (pr->throttling.acpi_processor_get_throttling == + acpi_processor_get_throttling_fadt) { for (i = 0; i < pr->throttling.state_count; i++) seq_printf(seq, " %cT%d: %02d%%\n", (i == pr->throttling.state ? '*' : ' '), i, (pr->throttling.states[i].performance ? pr-> throttling.states[i].performance / 10 : 0)); + } else { + for (i = 0; i < pr->throttling.state_count; i++) + seq_printf(seq, " %cT%d: %02d%%\n", + (i == pr->throttling.state ? '*' : ' '), i, + (int)pr->throttling.states_tss[i]. + freqpercentage); + } end: return 0; @@ -301,7 +682,7 @@ PDE(inode)->data); } -static ssize_t acpi_processor_write_throttling(struct file * file, +static ssize_t acpi_processor_write_throttling(struct file *file, const char __user * buffer, size_t count, loff_t * data) { @@ -310,7 +691,6 @@ struct acpi_processor *pr = m->private; char state_string[12] = { '\0' }; - if (!pr || (count > sizeof(state_string) - 1)) return -EINVAL; diff -Nurb linux-2.6.22-570/drivers/acpi/sbs.c linux-2.6.22-591/drivers/acpi/sbs.c --- linux-2.6.22-570/drivers/acpi/sbs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/sbs.c 2007-12-21 15:36:11.000000000 -0500 @@ -127,7 +127,7 @@ static struct acpi_driver acpi_sbs_driver = { .name = "sbs", .class = ACPI_SBS_CLASS, - .ids = ACPI_SBS_HID, + .ids = "ACPI0001,ACPI0005", .ops = { .add = acpi_sbs_add, .remove = acpi_sbs_remove, @@ -176,10 +176,8 @@ }; struct acpi_sbs { - acpi_handle handle; int base; struct acpi_device *device; - struct acpi_ec_smbus *smbus; struct mutex mutex; int sbsm_present; int sbsm_batteries_supported; @@ -511,7 +509,7 @@ "acpi_sbs_read_word() failed")); goto end; } - + sbs->sbsm_present = 1; sbs->sbsm_batteries_supported = battery_system_info & 0x000f; end: @@ -1630,13 +1628,12 @@ { struct acpi_sbs *sbs = NULL; int result = 0, remove_result = 0; - 
unsigned long sbs_obj; int id; acpi_status status = AE_OK; unsigned long val; status = - acpi_evaluate_integer(device->parent->handle, "_EC", NULL, &val); + acpi_evaluate_integer(device->handle, "_EC", NULL, &val); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Error obtaining _EC")); return -EIO; @@ -1653,7 +1650,7 @@ sbs_mutex_lock(sbs); - sbs->base = (val & 0xff00ull) >> 8; + sbs->base = 0xff & (val >> 8); sbs->device = device; strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME); @@ -1665,24 +1662,10 @@ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "acpi_ac_add() failed")); goto end; } - status = acpi_evaluate_integer(device->handle, "_SBS", NULL, &sbs_obj); - if (status) { - ACPI_EXCEPTION((AE_INFO, status, - "acpi_evaluate_integer() failed")); - result = -EIO; - goto end; - } - if (sbs_obj > 0) { - result = acpi_sbsm_get_info(sbs); - if (result) { - ACPI_EXCEPTION((AE_INFO, AE_ERROR, - "acpi_sbsm_get_info() failed")); - goto end; - } - sbs->sbsm_present = 1; - } - if (sbs->sbsm_present == 0) { + acpi_sbsm_get_info(sbs); + + if (!sbs->sbsm_present) { result = acpi_battery_add(sbs, 0); if (result) { ACPI_EXCEPTION((AE_INFO, AE_ERROR, @@ -1702,8 +1685,6 @@ } } - sbs->handle = device->handle; - init_timer(&sbs->update_timer); result = acpi_check_update_proc(sbs); if (result) diff -Nurb linux-2.6.22-570/drivers/acpi/system.c linux-2.6.22-591/drivers/acpi/system.c --- linux-2.6.22-570/drivers/acpi/system.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/system.c 2007-12-21 15:36:11.000000000 -0500 @@ -39,15 +39,12 @@ #define ACPI_SYSTEM_CLASS "system" #define ACPI_SYSTEM_DEVICE_NAME "System" -#define ACPI_SYSTEM_FILE_INFO "info" -#define ACPI_SYSTEM_FILE_EVENT "event" -#define ACPI_SYSTEM_FILE_DSDT "dsdt" -#define ACPI_SYSTEM_FILE_FADT "fadt" /* * Make ACPICA version work as module param */ -static int param_get_acpica_version(char *buffer, struct kernel_param *kp) { +static int param_get_acpica_version(char *buffer, struct 
kernel_param *kp) +{ int result; result = sprintf(buffer, "%x", ACPI_CA_VERSION); @@ -58,9 +55,126 @@ module_param_call(acpica_version, NULL, param_get_acpica_version, NULL, 0444); /* -------------------------------------------------------------------------- + FS Interface (/sys) + -------------------------------------------------------------------------- */ +static LIST_HEAD(acpi_table_attr_list); +static struct kobject tables_kobj; + +struct acpi_table_attr { + struct bin_attribute attr; + char name[8]; + int instance; + struct list_head node; +}; + +static ssize_t acpi_table_show(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t offset, size_t count) +{ + struct acpi_table_attr *table_attr = + container_of(bin_attr, struct acpi_table_attr, attr); + struct acpi_table_header *table_header = NULL; + acpi_status status; + ssize_t ret_count = count; + + status = + acpi_get_table(table_attr->name, table_attr->instance, + &table_header); + if (ACPI_FAILURE(status)) + return -ENODEV; + + if (offset >= table_header->length) { + ret_count = 0; + goto end; + } + + if (offset + ret_count > table_header->length) + ret_count = table_header->length - offset; + + memcpy(buf, ((char *)table_header) + offset, ret_count); + + end: + return ret_count; +} + +static void acpi_table_attr_init(struct acpi_table_attr *table_attr, + struct acpi_table_header *table_header) +{ + struct acpi_table_header *header = NULL; + struct acpi_table_attr *attr = NULL; + + memcpy(table_attr->name, table_header->signature, ACPI_NAME_SIZE); + + list_for_each_entry(attr, &acpi_table_attr_list, node) { + if (!memcmp(table_header->signature, attr->name, + ACPI_NAME_SIZE)) + if (table_attr->instance < attr->instance) + table_attr->instance = attr->instance; + } + table_attr->instance++; + + if (table_attr->instance > 1 || (table_attr->instance == 1 && + !acpi_get_table(table_header-> + signature, 2, + &header))) + sprintf(table_attr->name + 4, "%d", table_attr->instance); + + 
table_attr->attr.size = 0; + table_attr->attr.read = acpi_table_show; + table_attr->attr.attr.name = table_attr->name; + table_attr->attr.attr.mode = 0444; + table_attr->attr.attr.owner = THIS_MODULE; + + return; +} + +static int acpi_system_sysfs_init(void) +{ + struct acpi_table_attr *table_attr; + struct acpi_table_header *table_header = NULL; + int table_index = 0; + int result; + + tables_kobj.parent = &acpi_subsys.kobj; + kobject_set_name(&tables_kobj, "tables"); + result = kobject_register(&tables_kobj); + if (result) + return result; + + do { + result = acpi_get_table_by_index(table_index, &table_header); + if (!result) { + table_index++; + table_attr = NULL; + table_attr = + kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL); + if (!table_attr) + return -ENOMEM; + + acpi_table_attr_init(table_attr, table_header); + result = + sysfs_create_bin_file(&tables_kobj, + &table_attr->attr); + if (result) { + kfree(table_attr); + return result; + } else + list_add_tail(&table_attr->node, + &acpi_table_attr_list); + } + } while (!result); + + return 0; +} + +/* -------------------------------------------------------------------------- FS Interface (/proc) -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI_PROCFS +#define ACPI_SYSTEM_FILE_INFO "info" +#define ACPI_SYSTEM_FILE_EVENT "event" +#define ACPI_SYSTEM_FILE_DSDT "dsdt" +#define ACPI_SYSTEM_FILE_FADT "fadt" static int acpi_system_read_info(struct seq_file *seq, void *offset) { @@ -80,7 +194,6 @@ .llseek = seq_lseek, .release = single_release, }; -#endif static ssize_t acpi_system_read_dsdt(struct file *, char __user *, size_t, loff_t *); @@ -97,13 +210,11 @@ struct acpi_table_header *dsdt = NULL; ssize_t res; - status = acpi_get_table(ACPI_SIG_DSDT, 1, &dsdt); if (ACPI_FAILURE(status)) return -ENODEV; - res = simple_read_from_buffer(buffer, count, ppos, - dsdt, dsdt->length); + res = simple_read_from_buffer(buffer, count, ppos, dsdt, dsdt->length); return res; } 
@@ -123,28 +234,21 @@ struct acpi_table_header *fadt = NULL; ssize_t res; - status = acpi_get_table(ACPI_SIG_FADT, 1, &fadt); if (ACPI_FAILURE(status)) return -ENODEV; - res = simple_read_from_buffer(buffer, count, ppos, - fadt, fadt->length); + res = simple_read_from_buffer(buffer, count, ppos, fadt, fadt->length); return res; } -static int __init acpi_system_init(void) +static int acpi_system_procfs_init(void) { struct proc_dir_entry *entry; int error = 0; char *name; - - if (acpi_disabled) - return 0; - -#ifdef CONFIG_ACPI_PROCFS /* 'info' [R] */ name = ACPI_SYSTEM_FILE_INFO; entry = create_proc_entry(name, S_IRUGO, acpi_root_dir); @@ -153,7 +257,6 @@ else { entry->proc_fops = &acpi_system_info_ops; } -#endif /* 'dsdt' [R] */ name = ACPI_SYSTEM_FILE_DSDT; @@ -177,12 +280,32 @@ Error: remove_proc_entry(ACPI_SYSTEM_FILE_FADT, acpi_root_dir); remove_proc_entry(ACPI_SYSTEM_FILE_DSDT, acpi_root_dir); -#ifdef CONFIG_ACPI_PROCFS remove_proc_entry(ACPI_SYSTEM_FILE_INFO, acpi_root_dir); -#endif error = -EFAULT; goto Done; } +#else +static int acpi_system_procfs_init(void) +{ + return 0; +} +#endif + +static int __init acpi_system_init(void) +{ + int result = 0; + + if (acpi_disabled) + return 0; + + result = acpi_system_procfs_init(); + if (result) + return result; + + result = acpi_system_sysfs_init(); + + return result; +} subsys_initcall(acpi_system_init); diff -Nurb linux-2.6.22-570/drivers/acpi/thermal.c linux-2.6.22-591/drivers/acpi/thermal.c --- linux-2.6.22-570/drivers/acpi/thermal.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/thermal.c 2007-12-21 15:36:11.000000000 -0500 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,6 @@ #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0 #define ACPI_THERMAL_NOTIFY_HOT 0xF1 #define ACPI_THERMAL_MODE_ACTIVE 0x00 -#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff" #define ACPI_THERMAL_MAX_ACTIVE 10 #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65 @@ -419,26 +419,6 @@ return 0; } 
-static int acpi_thermal_call_usermode(char *path) -{ - char *argv[2] = { NULL, NULL }; - char *envp[3] = { NULL, NULL, NULL }; - - - if (!path) - return -EINVAL; - - argv[0] = path; - - /* minimal command environment */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - - call_usermodehelper(argv[0], argv, envp, 0); - - return 0; -} - static int acpi_thermal_critical(struct acpi_thermal *tz) { if (!tz || !tz->trips.critical.flags.valid) @@ -456,7 +436,7 @@ acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL, tz->trips.critical.flags.enabled); - acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF); + orderly_poweroff(true); return 0; } @@ -1114,7 +1094,6 @@ break; case ACPI_THERMAL_NOTIFY_THRESHOLDS: acpi_thermal_get_trip_points(tz); - acpi_thermal_check(tz); acpi_bus_generate_event(device, event, 0); break; case ACPI_THERMAL_NOTIFY_DEVICES: diff -Nurb linux-2.6.22-570/drivers/acpi/utilities/uteval.c linux-2.6.22-591/drivers/acpi/utilities/uteval.c --- linux-2.6.22-570/drivers/acpi/utilities/uteval.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/utilities/uteval.c 2007-12-21 15:36:11.000000000 -0500 @@ -62,16 +62,13 @@ static char *acpi_interfaces_supported[] = { /* Operating System Vendor Strings */ - "Windows 2000", - "Windows 2001", - "Windows 2001 SP0", - "Windows 2001 SP1", - "Windows 2001 SP2", - "Windows 2001 SP3", - "Windows 2001 SP4", - "Windows 2001.1", - "Windows 2001.1 SP1", /* Added 03/2006 */ - "Windows 2006", /* Added 03/2006 */ + "Windows 2000", /* Windows 2000 */ + "Windows 2001", /* Windows XP */ + "Windows 2001 SP1", /* Windows XP SP1 */ + "Windows 2001 SP2", /* Windows XP SP2 */ + "Windows 2001.1", /* Windows Server 2003 */ + "Windows 2001.1 SP1", /* Windows Server 2003 SP1 - Added 03/2006 */ + "Windows 2006", /* Windows Vista - Added 03/2006 */ /* Feature Group Strings */ diff -Nurb linux-2.6.22-570/drivers/acpi/video.c linux-2.6.22-591/drivers/acpi/video.c --- 
linux-2.6.22-570/drivers/acpi/video.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/acpi/video.c 2007-12-21 15:36:14.000000000 -0500 @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -169,6 +170,7 @@ struct acpi_device *dev; struct acpi_video_device_brightness *brightness; struct backlight_device *backlight; + struct output_device *output_dev; }; /* bus */ @@ -272,6 +274,10 @@ u32 level_current, u32 event); static void acpi_video_switch_brightness(struct acpi_video_device *device, int event); +static int acpi_video_device_get_state(struct acpi_video_device *device, + unsigned long *state); +static int acpi_video_output_get(struct output_device *od); +static int acpi_video_device_set_state(struct acpi_video_device *device, int state); /*backlight device sysfs support*/ static int acpi_video_get_brightness(struct backlight_device *bd) @@ -297,6 +303,28 @@ .update_status = acpi_video_set_brightness, }; +/*video output device sysfs support*/ +static int acpi_video_output_get(struct output_device *od) +{ + unsigned long state; + struct acpi_video_device *vd = + (struct acpi_video_device *)class_get_devdata(&od->class_dev); + acpi_video_device_get_state(vd, &state); + return (int)state; +} + +static int acpi_video_output_set(struct output_device *od) +{ + unsigned long state = od->request_state; + struct acpi_video_device *vd= + (struct acpi_video_device *)class_get_devdata(&od->class_dev); + return acpi_video_device_set_state(vd, state); +} + +static struct output_properties acpi_output_properties = { + .set_state = acpi_video_output_set, + .get_status = acpi_video_output_get, +}; /* -------------------------------------------------------------------------- Video Management -------------------------------------------------------------------------- */ @@ -531,7 +559,6 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) { - acpi_integer status; acpi_handle h_dummy1; int i; u32 max_level = 0; @@ -565,9 +592,9 @@ 
device->cap._DSS = 1; } - status = acpi_video_device_lcd_query_levels(device, &obj); + if (ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) { - if (obj && obj->type == ACPI_TYPE_PACKAGE && obj->package.count >= 2) { + if (obj->package.count >= 2) { int count = 0; union acpi_object *o; @@ -588,6 +615,7 @@ continue; } br->levels[count] = (u32) o->integer.value; + if (br->levels[count] > max_level) max_level = br->levels[count]; count++; @@ -606,9 +634,13 @@ } } + } else { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available LCD brightness level\n")); + } + kfree(obj); - if (device->cap._BCL && device->cap._BCM && device->cap._BQC){ + if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0){ unsigned long tmp; static int count = 0; char *name; @@ -626,6 +658,17 @@ kfree(name); } + if (device->cap._DCS && device->cap._DSS){ + static int count = 0; + char *name; + name = kzalloc(MAX_NAME_LEN, GFP_KERNEL); + if (!name) + return; + sprintf(name, "acpi_video%d", count++); + device->output_dev = video_output_register(name, + NULL, device, &acpi_output_properties); + kfree(name); + } return; } @@ -1669,6 +1712,7 @@ ACPI_DEVICE_NOTIFY, acpi_video_device_notify); backlight_device_unregister(device->backlight); + video_output_unregister(device->output_dev); return 0; } diff -Nurb linux-2.6.22-570/drivers/atm/idt77252.c linux-2.6.22-591/drivers/atm/idt77252.c --- linux-2.6.22-570/drivers/atm/idt77252.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/atm/idt77252.c 2007-12-21 15:36:14.000000000 -0500 @@ -3576,7 +3576,7 @@ * XXX: */ sprintf(tname, "eth%d", card->index); - tmp = dev_get_by_name(tname); /* jhs: was "tmp = dev_get(tname);" */ + tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */ if (tmp) { memcpy(card->atmdev->esi, tmp->dev_addr, 6); diff -Nurb linux-2.6.22-570/drivers/base/bus.c linux-2.6.22-591/drivers/base/bus.c --- linux-2.6.22-570/drivers/base/bus.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/base/bus.c 2007-12-21 15:36:11.000000000 -0500 @@ -562,7 +562,6 @@ bus->drivers_probe_attr.attr.name = "drivers_probe"; bus->drivers_probe_attr.attr.mode = S_IWUSR; - bus->drivers_probe_attr.attr.owner = bus->owner; bus->drivers_probe_attr.store = store_drivers_probe; retval = bus_create_file(bus, &bus->drivers_probe_attr); if (retval) @@ -570,7 +569,6 @@ bus->drivers_autoprobe_attr.attr.name = "drivers_autoprobe"; bus->drivers_autoprobe_attr.attr.mode = S_IWUSR | S_IRUGO; - bus->drivers_autoprobe_attr.attr.owner = bus->owner; bus->drivers_autoprobe_attr.show = show_drivers_autoprobe; bus->drivers_autoprobe_attr.store = store_drivers_autoprobe; retval = bus_create_file(bus, &bus->drivers_autoprobe_attr); diff -Nurb linux-2.6.22-570/drivers/base/class.c linux-2.6.22-591/drivers/base/class.c --- linux-2.6.22-570/drivers/base/class.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/base/class.c 2007-12-21 15:36:14.000000000 -0500 @@ -134,6 +134,17 @@ } } +static int class_setup_shadowing(struct class *cls) +{ + const struct shadow_dir_operations *shadow_ops; + + shadow_ops = cls->shadow_ops; + if (!shadow_ops) + return 0; + + return sysfs_enable_shadowing(&cls->subsys.kobj, shadow_ops); +} + int class_register(struct class * cls) { int error; @@ -152,11 +163,22 @@ subsys_set_kset(cls, class_subsys); error = subsystem_register(&cls->subsys); - if (!error) { - error = add_class_attrs(class_get(cls)); - class_put(cls); - } + if (error) + goto out; + + error = class_setup_shadowing(cls); + if (error) + goto out_unregister; + + error = add_class_attrs(cls); + if (error) + goto out_unregister; + +out: return error; +out_unregister: + subsystem_unregister(&cls->subsys); + goto out; } void class_unregister(struct class * cls) @@ -312,9 +334,6 @@ pr_debug("device class '%s': release.\n", cd->class_id); - kfree(cd->devt_attr); - cd->devt_attr = NULL; - if (cd->release) cd->release(cd); else if (cls->release) 
@@ -547,6 +566,9 @@ return print_dev_t(buf, class_dev->devt); } +static struct class_device_attribute class_devt_attr = + __ATTR(dev, S_IRUGO, show_dev, NULL); + static ssize_t store_uevent(struct class_device *class_dev, const char *buf, size_t count) { @@ -554,6 +576,9 @@ return count; } +static struct class_device_attribute class_uevent_attr = + __ATTR(uevent, S_IWUSR, NULL, store_uevent); + void class_device_initialize(struct class_device *class_dev) { kobj_set_kset_s(class_dev, class_obj_subsys); @@ -603,34 +628,17 @@ &parent_class->subsys.kobj, "subsystem"); if (error) goto out3; - class_dev->uevent_attr.attr.name = "uevent"; - class_dev->uevent_attr.attr.mode = S_IWUSR; - class_dev->uevent_attr.attr.owner = parent_class->owner; - class_dev->uevent_attr.store = store_uevent; - error = class_device_create_file(class_dev, &class_dev->uevent_attr); + + error = class_device_create_file(class_dev, &class_uevent_attr); if (error) goto out3; if (MAJOR(class_dev->devt)) { - struct class_device_attribute *attr; - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) { - error = -ENOMEM; - goto out4; - } - attr->attr.name = "dev"; - attr->attr.mode = S_IRUGO; - attr->attr.owner = parent_class->owner; - attr->show = show_dev; - error = class_device_create_file(class_dev, attr); - if (error) { - kfree(attr); + error = class_device_create_file(class_dev, &class_devt_attr); + if (error) goto out4; } - class_dev->devt_attr = attr; - } - error = class_device_add_attrs(class_dev); if (error) goto out5; @@ -671,10 +679,10 @@ out6: class_device_remove_attrs(class_dev); out5: - if (class_dev->devt_attr) - class_device_remove_file(class_dev, class_dev->devt_attr); + if (MAJOR(class_dev->devt)) + class_device_remove_file(class_dev, &class_devt_attr); out4: - class_device_remove_file(class_dev, &class_dev->uevent_attr); + class_device_remove_file(class_dev, &class_uevent_attr); out3: kobject_del(&class_dev->kobj); out2: @@ -774,9 +782,9 @@ sysfs_remove_link(&class_dev->kobj, 
"device"); } sysfs_remove_link(&class_dev->kobj, "subsystem"); - class_device_remove_file(class_dev, &class_dev->uevent_attr); - if (class_dev->devt_attr) - class_device_remove_file(class_dev, class_dev->devt_attr); + class_device_remove_file(class_dev, &class_uevent_attr); + if (MAJOR(class_dev->devt)) + class_device_remove_file(class_dev, &class_devt_attr); class_device_remove_attrs(class_dev); class_device_remove_groups(class_dev); diff -Nurb linux-2.6.22-570/drivers/base/core.c linux-2.6.22-591/drivers/base/core.c --- linux-2.6.22-570/drivers/base/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/base/core.c 2007-12-21 15:36:14.000000000 -0500 @@ -310,6 +310,9 @@ return count; } +static struct device_attribute uevent_attr = + __ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent); + static int device_add_attributes(struct device *dev, struct device_attribute *attrs) { @@ -423,6 +426,9 @@ return print_dev_t(buf, dev->devt); } +static struct device_attribute devt_attr = + __ATTR(dev, S_IRUGO, show_dev, NULL); + /* * devices_subsys - structure to be registered with kobject core. */ @@ -616,8 +622,14 @@ return kobj; /* or create a new class-directory at the parent device */ - return kobject_kset_add_dir(&dev->class->class_dirs, + kobj = kobject_kset_add_dir(&dev->class->class_dirs, parent_kobj, dev->class->name); + + /* If we created a new class-directory setup shadowing */ + if (kobj && dev->class->shadow_ops) + sysfs_enable_shadowing(kobj, dev->class->shadow_ops); + + return kobj; } if (parent) @@ -637,6 +649,82 @@ return 0; } +static int device_add_class_symlinks(struct device *dev) +{ + int error; + + if (!dev->class) + return 0; + error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, + "subsystem"); + if (error) + goto out; + /* + * If this is not a "fake" compatible device, then create the + * symlink from the class to the device. 
+ */ + if (dev->kobj.parent != &dev->class->subsys.kobj) { + error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, + dev->bus_id); + if (error) + goto out_subsys; + } + /* only bus-device parents get a "device"-link */ + if (dev->parent && dev->parent->bus) { + error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, + "device"); + if (error) + goto out_busid; +#ifdef CONFIG_SYSFS_DEPRECATED + { + char * class_name = make_class_name(dev->class->name, + &dev->kobj); + if (class_name) + error = sysfs_create_link(&dev->parent->kobj, + &dev->kobj, class_name); + kfree(class_name); + if (error) + goto out_device; + } +#endif + } + return 0; + +#ifdef CONFIG_SYSFS_DEPRECATED +out_device: + if (dev->parent) + sysfs_remove_link(&dev->kobj, "device"); +#endif +out_busid: + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); +out_subsys: + sysfs_remove_link(&dev->kobj, "subsystem"); +out: + return error; +} + +static void device_remove_class_symlinks(struct device *dev) +{ + if (!dev->class) + return; + if (dev->parent) { +#ifdef CONFIG_SYSFS_DEPRECATED + char *class_name; + + class_name = make_class_name(dev->class->name, &dev->kobj); + if (class_name) { + sysfs_remove_link(&dev->parent->kobj, class_name); + kfree(class_name); + } +#endif + sysfs_remove_link(&dev->kobj, "device"); + } + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); + sysfs_remove_link(&dev->kobj, "subsystem"); +} + /** * device_add - add device to device hierarchy. * @dev: device. 
@@ -651,7 +739,6 @@ int device_add(struct device *dev) { struct device *parent = NULL; - char *class_name = NULL; struct class_interface *class_intf; int error = -EINVAL; @@ -681,58 +768,17 @@ blocking_notifier_call_chain(&dev->bus->bus_notifier, BUS_NOTIFY_ADD_DEVICE, dev); - dev->uevent_attr.attr.name = "uevent"; - dev->uevent_attr.attr.mode = S_IRUGO | S_IWUSR; - if (dev->driver) - dev->uevent_attr.attr.owner = dev->driver->owner; - dev->uevent_attr.store = store_uevent; - dev->uevent_attr.show = show_uevent; - error = device_create_file(dev, &dev->uevent_attr); + error = device_create_file(dev, &uevent_attr); if (error) goto attrError; if (MAJOR(dev->devt)) { - struct device_attribute *attr; - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) { - error = -ENOMEM; - goto ueventattrError; - } - attr->attr.name = "dev"; - attr->attr.mode = S_IRUGO; - if (dev->driver) - attr->attr.owner = dev->driver->owner; - attr->show = show_dev; - error = device_create_file(dev, attr); - if (error) { - kfree(attr); + error = device_create_file(dev, &devt_attr); + if (error) goto ueventattrError; } - - dev->devt_attr = attr; - } - - if (dev->class) { - sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, - "subsystem"); - /* If this is not a "fake" compatible device, then create the - * symlink from the class to the device. 
*/ - if (dev->kobj.parent != &dev->class->subsys.kobj) - sysfs_create_link(&dev->class->subsys.kobj, - &dev->kobj, dev->bus_id); - if (parent) { - sysfs_create_link(&dev->kobj, &dev->parent->kobj, - "device"); -#ifdef CONFIG_SYSFS_DEPRECATED - class_name = make_class_name(dev->class->name, - &dev->kobj); - if (class_name) - sysfs_create_link(&dev->parent->kobj, - &dev->kobj, class_name); -#endif - } - } - + if ((error = device_add_class_symlinks(dev))) + goto SymlinkError; if ((error = device_add_attrs(dev))) goto AttrsError; if ((error = device_pm_add(dev))) @@ -756,7 +802,6 @@ up(&dev->class->sem); } Done: - kfree(class_name); put_device(dev); return error; BusError: @@ -767,10 +812,10 @@ BUS_NOTIFY_DEL_DEVICE, dev); device_remove_attrs(dev); AttrsError: - if (dev->devt_attr) { - device_remove_file(dev, dev->devt_attr); - kfree(dev->devt_attr); - } + device_remove_class_symlinks(dev); + SymlinkError: + if (MAJOR(dev->devt)) + device_remove_file(dev, &devt_attr); if (dev->class) { sysfs_remove_link(&dev->kobj, "subsystem"); @@ -792,7 +837,7 @@ } } ueventattrError: - device_remove_file(dev, &dev->uevent_attr); + device_remove_file(dev, &uevent_attr); attrError: kobject_uevent(&dev->kobj, KOBJ_REMOVE); kobject_del(&dev->kobj); @@ -869,17 +914,15 @@ if (parent) klist_del(&dev->knode_parent); - if (dev->devt_attr) { - device_remove_file(dev, dev->devt_attr); - kfree(dev->devt_attr); - } + if (MAJOR(dev->devt)) + device_remove_file(dev, &devt_attr); if (dev->class) { sysfs_remove_link(&dev->kobj, "subsystem"); /* If this is not a "fake" compatible device, remove the * symlink from the class to the device. 
*/ if (dev->kobj.parent != &dev->class->subsys.kobj) - sysfs_remove_link(&dev->class->subsys.kobj, - dev->bus_id); + sysfs_delete_link(&dev->class->subsys.kobj, + &dev->kobj, dev->bus_id); if (parent) { #ifdef CONFIG_SYSFS_DEPRECATED char *class_name = make_class_name(dev->class->name, @@ -926,7 +969,7 @@ up(&dev->class->sem); } } - device_remove_file(dev, &dev->uevent_attr); + device_remove_file(dev, &uevent_attr); device_remove_attrs(dev); bus_remove_device(dev); @@ -1155,7 +1198,7 @@ { char *old_class_name = NULL; char *new_class_name = NULL; - char *old_symlink_name = NULL; + char *old_device_name = NULL; int error; dev = get_device(dev); @@ -1169,42 +1212,46 @@ old_class_name = make_class_name(dev->class->name, &dev->kobj); #endif - if (dev->class) { - old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); - if (!old_symlink_name) { + old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); + if (!old_device_name) { error = -ENOMEM; - goto out_free_old_class; - } - strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE); + goto out; } - + strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE); strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); + if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) { + error = sysfs_rename_link(&dev->class->subsys.kobj, + &dev->kobj, old_device_name, new_name); + if (error) + goto out; + } + error = kobject_rename(&dev->kobj, new_name); + if (error) { + strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE); + goto out; + } #ifdef CONFIG_SYSFS_DEPRECATED if (old_class_name) { + error = -ENOMEM; new_class_name = make_class_name(dev->class->name, &dev->kobj); - if (new_class_name) { - sysfs_create_link(&dev->parent->kobj, &dev->kobj, - new_class_name); - sysfs_remove_link(&dev->parent->kobj, old_class_name); - } - } -#endif + if (!new_class_name) + goto out; - if (dev->class) { - sysfs_remove_link(&dev->class->subsys.kobj, - old_symlink_name); - sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, - dev->bus_id); + error = 
sysfs_rename_link(&dev->parent->kobj, &dev->kobj, + old_class_name, new_class_name); + if (error) + goto out; } +#endif +out: put_device(dev); kfree(new_class_name); - kfree(old_symlink_name); - out_free_old_class: kfree(old_class_name); + kfree(old_device_name); return error; } diff -Nurb linux-2.6.22-570/drivers/base/dd.c linux-2.6.22-591/drivers/base/dd.c --- linux-2.6.22-570/drivers/base/dd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/base/dd.c 2007-12-23 01:39:35.000000000 -0500 @@ -296,9 +296,8 @@ { struct device_driver * drv; - drv = dev->driver; + drv = get_driver(dev->driver); if (drv) { - get_driver(drv); driver_sysfs_remove(dev); sysfs_remove_link(&dev->kobj, "driver"); klist_remove(&dev->knode_driver); diff -Nurb linux-2.6.22-570/drivers/base/dd.c.orig linux-2.6.22-591/drivers/base/dd.c.orig --- linux-2.6.22-570/drivers/base/dd.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/base/dd.c.orig 2007-12-22 21:18:39.000000000 -0500 @@ -0,0 +1,369 @@ +/* + * drivers/base/dd.c - The core device/driver interactions. + * + * This file contains the (sometimes tricky) code that controls the + * interactions between devices and drivers, which primarily includes + * driver binding and unbinding. + * + * All of this code used to exist in drivers/base/bus.c, but was + * relocated to here in the name of compartmentalization (since it wasn't + * strictly code just for the 'struct bus_type'. 
+ * + * Copyright (c) 2002-5 Patrick Mochel + * Copyright (c) 2002-3 Open Source Development Labs + * + * This file is released under the GPLv2 + */ + +#include +#include +#include +#include + +#include "base.h" +#include "power/power.h" + +#define to_drv(node) container_of(node, struct device_driver, kobj.entry) + + +static void driver_bound(struct device *dev) +{ + if (klist_node_attached(&dev->knode_driver)) { + printk(KERN_WARNING "%s: device %s already bound\n", + __FUNCTION__, kobject_name(&dev->kobj)); + return; + } + + pr_debug("bound device '%s' to driver '%s'\n", + dev->bus_id, dev->driver->name); + + if (dev->bus) + blocking_notifier_call_chain(&dev->bus->bus_notifier, + BUS_NOTIFY_BOUND_DRIVER, dev); + + klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices); +} + +static int driver_sysfs_add(struct device *dev) +{ + int ret; + + ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, + kobject_name(&dev->kobj)); + if (ret == 0) { + ret = sysfs_create_link(&dev->kobj, &dev->driver->kobj, + "driver"); + if (ret) + sysfs_remove_link(&dev->driver->kobj, + kobject_name(&dev->kobj)); + } + return ret; +} + +static void driver_sysfs_remove(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv) { + sysfs_remove_link(&drv->kobj, kobject_name(&dev->kobj)); + sysfs_remove_link(&dev->kobj, "driver"); + } +} + +/** + * device_bind_driver - bind a driver to one device. + * @dev: device. + * + * Allow manual attachment of a driver to a device. + * Caller must have already set @dev->driver. + * + * Note that this does not modify the bus reference count + * nor take the bus's rwsem. Please verify those are accounted + * for before calling this. (It is ok to call with no other effort + * from a driver's probe() method.) + * + * This function must be called with @dev->sem held. 
+ */ +int device_bind_driver(struct device *dev) +{ + int ret; + + ret = driver_sysfs_add(dev); + if (!ret) + driver_bound(dev); + return ret; +} + +static atomic_t probe_count = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); + +static int really_probe(struct device *dev, struct device_driver *drv) +{ + int ret = 0; + + atomic_inc(&probe_count); + pr_debug("%s: Probing driver %s with device %s\n", + drv->bus->name, drv->name, dev->bus_id); + WARN_ON(!list_empty(&dev->devres_head)); + + dev->driver = drv; + if (driver_sysfs_add(dev)) { + printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n", + __FUNCTION__, dev->bus_id); + goto probe_failed; + } + + if (dev->bus->probe) { + ret = dev->bus->probe(dev); + if (ret) + goto probe_failed; + } else if (drv->probe) { + ret = drv->probe(dev); + if (ret) + goto probe_failed; + } + + driver_bound(dev); + ret = 1; + pr_debug("%s: Bound Device %s to Driver %s\n", + drv->bus->name, dev->bus_id, drv->name); + goto done; + +probe_failed: + devres_release_all(dev); + driver_sysfs_remove(dev); + dev->driver = NULL; + + if (ret != -ENODEV && ret != -ENXIO) { + /* driver matched but the probe failed */ + printk(KERN_WARNING + "%s: probe of %s failed with error %d\n", + drv->name, dev->bus_id, ret); + } + /* + * Ignore errors returned by ->probe so that the next driver can try + * its luck. + */ + ret = 0; +done: + atomic_dec(&probe_count); + wake_up(&probe_waitqueue); + return ret; +} + +/** + * driver_probe_done + * Determine if the probe sequence is finished or not. + * + * Should somehow figure out how to use a semaphore, not an atomic variable... 
+ */ +int driver_probe_done(void) +{ + pr_debug("%s: probe_count = %d\n", __FUNCTION__, + atomic_read(&probe_count)); + if (atomic_read(&probe_count)) + return -EBUSY; + return 0; +} + +/** + * driver_probe_device - attempt to bind device & driver together + * @drv: driver to bind a device to + * @dev: device to try to bind to the driver + * + * First, we call the bus's match function, if one present, which should + * compare the device IDs the driver supports with the device IDs of the + * device. Note we don't do this ourselves because we don't know the + * format of the ID structures, nor what is to be considered a match and + * what is not. + * + * This function returns 1 if a match is found, -ENODEV if the device is + * not registered, and 0 otherwise. + * + * This function must be called with @dev->sem held. When called for a + * USB interface, @dev->parent->sem must be held as well. + */ +int driver_probe_device(struct device_driver * drv, struct device * dev) +{ + int ret = 0; + + if (!device_is_registered(dev)) + return -ENODEV; + if (drv->bus->match && !drv->bus->match(dev, drv)) + goto done; + + pr_debug("%s: Matched Device %s with Driver %s\n", + drv->bus->name, dev->bus_id, drv->name); + + ret = really_probe(dev, drv); + +done: + return ret; +} + +static int __device_attach(struct device_driver * drv, void * data) +{ + struct device * dev = data; + return driver_probe_device(drv, dev); +} + +/** + * device_attach - try to attach device to a driver. + * @dev: device. + * + * Walk the list of drivers that the bus has and call + * driver_probe_device() for each pair. If a compatible + * pair is found, break out and return. + * + * Returns 1 if the device was bound to a driver; + * 0 if no matching device was found; + * -ENODEV if the device is not registered. + * + * When called for a USB interface, @dev->parent->sem must be held. 
+ */ +int device_attach(struct device * dev) +{ + int ret = 0; + + down(&dev->sem); + if (dev->driver) { + ret = device_bind_driver(dev); + if (ret == 0) + ret = 1; + else { + dev->driver = NULL; + ret = 0; + } + } else { + ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); + } + up(&dev->sem); + return ret; +} + +static int __driver_attach(struct device * dev, void * data) +{ + struct device_driver * drv = data; + + /* + * Lock device and try to bind to it. We drop the error + * here and always return 0, because we need to keep trying + * to bind to devices and some drivers will return an error + * simply if it didn't support the device. + * + * driver_probe_device() will spit a warning if there + * is an error. + */ + + if (dev->parent) /* Needed for USB */ + down(&dev->parent->sem); + down(&dev->sem); + if (!dev->driver) + driver_probe_device(drv, dev); + up(&dev->sem); + if (dev->parent) + up(&dev->parent->sem); + + return 0; +} + +/** + * driver_attach - try to bind driver to devices. + * @drv: driver. + * + * Walk the list of devices that the bus has on it and try to + * match the driver with each one. If driver_probe_device() + * returns 0 and the @dev->driver is set, we've found a + * compatible pair. + */ +int driver_attach(struct device_driver * drv) +{ + return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); +} + +/** + * device_release_driver - manually detach device from driver. + * @dev: device. + * + * Manually detach device from driver. + * + * __device_release_driver() must be called with @dev->sem held. + * When called for a USB interface, @dev->parent->sem must be held + * as well. 
+ */ + +static void __device_release_driver(struct device * dev) +{ + struct device_driver * drv; + + drv = dev->driver; + if (drv) { + get_driver(drv); + driver_sysfs_remove(dev); + sysfs_remove_link(&dev->kobj, "driver"); + klist_remove(&dev->knode_driver); + + if (dev->bus) + blocking_notifier_call_chain(&dev->bus->bus_notifier, + BUS_NOTIFY_UNBIND_DRIVER, + dev); + + if (dev->bus && dev->bus->remove) + dev->bus->remove(dev); + else if (drv->remove) + drv->remove(dev); + devres_release_all(dev); + dev->driver = NULL; + put_driver(drv); + } +} + +void device_release_driver(struct device * dev) +{ + /* + * If anyone calls device_release_driver() recursively from + * within their ->remove callback for the same device, they + * will deadlock right here. + */ + down(&dev->sem); + __device_release_driver(dev); + up(&dev->sem); +} + + +/** + * driver_detach - detach driver from all devices it controls. + * @drv: driver. + */ +void driver_detach(struct device_driver * drv) +{ + struct device * dev; + + for (;;) { + spin_lock(&drv->klist_devices.k_lock); + if (list_empty(&drv->klist_devices.k_list)) { + spin_unlock(&drv->klist_devices.k_lock); + break; + } + dev = list_entry(drv->klist_devices.k_list.prev, + struct device, knode_driver.n_node); + get_device(dev); + spin_unlock(&drv->klist_devices.k_lock); + + if (dev->parent) /* Needed for USB */ + down(&dev->parent->sem); + down(&dev->sem); + if (dev->driver == drv) + __device_release_driver(dev); + up(&dev->sem); + if (dev->parent) + up(&dev->parent->sem); + put_device(dev); + } +} + +EXPORT_SYMBOL_GPL(device_bind_driver); +EXPORT_SYMBOL_GPL(device_release_driver); +EXPORT_SYMBOL_GPL(device_attach); +EXPORT_SYMBOL_GPL(driver_attach); + diff -Nurb linux-2.6.22-570/drivers/base/firmware_class.c linux-2.6.22-591/drivers/base/firmware_class.c --- linux-2.6.22-570/drivers/base/firmware_class.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/base/firmware_class.c 2007-12-21 15:36:11.000000000 -0500 @@ -175,7 
+175,7 @@ static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); static ssize_t -firmware_data_read(struct kobject *kobj, +firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = to_dev(kobj); @@ -240,7 +240,7 @@ * the driver as a firmware image. **/ static ssize_t -firmware_data_write(struct kobject *kobj, +firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = to_dev(kobj); @@ -271,7 +271,7 @@ } static struct bin_attribute firmware_attr_data_tmpl = { - .attr = {.name = "data", .mode = 0644, .owner = THIS_MODULE}, + .attr = {.name = "data", .mode = 0644}, .size = 0, .read = firmware_data_read, .write = firmware_data_write, diff -Nurb linux-2.6.22-570/drivers/block/acsi_slm.c linux-2.6.22-591/drivers/block/acsi_slm.c --- linux-2.6.22-570/drivers/block/acsi_slm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/block/acsi_slm.c 2007-12-21 15:36:11.000000000 -0500 @@ -367,7 +367,7 @@ int length; int end; - if (!(page = __get_free_page( GFP_KERNEL ))) + if (!(page = __get_free_page(GFP_TEMPORARY))) return( -ENOMEM ); length = slm_getstats( (char *)page, iminor(node) ); diff -Nurb linux-2.6.22-570/drivers/block/aoe/aoecmd.c linux-2.6.22-591/drivers/block/aoe/aoecmd.c --- linux-2.6.22-570/drivers/block/aoe/aoecmd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/block/aoe/aoecmd.c 2007-12-21 15:36:14.000000000 -0500 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "aoe.h" @@ -194,7 +195,7 @@ sl = sl_tail = NULL; read_lock(&dev_base_lock); - for_each_netdev(ifp) { + for_each_netdev(&init_net, ifp) { dev_hold(ifp); if (!is_aoe_netif(ifp)) goto cont; diff -Nurb linux-2.6.22-570/drivers/block/aoe/aoenet.c linux-2.6.22-591/drivers/block/aoe/aoenet.c --- linux-2.6.22-570/drivers/block/aoe/aoenet.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/drivers/block/aoe/aoenet.c 2007-12-21 15:36:14.000000000 -0500 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "aoe.h" @@ -114,6 +115,9 @@ struct aoe_hdr *h; u32 n; + if (ifp->nd_net != &init_net) + goto exit; + skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) return 0; diff -Nurb linux-2.6.22-570/drivers/block/cciss_scsi.c linux-2.6.22-591/drivers/block/cciss_scsi.c --- linux-2.6.22-570/drivers/block/cciss_scsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/block/cciss_scsi.c 2007-12-21 15:36:11.000000000 -0500 @@ -555,7 +555,6 @@ { struct scsi_cmnd *cmd; ctlr_info_t *ctlr; - u64bit addr64; ErrorInfo_struct *ei; ei = cp->err_info; @@ -569,20 +568,7 @@ cmd = (struct scsi_cmnd *) cp->scsi_cmd; ctlr = hba[cp->ctlr]; - /* undo the DMA mappings */ - - if (cmd->use_sg) { - pci_unmap_sg(ctlr->pdev, - cmd->request_buffer, cmd->use_sg, - cmd->sc_data_direction); - } - else if (cmd->request_bufflen) { - addr64.val32.lower = cp->SG[0].Addr.lower; - addr64.val32.upper = cp->SG[0].Addr.upper; - pci_unmap_single(ctlr->pdev, (dma_addr_t) addr64.val, - cmd->request_bufflen, - cmd->sc_data_direction); - } + scsi_dma_unmap(cmd); cmd->result = (DID_OK << 16); /* host byte */ cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */ @@ -597,7 +583,7 @@ ei->SenseLen > SCSI_SENSE_BUFFERSIZE ? SCSI_SENSE_BUFFERSIZE : ei->SenseLen); - cmd->resid = ei->ResidualCnt; + scsi_set_resid(cmd, ei->ResidualCnt); if(ei->CommandStatus != 0) { /* an error has occurred */ @@ -1204,46 +1190,29 @@ CommandList_struct *cp, struct scsi_cmnd *cmd) { - unsigned int use_sg, nsegs=0, len; - struct scatterlist *scatter = (struct scatterlist *) cmd->request_buffer; + unsigned int len; + struct scatterlist *sg; __u64 addr64; + int use_sg, i; - /* is it just one virtual address? */ - if (!cmd->use_sg) { - if (cmd->request_bufflen) { /* anything to xfer? 
*/ - - addr64 = (__u64) pci_map_single(pdev, - cmd->request_buffer, - cmd->request_bufflen, - cmd->sc_data_direction); + BUG_ON(scsi_sg_count(cmd) > MAXSGENTRIES); - cp->SG[0].Addr.lower = + use_sg = scsi_dma_map(cmd); + if (use_sg) { /* not too many addrs? */ + scsi_for_each_sg(cmd, sg, use_sg, i) { + addr64 = (__u64) sg_dma_address(sg); + len = sg_dma_len(sg); + cp->SG[i].Addr.lower = (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF); - cp->SG[0].Addr.upper = + cp->SG[i].Addr.upper = (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF); - cp->SG[0].Len = cmd->request_bufflen; - nsegs=1; + cp->SG[i].Len = len; + cp->SG[i].Ext = 0; // we are not chaining } - } /* else, must be a list of virtual addresses.... */ - else if (cmd->use_sg <= MAXSGENTRIES) { /* not too many addrs? */ - - use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, - cmd->sc_data_direction); - - for (nsegs=0; nsegs < use_sg; nsegs++) { - addr64 = (__u64) sg_dma_address(&scatter[nsegs]); - len = sg_dma_len(&scatter[nsegs]); - cp->SG[nsegs].Addr.lower = - (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF); - cp->SG[nsegs].Addr.upper = - (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF); - cp->SG[nsegs].Len = len; - cp->SG[nsegs].Ext = 0; // we are not chaining } - } else BUG(); - cp->Header.SGList = (__u8) nsegs; /* no. SGs contig in this cmd */ - cp->Header.SGTotal = (__u16) nsegs; /* total sgs in this cmd list */ + cp->Header.SGList = (__u8) use_sg; /* no. 
SGs contig in this cmd */ + cp->Header.SGTotal = (__u16) use_sg; /* total sgs in this cmd list */ return; } diff -Nurb linux-2.6.22-570/drivers/block/loop.c linux-2.6.22-591/drivers/block/loop.c --- linux-2.6.22-570/drivers/block/loop.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/drivers/block/loop.c 2007-12-21 15:36:11.000000000 -0500 @@ -68,6 +68,7 @@ #include #include #include +#include #include #include /* for invalidate_bdev() */ #include @@ -577,13 +578,6 @@ struct loop_device *lo = data; struct bio *bio; - /* - * loop can be used in an encrypted device, - * hence, it mustn't be stopped at all - * because it could be indirectly used during suspension - */ - current->flags |= PF_NOFREEZE; - set_user_nice(current, -20); while (!kthread_should_stop() || lo->lo_bio) { diff -Nurb linux-2.6.22-570/drivers/block/pktcdvd.c linux-2.6.22-591/drivers/block/pktcdvd.c --- linux-2.6.22-570/drivers/block/pktcdvd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/block/pktcdvd.c 2007-12-21 15:36:11.000000000 -0500 @@ -146,8 +146,7 @@ **********************************************************/ #define DEF_ATTR(_obj,_name,_mode) \ - static struct attribute _obj = { \ - .name = _name, .owner = THIS_MODULE, .mode = _mode } + static struct attribute _obj = { .name = _name, .mode = _mode } /********************************************************** /sys/class/pktcdvd/pktcdvd[0-7]/ @@ -1594,6 +1593,7 @@ long min_sleep_time, residue; set_user_nice(current, -20); + set_freezable(); for (;;) { DECLARE_WAITQUEUE(wait, current); diff -Nurb linux-2.6.22-570/drivers/char/apm-emulation.c linux-2.6.22-591/drivers/char/apm-emulation.c --- linux-2.6.22-570/drivers/char/apm-emulation.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/char/apm-emulation.c 2007-12-21 15:36:11.000000000 -0500 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -329,13 +330,8 @@ /* * Wait for the suspend/resume to complete. 
If there * are pending acknowledges, we wait here for them. - * - * Note: we need to ensure that the PM subsystem does - * not kick us out of the wait when it suspends the - * threads. */ flags = current->flags; - current->flags |= PF_NOFREEZE; wait_event(apm_suspend_waitqueue, as->suspend_state == SUSPEND_DONE); @@ -365,13 +361,8 @@ /* * Wait for the suspend/resume to complete. If there * are pending acknowledges, we wait here for them. - * - * Note: we need to ensure that the PM subsystem does - * not kick us out of the wait when it suspends the - * threads. */ flags = current->flags; - current->flags |= PF_NOFREEZE; wait_event_interruptible(apm_suspend_waitqueue, as->suspend_state == SUSPEND_DONE); @@ -598,7 +589,6 @@ kapmd_tsk = NULL; return ret; } - kapmd_tsk->flags |= PF_NOFREEZE; wake_up_process(kapmd_tsk); #ifdef CONFIG_PROC_FS diff -Nurb linux-2.6.22-570/drivers/char/hvc_console.c linux-2.6.22-591/drivers/char/hvc_console.c --- linux-2.6.22-570/drivers/char/hvc_console.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/char/hvc_console.c 2007-12-21 15:36:11.000000000 -0500 @@ -674,11 +674,12 @@ * calling hvc_poll() who determines whether a console adapter support * interrupts. 
*/ -int khvcd(void *unused) +static int khvcd(void *unused) { int poll_mask; struct hvc_struct *hp; + set_freezable(); __set_current_state(TASK_RUNNING); do { poll_mask = 0; diff -Nurb linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c linux-2.6.22-591/drivers/char/ipmi/ipmi_msghandler.c --- linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/char/ipmi/ipmi_msghandler.c 2007-12-21 15:36:11.000000000 -0500 @@ -2171,52 +2171,42 @@ int err; bmc->device_id_attr.attr.name = "device_id"; - bmc->device_id_attr.attr.owner = THIS_MODULE; bmc->device_id_attr.attr.mode = S_IRUGO; bmc->device_id_attr.show = device_id_show; bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs"; - bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE; bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; bmc->revision_attr.attr.name = "revision"; - bmc->revision_attr.attr.owner = THIS_MODULE; bmc->revision_attr.attr.mode = S_IRUGO; bmc->revision_attr.show = revision_show; bmc->firmware_rev_attr.attr.name = "firmware_revision"; - bmc->firmware_rev_attr.attr.owner = THIS_MODULE; bmc->firmware_rev_attr.attr.mode = S_IRUGO; bmc->firmware_rev_attr.show = firmware_rev_show; bmc->version_attr.attr.name = "ipmi_version"; - bmc->version_attr.attr.owner = THIS_MODULE; bmc->version_attr.attr.mode = S_IRUGO; bmc->version_attr.show = ipmi_version_show; bmc->add_dev_support_attr.attr.name = "additional_device_support"; - bmc->add_dev_support_attr.attr.owner = THIS_MODULE; bmc->add_dev_support_attr.attr.mode = S_IRUGO; bmc->add_dev_support_attr.show = add_dev_support_show; bmc->manufacturer_id_attr.attr.name = "manufacturer_id"; - bmc->manufacturer_id_attr.attr.owner = THIS_MODULE; bmc->manufacturer_id_attr.attr.mode = S_IRUGO; bmc->manufacturer_id_attr.show = manufacturer_id_show; bmc->product_id_attr.attr.name = "product_id"; - bmc->product_id_attr.attr.owner = THIS_MODULE; 
bmc->product_id_attr.attr.mode = S_IRUGO; bmc->product_id_attr.show = product_id_show; bmc->guid_attr.attr.name = "guid"; - bmc->guid_attr.attr.owner = THIS_MODULE; bmc->guid_attr.attr.mode = S_IRUGO; bmc->guid_attr.show = guid_show; bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision"; - bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE; bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; diff -Nurb linux-2.6.22-570/drivers/char/keyboard.c linux-2.6.22-591/drivers/char/keyboard.c --- linux-2.6.22-570/drivers/char/keyboard.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/char/keyboard.c 2007-12-21 15:36:11.000000000 -0500 @@ -1150,6 +1150,7 @@ sysrq_down = 0; if (sysrq_down && down && !rep) { handle_sysrq(kbd_sysrq_xlate[keycode], tty); + sysrq_down = 0; /* In case we miss the 'up' event. */ return; } #endif diff -Nurb linux-2.6.22-570/drivers/connector/connector.c linux-2.6.22-591/drivers/connector/connector.c --- linux-2.6.22-570/drivers/connector/connector.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/connector/connector.c 2007-12-21 15:36:14.000000000 -0500 @@ -446,7 +446,7 @@ dev->id.idx = cn_idx; dev->id.val = cn_val; - dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, + dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, dev->input, NULL, THIS_MODULE); if (!dev->nls) diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c linux-2.6.22-591/drivers/cpufreq/cpufreq_stats.c --- linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/cpufreq/cpufreq_stats.c 2007-12-21 15:36:11.000000000 -0500 @@ -25,8 +25,7 @@ #define CPUFREQ_STATDEVICE_ATTR(_name,_mode,_show) \ static struct freq_attr _attr_##_name = {\ - .attr = {.name = __stringify(_name), .owner = THIS_MODULE, \ - .mode = _mode, }, \ + .attr = {.name = __stringify(_name), .mode = _mode, }, \ .show = _show,\ }; 
diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c linux-2.6.22-591/drivers/cpufreq/cpufreq_userspace.c --- linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/cpufreq/cpufreq_userspace.c 2007-12-21 15:36:11.000000000 -0500 @@ -120,7 +120,7 @@ static struct freq_attr freq_attr_scaling_setspeed = { - .attr = { .name = "scaling_setspeed", .mode = 0644, .owner = THIS_MODULE }, + .attr = { .name = "scaling_setspeed", .mode = 0644 }, .show = show_speed, .store = store_speed, }; diff -Nurb linux-2.6.22-570/drivers/cpufreq/freq_table.c linux-2.6.22-591/drivers/cpufreq/freq_table.c --- linux-2.6.22-570/drivers/cpufreq/freq_table.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/cpufreq/freq_table.c 2007-12-21 15:36:11.000000000 -0500 @@ -199,7 +199,6 @@ struct freq_attr cpufreq_freq_attr_scaling_available_freqs = { .attr = { .name = "scaling_available_frequencies", .mode = 0444, - .owner=THIS_MODULE }, .show = show_available_freqs, }; diff -Nurb linux-2.6.22-570/drivers/cpuidle/Kconfig linux-2.6.22-591/drivers/cpuidle/Kconfig --- linux-2.6.22-570/drivers/cpuidle/Kconfig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,39 @@ +menu "CPU idle PM support" + +config CPU_IDLE + bool "CPU idle PM support" + help + CPU idle is a generic framework for supporting software-controlled + idle processor power management. It includes modular cross-platform + governors that can be swapped during runtime. + + If you're using a mobile platform that supports CPU idle PM (e.g. + an ACPI-capable notebook), you should say Y here. + +if CPU_IDLE + +comment "Governors" + +config CPU_IDLE_GOV_LADDER + tristate "'ladder' governor" + depends on CPU_IDLE + default y + help + This cpuidle governor promotes and demotes through the supported idle + states using residency time and bus master activity as metrics. 
This + algorithm was originally introduced in the old ACPI processor driver. + +config CPU_IDLE_GOV_MENU + tristate "'menu' governor" + depends on CPU_IDLE && NO_HZ + default y + help + This cpuidle governor evaluates all available states and chooses the + deepest state that meets all of the following constraints: BM activity, + expected time until next timer interrupt, and last break event time + delta. It is designed to minimize power consumption. Currently + dynticks is required. + +endif # CPU_IDLE + +endmenu diff -Nurb linux-2.6.22-570/drivers/cpuidle/Makefile linux-2.6.22-591/drivers/cpuidle/Makefile --- linux-2.6.22-570/drivers/cpuidle/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,5 @@ +# +# Makefile for cpuidle. +# + +obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.c linux-2.6.22-591/drivers/cpuidle/cpuidle.c --- linux-2.6.22-570/drivers/cpuidle/cpuidle.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/cpuidle.c 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,306 @@ +/* + * cpuidle.c - core cpuidle infrastructure + * + * (C) 2006-2007 Venkatesh Pallipadi + * Shaohua Li + * Adam Belay + * + * This code is licenced under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cpuidle.h" + +DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); +EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices); + +DEFINE_MUTEX(cpuidle_lock); +LIST_HEAD(cpuidle_detected_devices); +static void (*pm_idle_old)(void); + +/** + * cpuidle_idle_call - the main idle loop + * + * NOTE: no locks or semaphores should be used here + */ +static void cpuidle_idle_call(void) +{ + struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices); + struct cpuidle_state *target_state; + int next_state; + + /* check if the device is ready */ + if (!dev || dev->status != CPUIDLE_STATUS_DOIDLE) { + if (pm_idle_old) + pm_idle_old(); + else + local_irq_enable(); + return; + } + + /* ask the governor for the next state */ + next_state = cpuidle_curr_governor->select(dev); + if (need_resched()) + return; + target_state = &dev->states[next_state]; + + /* enter the state and update stats */ + dev->last_residency = target_state->enter(dev, target_state); + dev->last_state = target_state; + target_state->time += dev->last_residency; + target_state->usage++; + + /* give the governor an opportunity to reflect on the outcome */ + if (cpuidle_curr_governor->reflect) + cpuidle_curr_governor->reflect(dev); +} + +/** + * cpuidle_install_idle_handler - installs the cpuidle idle loop handler + */ +void cpuidle_install_idle_handler(void) +{ + if (pm_idle != cpuidle_idle_call) { + /* Make sure all changes finished before we switch to new idle */ + smp_wmb(); + pm_idle = cpuidle_idle_call; + } +} + +/** + * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler + */ +void cpuidle_uninstall_idle_handler(void) +{ + if (pm_idle != pm_idle_old) { + pm_idle = pm_idle_old; + cpu_idle_wait(); + } +} + +/** + * cpuidle_rescan_device - prepares for a new state configuration + * @dev: the target device + * + * Must be called with cpuidle_lock aquired. 
+ */ +void cpuidle_rescan_device(struct cpuidle_device *dev) +{ + int i; + + if (cpuidle_curr_governor->scan) + cpuidle_curr_governor->scan(dev); + + for (i = 0; i < dev->state_count; i++) { + dev->states[i].usage = 0; + dev->states[i].time = 0; + } +} + +/** + * cpuidle_add_device - attaches the driver to a CPU instance + * @sys_dev: the system device (driver model CPU representation) + */ +static int cpuidle_add_device(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + struct cpuidle_device *dev; + + dev = per_cpu(cpuidle_devices, cpu); + + mutex_lock(&cpuidle_lock); + if (cpu_is_offline(cpu)) { + mutex_unlock(&cpuidle_lock); + return 0; + } + + if (!dev) { + dev = kzalloc(sizeof(struct cpuidle_device), GFP_KERNEL); + if (!dev) { + mutex_unlock(&cpuidle_lock); + return -ENOMEM; + } + init_completion(&dev->kobj_unregister); + per_cpu(cpuidle_devices, cpu) = dev; + } + dev->cpu = cpu; + + if (dev->status & CPUIDLE_STATUS_DETECTED) { + mutex_unlock(&cpuidle_lock); + return 0; + } + + cpuidle_add_sysfs(sys_dev); + + if (cpuidle_curr_driver) { + if (cpuidle_attach_driver(dev)) + goto err_ret; + } + + if (cpuidle_curr_governor) { + if (cpuidle_attach_governor(dev)) { + cpuidle_detach_driver(dev); + goto err_ret; + } + } + + if (cpuidle_device_can_idle(dev)) + cpuidle_install_idle_handler(); + + list_add(&dev->device_list, &cpuidle_detected_devices); + dev->status |= CPUIDLE_STATUS_DETECTED; + +err_ret: + mutex_unlock(&cpuidle_lock); + + return 0; +} + +/** + * __cpuidle_remove_device - detaches the driver from a CPU instance + * @sys_dev: the system device (driver model CPU representation) + * + * Must be called with cpuidle_lock aquired. 
+ */
+static int __cpuidle_remove_device(struct sys_device *sys_dev)
+{
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, sys_dev->id);
+
+	if (!dev || !(dev->status & CPUIDLE_STATUS_DETECTED)) {
+		return 0;	/* never allocated by add, or already removed */
+	}
+	dev->status &= ~CPUIDLE_STATUS_DETECTED;
+	/* NOTE: we don't wait because the cpu is already offline */
+	if (cpuidle_curr_governor)
+		cpuidle_detach_governor(dev);
+	if (cpuidle_curr_driver)
+		cpuidle_detach_driver(dev);
+	cpuidle_remove_sysfs(sys_dev);
+	list_del(&dev->device_list);
+	wait_for_completion(&dev->kobj_unregister);
+	per_cpu(cpuidle_devices, sys_dev->id) = NULL;	/* clear before freeing */
+	kfree(dev);
+
+	return 0;
+}
+
+/**
+ * cpuidle_remove_device - locked wrapper around __cpuidle_remove_device()
+ * @sys_dev: the system device (driver model CPU representation)
+ */
+static int cpuidle_remove_device(struct sys_device *sys_dev)
+{
+	int ret;
+	mutex_lock(&cpuidle_lock);
+	ret = __cpuidle_remove_device(sys_dev);
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+static struct sysdev_driver cpuidle_sysdev_driver = {
+	.add		= cpuidle_add_device,
+	.remove		= cpuidle_remove_device,
+};
+
+static int cpuidle_cpu_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev((unsigned long)hcpu);
+
+	switch (action) {
+	case CPU_ONLINE:
+		cpuidle_add_device(sys_dev);
+		break;
+	case CPU_DOWN_PREPARE:	/* lock stays held until DEAD or DOWN_FAILED */
+		mutex_lock(&cpuidle_lock);
+		break;
+	case CPU_DEAD:		/* lock was taken at CPU_DOWN_PREPARE */
+		__cpuidle_remove_device(sys_dev);
+		mutex_unlock(&cpuidle_lock);
+		break;
+	case CPU_DOWN_FAILED:	/* offline aborted: just drop the lock */
+		mutex_unlock(&cpuidle_lock);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpuidle_cpu_notifier =
+{
+	.notifier_call = cpuidle_cpu_callback,
+};
+
+#ifdef CONFIG_SMP
+
+static void smp_callback(void *v)
+{
+	/* we already woke the CPU up, nothing more to do */
+}
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement.
This means we need to get all processors out of their C-state, + * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that + * wakes them all right up. + */ +static int cpuidle_latency_notify(struct notifier_block *b, + unsigned long l, void *v) +{ + smp_call_function(smp_callback, NULL, 0, 1); + return NOTIFY_OK; +} + +static struct notifier_block cpuidle_latency_notifier = { + .notifier_call = cpuidle_latency_notify, +}; + +#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0) + +#else /* CONFIG_SMP */ + +#define latency_notifier_init(x) do { } while (0) + +#endif /* CONFIG_SMP */ + +/** + * cpuidle_init - core initializer + */ +static int __init cpuidle_init(void) +{ + int ret; + + pm_idle_old = pm_idle; + + ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); + if (ret) + return ret; + + register_hotcpu_notifier(&cpuidle_cpu_notifier); + + ret = sysdev_driver_register(&cpu_sysdev_class, &cpuidle_sysdev_driver); + + if (ret) { + cpuidle_remove_class_sysfs(&cpu_sysdev_class); + printk(KERN_ERR "cpuidle: failed to initialize\n"); + return ret; + } + + latency_notifier_init(&cpuidle_latency_notifier); + + return 0; +} + +core_initcall(cpuidle_init); diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.h linux-2.6.22-591/drivers/cpuidle/cpuidle.h --- linux-2.6.22-570/drivers/cpuidle/cpuidle.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/cpuidle.h 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,50 @@ +/* + * cpuidle.h - The internal header file + */ + +#ifndef __DRIVER_CPUIDLE_H +#define __DRIVER_CPUIDLE_H + +#include + +/* For internal use only */ +extern struct cpuidle_governor *cpuidle_curr_governor; +extern struct cpuidle_driver *cpuidle_curr_driver; +extern struct list_head cpuidle_drivers; +extern struct list_head cpuidle_governors; +extern struct list_head cpuidle_detected_devices; +extern struct mutex cpuidle_lock; + +/* idle loop */ +extern void cpuidle_install_idle_handler(void); +extern 
void cpuidle_uninstall_idle_handler(void); +extern void cpuidle_rescan_device(struct cpuidle_device *dev); + +/* drivers */ +extern int cpuidle_attach_driver(struct cpuidle_device *dev); +extern void cpuidle_detach_driver(struct cpuidle_device *dev); +extern int cpuidle_switch_driver(struct cpuidle_driver *drv); + +/* governors */ +extern int cpuidle_attach_governor(struct cpuidle_device *dev); +extern void cpuidle_detach_governor(struct cpuidle_device *dev); +extern int cpuidle_switch_governor(struct cpuidle_governor *gov); + +/* sysfs */ +extern int cpuidle_add_class_sysfs(struct sysdev_class *cls); +extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls); +extern int cpuidle_add_driver_sysfs(struct cpuidle_device *device); +extern void cpuidle_remove_driver_sysfs(struct cpuidle_device *device); +extern int cpuidle_add_sysfs(struct sys_device *sysdev); +extern void cpuidle_remove_sysfs(struct sys_device *sysdev); + +/** + * cpuidle_device_can_idle - determines if a CPU can utilize the idle loop + * @dev: the target CPU + */ +static inline int cpuidle_device_can_idle(struct cpuidle_device *dev) +{ + return (dev->status == CPUIDLE_STATUS_DOIDLE); +} + +#endif /* __DRIVER_CPUIDLE_H */ diff -Nurb linux-2.6.22-570/drivers/cpuidle/driver.c linux-2.6.22-591/drivers/cpuidle/driver.c --- linux-2.6.22-570/drivers/cpuidle/driver.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/driver.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,276 @@ +/* + * driver.c - driver support + * + * (C) 2006-2007 Venkatesh Pallipadi + * Shaohua Li + * Adam Belay + * + * This code is licenced under the GPL. + */ + +#include +#include +#include + +#include "cpuidle.h" + +LIST_HEAD(cpuidle_drivers); +struct cpuidle_driver *cpuidle_curr_driver; + + +/** + * cpuidle_attach_driver - attaches a driver to a CPU + * @dev: the target CPU + * + * Must be called with cpuidle_lock aquired. 
+ */ +int cpuidle_attach_driver(struct cpuidle_device *dev) +{ + int ret; + + if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) + return -EIO; + + if (!try_module_get(cpuidle_curr_driver->owner)) + return -EINVAL; + + ret = cpuidle_curr_driver->init(dev); + if (ret) { + module_put(cpuidle_curr_driver->owner); + printk(KERN_INFO "cpuidle: driver %s failed to attach to " + "cpu %d\n", cpuidle_curr_driver->name, dev->cpu); + } else { + if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) + cpuidle_rescan_device(dev); + smp_wmb(); + dev->status |= CPUIDLE_STATUS_DRIVER_ATTACHED; + cpuidle_add_driver_sysfs(dev); + } + + return ret; +} + +/** + * cpuidle_detach_govenor - detaches a driver from a CPU + * @dev: the target CPU + * + * Must be called with cpuidle_lock aquired. + */ +void cpuidle_detach_driver(struct cpuidle_device *dev) +{ + if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) { + cpuidle_remove_driver_sysfs(dev); + dev->status &= ~CPUIDLE_STATUS_DRIVER_ATTACHED; + if (cpuidle_curr_driver->exit) + cpuidle_curr_driver->exit(dev); + module_put(cpuidle_curr_driver->owner); + } +} + +/** + * __cpuidle_find_driver - finds a driver of the specified name + * @str: the name + * + * Must be called with cpuidle_lock aquired. + */ +static struct cpuidle_driver * __cpuidle_find_driver(const char *str) +{ + struct cpuidle_driver *drv; + + list_for_each_entry(drv, &cpuidle_drivers, driver_list) + if (!strnicmp(str, drv->name, CPUIDLE_NAME_LEN)) + return drv; + + return NULL; +} + +/** + * cpuidle_switch_driver - changes the driver + * @drv: the new target driver + * + * NOTE: "drv" can be NULL to specify disabled + * Must be called with cpuidle_lock aquired. 
+ */ +int cpuidle_switch_driver(struct cpuidle_driver *drv) +{ + struct cpuidle_device *dev; + + if (drv == cpuidle_curr_driver) + return -EINVAL; + + cpuidle_uninstall_idle_handler(); + + if (cpuidle_curr_driver) + list_for_each_entry(dev, &cpuidle_detected_devices, device_list) + cpuidle_detach_driver(dev); + + cpuidle_curr_driver = drv; + + if (drv) { + int ret = 1; + list_for_each_entry(dev, &cpuidle_detected_devices, device_list) + if (cpuidle_attach_driver(dev) == 0) + ret = 0; + + /* If attach on all devices fail, switch to NULL driver */ + if (ret) + cpuidle_curr_driver = NULL; + + if (cpuidle_curr_driver && cpuidle_curr_governor) { + printk(KERN_INFO "cpuidle: using driver %s\n", + drv->name); + cpuidle_install_idle_handler(); + } + } + + return 0; +} + +/** + * cpuidle_register_driver - registers a driver + * @drv: the driver + */ +int cpuidle_register_driver(struct cpuidle_driver *drv) +{ + int ret = -EEXIST; + + if (!drv || !drv->init) + return -EINVAL; + + mutex_lock(&cpuidle_lock); + if (__cpuidle_find_driver(drv->name) == NULL) { + ret = 0; + list_add_tail(&drv->driver_list, &cpuidle_drivers); + if (!cpuidle_curr_driver) + cpuidle_switch_driver(drv); + } + mutex_unlock(&cpuidle_lock); + + return ret; +} + +EXPORT_SYMBOL_GPL(cpuidle_register_driver); + +/** + * cpuidle_unregister_driver - unregisters a driver + * @drv: the driver + */ +void cpuidle_unregister_driver(struct cpuidle_driver *drv) +{ + if (!drv) + return; + + mutex_lock(&cpuidle_lock); + if (drv == cpuidle_curr_driver) + cpuidle_switch_driver(NULL); + list_del(&drv->driver_list); + mutex_unlock(&cpuidle_lock); +} + +EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); + +static void __cpuidle_force_redetect(struct cpuidle_device *dev) +{ + cpuidle_remove_driver_sysfs(dev); + cpuidle_curr_driver->redetect(dev); + cpuidle_add_driver_sysfs(dev); +} + +/** + * cpuidle_force_redetect - redetects the idle states of a CPU + * + * @dev: the CPU to redetect + * @drv: the target driver + * + * Generally, 
the driver will call this when the supported states set has + * changed. (e.g. as the result of an ACPI transition to battery power) + */ +int cpuidle_force_redetect(struct cpuidle_device *dev, + struct cpuidle_driver *drv) +{ + int uninstalled = 0; + + mutex_lock(&cpuidle_lock); + + if (drv != cpuidle_curr_driver) { + mutex_unlock(&cpuidle_lock); + return 0; + } + + if (!(dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) || + !cpuidle_curr_driver->redetect) { + mutex_unlock(&cpuidle_lock); + return -EIO; + } + + if (cpuidle_device_can_idle(dev)) { + uninstalled = 1; + cpuidle_uninstall_idle_handler(); + } + + __cpuidle_force_redetect(dev); + + if (cpuidle_device_can_idle(dev)) { + cpuidle_rescan_device(dev); + cpuidle_install_idle_handler(); + } + + /* other devices are still ok */ + if (uninstalled) + cpuidle_install_idle_handler(); + + mutex_unlock(&cpuidle_lock); + + return 0; +} + +EXPORT_SYMBOL_GPL(cpuidle_force_redetect); + +/** + * cpuidle_force_redetect_devices - redetects the idle states of all CPUs + * + * @drv: the target driver + * + * Generally, the driver will call this when the supported states set has + * changed. (e.g. 
as the result of an ACPI transition to battery power)
+ */
+int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
+{
+	struct cpuidle_device *dev;
+	int ret = 0;
+
+	mutex_lock(&cpuidle_lock);
+
+	if (!drv || drv != cpuidle_curr_driver)	/* NULL == NULL must not pass */
+		goto out;
+
+	if (!cpuidle_curr_driver->redetect) {
+		ret = -EIO;
+		goto out;
+	}
+
+	cpuidle_uninstall_idle_handler();
+
+	list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+		__cpuidle_force_redetect(dev);
+
+	cpuidle_install_idle_handler();
+out:
+	mutex_unlock(&cpuidle_lock);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_force_redetect_devices);
+
+/**
+ * cpuidle_get_bm_activity - current driver's bm_check() result, 0 if none
+ */
+int cpuidle_get_bm_activity(void)
+{
+	if (cpuidle_curr_driver && cpuidle_curr_driver->bm_check)
+		return cpuidle_curr_driver->bm_check();
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_bm_activity);
+
diff -Nurb linux-2.6.22-570/drivers/cpuidle/governor.c linux-2.6.22-591/drivers/cpuidle/governor.c
--- linux-2.6.22-570/drivers/cpuidle/governor.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/drivers/cpuidle/governor.c	2007-12-21 15:36:11.000000000 -0500
@@ -0,0 +1,160 @@
+/*
+ * governor.c - governor support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi
+ *               Shaohua Li
+ *               Adam Belay
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include
+#include
+#include
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_governors);
+struct cpuidle_governor *cpuidle_curr_governor;
+
+
+/**
+ * cpuidle_attach_governor - attaches a governor to a CPU
+ * @dev: the target CPU
+ *
+ * Must be called with cpuidle_lock acquired.
+ */ +int cpuidle_attach_governor(struct cpuidle_device *dev) +{ + int ret = 0; + + if(dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) + return -EIO; + + if (!try_module_get(cpuidle_curr_governor->owner)) + return -EINVAL; + + if (cpuidle_curr_governor->init) + ret = cpuidle_curr_governor->init(dev); + if (ret) { + module_put(cpuidle_curr_governor->owner); + printk(KERN_ERR "cpuidle: governor %s failed to attach to cpu %d\n", + cpuidle_curr_governor->name, dev->cpu); + } else { + if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) + cpuidle_rescan_device(dev); + smp_wmb(); + dev->status |= CPUIDLE_STATUS_GOVERNOR_ATTACHED; + } + + return ret; +} + +/** + * cpuidle_detach_govenor - detaches a governor from a CPU + * @dev: the target CPU + * + * Must be called with cpuidle_lock aquired. + */ +void cpuidle_detach_governor(struct cpuidle_device *dev) +{ + if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) { + dev->status &= ~CPUIDLE_STATUS_GOVERNOR_ATTACHED; + if (cpuidle_curr_governor->exit) + cpuidle_curr_governor->exit(dev); + module_put(cpuidle_curr_governor->owner); + } +} + +/** + * __cpuidle_find_governor - finds a governor of the specified name + * @str: the name + * + * Must be called with cpuidle_lock aquired. + */ +static struct cpuidle_governor * __cpuidle_find_governor(const char *str) +{ + struct cpuidle_governor *gov; + + list_for_each_entry(gov, &cpuidle_governors, governor_list) + if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN)) + return gov; + + return NULL; +} + +/** + * cpuidle_switch_governor - changes the governor + * @gov: the new target governor + * + * NOTE: "gov" can be NULL to specify disabled + * Must be called with cpuidle_lock aquired. 
+ */ +int cpuidle_switch_governor(struct cpuidle_governor *gov) +{ + struct cpuidle_device *dev; + + if (gov == cpuidle_curr_governor) + return -EINVAL; + + cpuidle_uninstall_idle_handler(); + + if (cpuidle_curr_governor) + list_for_each_entry(dev, &cpuidle_detected_devices, device_list) + cpuidle_detach_governor(dev); + + cpuidle_curr_governor = gov; + + if (gov) { + list_for_each_entry(dev, &cpuidle_detected_devices, device_list) + cpuidle_attach_governor(dev); + if (cpuidle_curr_driver) + cpuidle_install_idle_handler(); + printk(KERN_INFO "cpuidle: using governor %s\n", gov->name); + } + + return 0; +} + +/** + * cpuidle_register_governor - registers a governor + * @gov: the governor + */ +int cpuidle_register_governor(struct cpuidle_governor *gov) +{ + int ret = -EEXIST; + + if (!gov || !gov->select) + return -EINVAL; + + mutex_lock(&cpuidle_lock); + if (__cpuidle_find_governor(gov->name) == NULL) { + ret = 0; + list_add_tail(&gov->governor_list, &cpuidle_governors); + if (!cpuidle_curr_governor) + cpuidle_switch_governor(gov); + } + mutex_unlock(&cpuidle_lock); + + return ret; +} + +EXPORT_SYMBOL_GPL(cpuidle_register_governor); + +/** + * cpuidle_unregister_governor - unregisters a governor + * @gov: the governor + */ +void cpuidle_unregister_governor(struct cpuidle_governor *gov) +{ + if (!gov) + return; + + mutex_lock(&cpuidle_lock); + if (gov == cpuidle_curr_governor) + cpuidle_switch_governor(NULL); + list_del(&gov->governor_list); + mutex_unlock(&cpuidle_lock); +} + +EXPORT_SYMBOL_GPL(cpuidle_unregister_governor); diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/Makefile linux-2.6.22-591/drivers/cpuidle/governors/Makefile --- linux-2.6.22-570/drivers/cpuidle/governors/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/governors/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,6 @@ +# +# Makefile for cpuidle governors. 
+# + +obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o +obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/ladder.c linux-2.6.22-591/drivers/cpuidle/governors/ladder.c --- linux-2.6.22-570/drivers/cpuidle/governors/ladder.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/governors/ladder.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,227 @@ +/* + * ladder.c - the residency ladder algorithm + * + * Copyright (C) 2001, 2002 Andy Grover + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2004, 2005 Dominik Brodowski + * + * (C) 2006-2007 Venkatesh Pallipadi + * Shaohua Li + * Adam Belay + * + * This code is licenced under the GPL. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define PROMOTION_COUNT 4 +#define DEMOTION_COUNT 1 + +/* + * bm_history -- bit-mask with a bit per jiffy of bus-master activity + * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms + * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms + * 100 HZ: 0x0000000F: 4 jiffies = 40ms + * reduce history for more aggressive entry into C3 + */ +static unsigned int bm_history __read_mostly = + (HZ >= 800 ? 
0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+module_param(bm_history, uint, 0644);
+
+struct ladder_device_state {
+	struct {
+		u32 promotion_count;
+		u32 demotion_count;
+		u32 promotion_time;
+		u32 demotion_time;
+		u32 bm;
+	} threshold;
+	struct {
+		int promotion_count;
+		int demotion_count;
+	} stats;
+};
+
+struct ladder_device {
+	struct ladder_device_state states[CPUIDLE_STATE_MAX];
+	unsigned int bm_check:1;	/* set by ladder_scan_device() */
+	unsigned long bm_check_timestamp;	/* jiffies at the last BM sample */
+	unsigned long bm_activity; /* FIXME: bm activity should be global */
+	int last_state_idx;	/* index chosen by the previous selection */
+};
+
+/**
+ * ladder_do_selection - prepares private data for a state change
+ * @ldev: the ladder device
+ * @old_idx: the current state index
+ * @new_idx: the new target state index
+ */
+static inline void ladder_do_selection(struct ladder_device *ldev,
+					int old_idx, int new_idx)
+{
+	ldev->states[old_idx].stats.promotion_count = 0;
+	ldev->states[old_idx].stats.demotion_count = 0;
+	ldev->last_state_idx = new_idx;
+}
+
+/**
+ * ladder_select_state - selects the next state to enter
+ * @dev: the CPU; returns the state index, or 0 when no governor data is set
+ */
+static int ladder_select_state(struct cpuidle_device *dev)
+{
+	struct ladder_device *ldev = dev->governor_data;
+	struct ladder_device_state *last_state;
+	int last_residency, last_idx;
+
+	if (unlikely(!ldev))
+		return 0;
+	last_idx = ldev->last_state_idx;	/* read only after the NULL check */
+	last_state = &ldev->states[last_idx];
+
+	/* demote if within BM threshold */
+	if (ldev->bm_check) {
+		unsigned long diff;
+
+		diff = jiffies - ldev->bm_check_timestamp;
+		if (diff > 31)
+			diff = 31;
+
+		ldev->bm_activity <<= diff;
+		if (cpuidle_get_bm_activity())
+			ldev->bm_activity |= ((1UL << diff) - 1); /* 1UL: diff may be 31 */
+
+		ldev->bm_check_timestamp = jiffies;
+		if ((last_idx > 0) &&
+		    (last_state->threshold.bm & ldev->bm_activity)) {
+			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			return last_idx - 1;
+		}
+	}
+
+	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
+		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
+	else
+		last_residency =
last_state->threshold.promotion_time + 1; + + /* consider promotion */ + if (last_idx < dev->state_count - 1 && + last_residency > last_state->threshold.promotion_time && + dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) { + last_state->stats.promotion_count++; + last_state->stats.demotion_count = 0; + if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { + ladder_do_selection(ldev, last_idx, last_idx + 1); + return last_idx + 1; + } + } + + /* consider demotion */ + if (last_idx > 0 && + last_residency < last_state->threshold.demotion_time) { + last_state->stats.demotion_count++; + last_state->stats.promotion_count = 0; + if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { + ladder_do_selection(ldev, last_idx, last_idx - 1); + return last_idx - 1; + } + } + + /* otherwise remain at the current state */ + return last_idx; +} + +/** + * ladder_scan_device - scans a CPU's states and does setup + * @dev: the CPU + */ +static void ladder_scan_device(struct cpuidle_device *dev) +{ + int i, bm_check = 0; + struct ladder_device *ldev = dev->governor_data; + struct ladder_device_state *lstate; + struct cpuidle_state *state; + + ldev->last_state_idx = 0; + ldev->bm_check_timestamp = 0; + ldev->bm_activity = 0; + + for (i = 0; i < dev->state_count; i++) { + state = &dev->states[i]; + lstate = &ldev->states[i]; + + lstate->stats.promotion_count = 0; + lstate->stats.demotion_count = 0; + + lstate->threshold.promotion_count = PROMOTION_COUNT; + lstate->threshold.demotion_count = DEMOTION_COUNT; + + if (i < dev->state_count - 1) + lstate->threshold.promotion_time = state->exit_latency; + if (i > 0) + lstate->threshold.demotion_time = state->exit_latency; + if (state->flags & CPUIDLE_FLAG_CHECK_BM) { + lstate->threshold.bm = bm_history; + bm_check = 1; + } else + lstate->threshold.bm = 0; + } + + ldev->bm_check = bm_check; +} + +/** + * ladder_init_device - initializes a CPU-instance + * @dev: the CPU 
+ */ +static int ladder_init_device(struct cpuidle_device *dev) +{ + dev->governor_data = kmalloc(sizeof(struct ladder_device), GFP_KERNEL); + + return !dev->governor_data; +} + +/** + * ladder_exit_device - exits a CPU-instance + * @dev: the CPU + */ +static void ladder_exit_device(struct cpuidle_device *dev) +{ + kfree(dev->governor_data); +} + +static struct cpuidle_governor ladder_governor = { + .name = "ladder", + .init = ladder_init_device, + .exit = ladder_exit_device, + .scan = ladder_scan_device, + .select = ladder_select_state, + .owner = THIS_MODULE, +}; + +/** + * init_ladder - initializes the governor + */ +static int __init init_ladder(void) +{ + return cpuidle_register_governor(&ladder_governor); +} + +/** + * exit_ladder - exits the governor + */ +static void __exit exit_ladder(void) +{ + cpuidle_unregister_governor(&ladder_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_ladder); +module_exit(exit_ladder); diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/menu.c linux-2.6.22-591/drivers/cpuidle/governors/menu.c --- linux-2.6.22-570/drivers/cpuidle/governors/menu.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/governors/menu.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,152 @@ +/* + * menu.c - the menu idle governor + * + * Copyright (C) 2006-2007 Adam Belay + * + * This code is licenced under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define BM_HOLDOFF 20000 /* 20 ms */ + +struct menu_device { + int last_state_idx; + int deepest_bm_state; + + int break_last_us; + int break_elapsed_us; + + int bm_elapsed_us; + int bm_holdoff_us; + + unsigned long idle_jiffies; +}; + +static DEFINE_PER_CPU(struct menu_device, menu_devices); + +/** + * menu_select - selects the next idle state to enter + * @dev: the CPU + */ +static int menu_select(struct cpuidle_device *dev) +{ + struct menu_device *data = &__get_cpu_var(menu_devices); + int i, expected_us, max_state = dev->state_count; + + /* discard BM history because it is sticky */ + cpuidle_get_bm_activity(); + + /* determine the expected residency time */ + expected_us = (s32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000; + expected_us = min(expected_us, data->break_last_us); + + /* determine the maximum state compatible with current BM status */ + if (cpuidle_get_bm_activity()) + data->bm_elapsed_us = 0; + if (data->bm_elapsed_us <= data->bm_holdoff_us) + max_state = data->deepest_bm_state + 1; + + /* find the deepest idle state that satisfies our constraints */ + for (i = 1; i < max_state; i++) { + struct cpuidle_state *s = &dev->states[i]; + if (s->target_residency > expected_us) + break; + if (s->exit_latency > system_latency_constraint()) + break; + } + + data->last_state_idx = i - 1; + data->idle_jiffies = tick_nohz_get_idle_jiffies(); + return i - 1; +} + +/** + * menu_reflect - attempts to guess what happened after entry + * @dev: the CPU + * + * NOTE: it's important to be fast here because this operation will add to + * the overall exit latency. 
+ */ +static void menu_reflect(struct cpuidle_device *dev) +{ + struct menu_device *data = &__get_cpu_var(menu_devices); + int last_idx = data->last_state_idx; + int measured_us = cpuidle_get_last_residency(dev); + struct cpuidle_state *target = &dev->states[last_idx]; + + /* + * Ugh, this idle state doesn't support residency measurements, so we + * are basically lost in the dark. As a compromise, assume we slept + * for one full standard timer tick. However, be aware that this + * could potentially result in a suboptimal state transition. + */ + if (!(target->flags & CPUIDLE_FLAG_TIME_VALID)) + measured_us = USEC_PER_SEC / HZ; + + data->bm_elapsed_us += measured_us; + data->break_elapsed_us += measured_us; + + /* + * Did something other than the timer interrupt cause the break event? + */ + if (tick_nohz_get_idle_jiffies() == data->idle_jiffies) { + data->break_last_us = data->break_elapsed_us; + data->break_elapsed_us = 0; + } +} + +/** + * menu_scan_device - scans a CPU's states and does setup + * @dev: the CPU + */ +static void menu_scan_device(struct cpuidle_device *dev) +{ + struct menu_device *data = &per_cpu(menu_devices, dev->cpu); + int i; + + data->last_state_idx = 0; + data->break_last_us = 0; + data->break_elapsed_us = 0; + data->bm_elapsed_us = 0; + data->bm_holdoff_us = BM_HOLDOFF; + + for (i = 1; i < dev->state_count; i++) + if (dev->states[i].flags & CPUIDLE_FLAG_CHECK_BM) + break; + data->deepest_bm_state = i - 1; +} + +struct cpuidle_governor menu_governor = { + .name = "menu", + .scan = menu_scan_device, + .select = menu_select, + .reflect = menu_reflect, + .owner = THIS_MODULE, +}; + +/** + * init_menu - initializes the governor + */ +static int __init init_menu(void) +{ + return cpuidle_register_governor(&menu_governor); +} + +/** + * exit_menu - exits the governor + */ +static void __exit exit_menu(void) +{ + cpuidle_unregister_governor(&menu_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_menu); +module_exit(exit_menu); diff -Nurb 
linux-2.6.22-570/drivers/cpuidle/sysfs.c linux-2.6.22-591/drivers/cpuidle/sysfs.c --- linux-2.6.22-570/drivers/cpuidle/sysfs.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/cpuidle/sysfs.c 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,393 @@ +/* + * sysfs.c - sysfs support + * + * (C) 2006-2007 Shaohua Li + * + * This code is licenced under the GPL. + */ + +#include +#include +#include +#include + +#include "cpuidle.h" + +static unsigned int sysfs_switch; +static int __init cpuidle_sysfs_setup(char *unused) +{ + sysfs_switch = 1; + return 1; +} +__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup); + +static ssize_t show_available_drivers(struct sys_device *dev, char *buf) +{ + ssize_t i = 0; + struct cpuidle_driver *tmp; + + mutex_lock(&cpuidle_lock); + list_for_each_entry(tmp, &cpuidle_drivers, driver_list) { + if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + goto out; + i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); + } +out: + i+= sprintf(&buf[i], "\n"); + mutex_unlock(&cpuidle_lock); + return i; +} + +static ssize_t show_available_governors(struct sys_device *dev, char *buf) +{ + ssize_t i = 0; + struct cpuidle_governor *tmp; + + mutex_lock(&cpuidle_lock); + list_for_each_entry(tmp, &cpuidle_governors, governor_list) { + if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + goto out; + i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); + } + if (list_empty(&cpuidle_governors)) + i+= sprintf(&buf[i], "no governors"); +out: + i+= sprintf(&buf[i], "\n"); + mutex_unlock(&cpuidle_lock); + return i; +} + +static ssize_t show_current_driver(struct sys_device *dev, char *buf) +{ + ssize_t ret; + + mutex_lock(&cpuidle_lock); + ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name); + mutex_unlock(&cpuidle_lock); + return ret; +} + +static ssize_t store_current_driver(struct sys_device *dev, + const char *buf, size_t count) +{ + char str[CPUIDLE_NAME_LEN]; + int len = count; + struct 
cpuidle_driver *tmp, *found = NULL; + + if (len > CPUIDLE_NAME_LEN) + len = CPUIDLE_NAME_LEN; + + if (sscanf(buf, "%s", str) != 1) + return -EINVAL; + + mutex_lock(&cpuidle_lock); + list_for_each_entry(tmp, &cpuidle_drivers, driver_list) { + if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) { + found = tmp; + break; + } + } + if (found) + cpuidle_switch_driver(found); + mutex_unlock(&cpuidle_lock); + + return count; +} + +static ssize_t show_current_governor(struct sys_device *dev, char *buf) +{ + ssize_t i; + + mutex_lock(&cpuidle_lock); + if (cpuidle_curr_governor) + i = sprintf(buf, "%s\n", cpuidle_curr_governor->name); + else + i = sprintf(buf, "no governor\n"); + mutex_unlock(&cpuidle_lock); + + return i; +} + +static ssize_t store_current_governor(struct sys_device *dev, + const char *buf, size_t count) +{ + char str[CPUIDLE_NAME_LEN]; + int len = count; + struct cpuidle_governor *tmp, *found = NULL; + + if (len > CPUIDLE_NAME_LEN) + len = CPUIDLE_NAME_LEN; + + if (sscanf(buf, "%s", str) != 1) + return -EINVAL; + + mutex_lock(&cpuidle_lock); + list_for_each_entry(tmp, &cpuidle_governors, governor_list) { + if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) { + found = tmp; + break; + } + } + if (found) + cpuidle_switch_governor(found); + mutex_unlock(&cpuidle_lock); + + return count; +} + +static SYSDEV_ATTR(current_driver_ro, 0444, show_current_driver, NULL); +static SYSDEV_ATTR(current_governor_ro, 0444, show_current_governor, NULL); + +static struct attribute *cpuclass_default_attrs[] = { + &attr_current_driver_ro.attr, + &attr_current_governor_ro.attr, + NULL +}; + +static SYSDEV_ATTR(available_drivers, 0444, show_available_drivers, NULL); +static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL); +static SYSDEV_ATTR(current_driver, 0644, show_current_driver, + store_current_driver); +static SYSDEV_ATTR(current_governor, 0644, show_current_governor, + store_current_governor); + +static struct attribute *cpuclass_switch_attrs[] = { 
+ &attr_available_drivers.attr, + &attr_available_governors.attr, + &attr_current_driver.attr, + &attr_current_governor.attr, + NULL +}; + +static struct attribute_group cpuclass_attr_group = { + .attrs = cpuclass_default_attrs, + .name = "cpuidle", +}; + +/** + * cpuidle_add_class_sysfs - add CPU global sysfs attributes + */ +int cpuidle_add_class_sysfs(struct sysdev_class *cls) +{ + if (sysfs_switch) + cpuclass_attr_group.attrs = cpuclass_switch_attrs; + + return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group); +} + +/** + * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes + */ +void cpuidle_remove_class_sysfs(struct sysdev_class *cls) +{ + sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group); +} + +struct cpuidle_attr { + struct attribute attr; + ssize_t (*show)(struct cpuidle_device *, char *); + ssize_t (*store)(struct cpuidle_device *, const char *, size_t count); +}; + +#define define_one_ro(_name, show) \ + static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL) +#define define_one_rw(_name, show, store) \ + static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) + +#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) +#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) +static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) +{ + int ret = -EIO; + struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + + if (cattr->show) { + mutex_lock(&cpuidle_lock); + ret = cattr->show(dev, buf); + mutex_unlock(&cpuidle_lock); + } + return ret; +} + +static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, + const char * buf, size_t count) +{ + int ret = -EIO; + struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + + if (cattr->store) { + mutex_lock(&cpuidle_lock); + ret = cattr->store(dev, 
buf, count); + mutex_unlock(&cpuidle_lock); + } + return ret; +} + +static struct sysfs_ops cpuidle_sysfs_ops = { + .show = cpuidle_show, + .store = cpuidle_store, +}; + +static void cpuidle_sysfs_release(struct kobject *kobj) +{ + struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + + complete(&dev->kobj_unregister); +} + +static struct kobj_type ktype_cpuidle = { + .sysfs_ops = &cpuidle_sysfs_ops, + .release = cpuidle_sysfs_release, +}; + +struct cpuidle_state_attr { + struct attribute attr; + ssize_t (*show)(struct cpuidle_state *, char *); + ssize_t (*store)(struct cpuidle_state *, const char *, size_t); +}; + +#define define_one_state_ro(_name, show) \ +static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL) + +#define define_show_state_function(_name) \ +static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \ +{ \ + return sprintf(buf, "%d\n", state->_name);\ +} + +define_show_state_function(exit_latency) +define_show_state_function(power_usage) +define_show_state_function(usage) +define_show_state_function(time) +define_one_state_ro(latency, show_state_exit_latency); +define_one_state_ro(power, show_state_power_usage); +define_one_state_ro(usage, show_state_usage); +define_one_state_ro(time, show_state_time); + +static struct attribute *cpuidle_state_default_attrs[] = { + &attr_latency.attr, + &attr_power.attr, + &attr_usage.attr, + &attr_time.attr, + NULL +}; + +#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) +#define kobj_to_state(k) (kobj_to_state_obj(k)->state) +#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) +static ssize_t cpuidle_state_show(struct kobject * kobj, + struct attribute * attr ,char * buf) +{ + int ret = -EIO; + struct cpuidle_state *state = kobj_to_state(kobj); + struct cpuidle_state_attr * cattr = attr_to_stateattr(attr); + + if (cattr->show) + ret = cattr->show(state, buf); + + return ret; +} + +static struct sysfs_ops 
cpuidle_state_sysfs_ops = { + .show = cpuidle_state_show, +}; + +static void cpuidle_state_sysfs_release(struct kobject *kobj) +{ + struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj); + + complete(&state_obj->kobj_unregister); +} + +static struct kobj_type ktype_state_cpuidle = { + .sysfs_ops = &cpuidle_state_sysfs_ops, + .default_attrs = cpuidle_state_default_attrs, + .release = cpuidle_state_sysfs_release, +}; + +static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i) +{ + kobject_unregister(&device->kobjs[i]->kobj); + wait_for_completion(&device->kobjs[i]->kobj_unregister); + kfree(device->kobjs[i]); + device->kobjs[i] = NULL; +} + +/** + * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes + * @device: the target device + */ +int cpuidle_add_driver_sysfs(struct cpuidle_device *device) +{ + int i, ret = -ENOMEM; + struct cpuidle_state_kobj *kobj; + + /* state statistics */ + for (i = 0; i < device->state_count; i++) { + kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); + if (!kobj) + goto error_state; + kobj->state = &device->states[i]; + init_completion(&kobj->kobj_unregister); + + kobj->kobj.parent = &device->kobj; + kobj->kobj.ktype = &ktype_state_cpuidle; + kobject_set_name(&kobj->kobj, "state%d", i); + ret = kobject_register(&kobj->kobj); + if (ret) { + kfree(kobj); + goto error_state; + } + device->kobjs[i] = kobj; + } + + return 0; + +error_state: + for (i = i - 1; i >= 0; i--) + cpuidle_free_state_kobj(device, i); + return ret; +} + +/** + * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes + * @device: the target device + */ +void cpuidle_remove_driver_sysfs(struct cpuidle_device *device) +{ + int i; + + for (i = 0; i < device->state_count; i++) + cpuidle_free_state_kobj(device, i); +} + +/** + * cpuidle_add_sysfs - creates a sysfs instance for the target device + * @sysdev: the target device + */ +int cpuidle_add_sysfs(struct sys_device *sysdev) +{ + int cpu = 
sysdev->id; + struct cpuidle_device *dev; + + dev = per_cpu(cpuidle_devices, cpu); + dev->kobj.parent = &sysdev->kobj; + dev->kobj.ktype = &ktype_cpuidle; + kobject_set_name(&dev->kobj, "%s", "cpuidle"); + return kobject_register(&dev->kobj); +} + +/** + * cpuidle_remove_sysfs - deletes a sysfs instance on the target device + * @sysdev: the target device + */ +void cpuidle_remove_sysfs(struct sys_device *sysdev) +{ + int cpu = sysdev->id; + struct cpuidle_device *dev; + + dev = per_cpu(cpuidle_devices, cpu); + kobject_unregister(&dev->kobj); +} diff -Nurb linux-2.6.22-570/drivers/dma/Kconfig linux-2.6.22-591/drivers/dma/Kconfig --- linux-2.6.22-570/drivers/dma/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/dma/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -8,8 +8,8 @@ config DMA_ENGINE bool "Support for DMA engines" ---help--- - DMA engines offload copy operations from the CPU to dedicated - hardware, allowing the copies to happen asynchronously. + DMA engines offload bulk memory operations from the CPU to dedicated + hardware, allowing the operations to happen asynchronously. comment "DMA Clients" @@ -32,4 +32,11 @@ ---help--- Enable support for the Intel(R) I/OAT DMA engine. +config INTEL_IOP_ADMA + tristate "Intel IOP ADMA support" + depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX) + default m + ---help--- + Enable support for the Intel(R) IOP Series RAID engines. 
+ endmenu diff -Nurb linux-2.6.22-570/drivers/dma/Makefile linux-2.6.22-591/drivers/dma/Makefile --- linux-2.6.22-570/drivers/dma/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/dma/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -1,3 +1,4 @@ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_NET_DMA) += iovlock.o obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o diff -Nurb linux-2.6.22-570/drivers/dma/dmaengine.c linux-2.6.22-591/drivers/dma/dmaengine.c --- linux-2.6.22-570/drivers/dma/dmaengine.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/dma/dmaengine.c 2007-12-21 15:36:11.000000000 -0500 @@ -37,11 +37,11 @@ * Each device has a channels list, which runs unlocked but is never modified * once the device is registered, it's just setup by the driver. * - * Each client has a channels list, it's only modified under the client->lock - * and in an RCU callback, so it's safe to read under rcu_read_lock(). + * Each client is responsible for keeping track of the channels it uses. See + * the definition of dma_event_callback in dmaengine.h. * * Each device has a kref, which is initialized to 1 when the device is - * registered. A kref_put is done for each class_device registered. When the + * registered. A kref_get is done for each class_device registered. When the * class_device is released, the coresponding kref_put is done in the release * method. Every time one of the device's channels is allocated to a client, * a kref_get occurs. When the channel is freed, the coresponding kref_put @@ -51,14 +51,17 @@ * references to finish. * * Each channel has an open-coded implementation of Rusty Russell's "bigref," - * with a kref and a per_cpu local_t. A single reference is set when on an - * ADDED event, and removed with a REMOVE event. Net DMA client takes an - * extra reference per outstanding transaction. The relase function does a - * kref_put on the device. 
-ChrisL + * with a kref and a per_cpu local_t. A dma_chan_get is called when a client + * signals that it wants to use a channel, and dma_chan_put is called when + * a channel is removed or a client using it is unregesitered. A client can + * take extra references per outstanding transaction, as is the case with + * the NET DMA client. The release function does a kref_put on the device. + * -ChrisL, DanW */ #include #include +#include #include #include #include @@ -66,6 +69,7 @@ #include #include #include +#include static DEFINE_MUTEX(dma_list_mutex); static LIST_HEAD(dma_device_list); @@ -100,8 +104,19 @@ static ssize_t show_in_use(struct class_device *cd, char *buf) { struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev); + int in_use = 0; - return sprintf(buf, "%d\n", (chan->client ? 1 : 0)); + if (unlikely(chan->slow_ref) && + atomic_read(&chan->refcount.refcount) > 1) + in_use = 1; + else { + if (local_read(&(per_cpu_ptr(chan->local, + get_cpu())->refcount)) > 0) + in_use = 1; + put_cpu(); + } + + return sprintf(buf, "%d\n", in_use); } static struct class_device_attribute dma_class_attrs[] = { @@ -127,43 +142,72 @@ /* --- client and device registration --- */ +#define dma_chan_satisfies_mask(chan, mask) \ + __dma_chan_satisfies_mask((chan), &(mask)) +static int +__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want) +{ + dma_cap_mask_t has; + + bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits, + DMA_TX_TYPE_END); + return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); +} + /** - * dma_client_chan_alloc - try to allocate a channel to a client + * dma_client_chan_alloc - try to allocate channels to a client * @client: &dma_client * * Called with dma_list_mutex held. 
*/ -static struct dma_chan *dma_client_chan_alloc(struct dma_client *client) +static void dma_client_chan_alloc(struct dma_client *client) { struct dma_device *device; struct dma_chan *chan; - unsigned long flags; int desc; /* allocated descriptor count */ + enum dma_state_client ack; - /* Find a channel, any DMA engine will do */ - list_for_each_entry(device, &dma_device_list, global_node) { + /* Find a channel */ + list_for_each_entry(device, &dma_device_list, global_node) list_for_each_entry(chan, &device->channels, device_node) { - if (chan->client) + if (!dma_chan_satisfies_mask(chan, client->cap_mask)) continue; desc = chan->device->device_alloc_chan_resources(chan); if (desc >= 0) { + ack = client->event_callback(client, + chan, + DMA_RESOURCE_AVAILABLE); + + /* we are done once this client rejects + * an available resource + */ + if (ack == DMA_ACK) { + dma_chan_get(chan); kref_get(&device->refcount); - kref_init(&chan->refcount); - chan->slow_ref = 0; - INIT_RCU_HEAD(&chan->rcu); - chan->client = client; - spin_lock_irqsave(&client->lock, flags); - list_add_tail_rcu(&chan->client_node, - &client->channels); - spin_unlock_irqrestore(&client->lock, flags); - return chan; + } else if (ack == DMA_NAK) + return; } } +} + +enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) +{ + enum dma_status status; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); + + dma_async_issue_pending(chan); + do { + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + printk(KERN_ERR "dma_sync_wait_timeout!\n"); + return DMA_ERROR; } + } while (status == DMA_IN_PROGRESS); - return NULL; + return status; } +EXPORT_SYMBOL(dma_sync_wait); /** * dma_chan_cleanup - release a DMA channel's resources @@ -173,7 +217,6 @@ { struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); chan->device->device_free_chan_resources(chan); - chan->client = NULL; 
kref_put(&chan->device->refcount, dma_async_device_cleanup); } EXPORT_SYMBOL(dma_chan_cleanup); @@ -189,7 +232,7 @@ kref_put(&chan->refcount, dma_chan_cleanup); } -static void dma_client_chan_free(struct dma_chan *chan) +static void dma_chan_release(struct dma_chan *chan) { atomic_add(0x7FFFFFFF, &chan->refcount.refcount); chan->slow_ref = 1; @@ -197,70 +240,57 @@ } /** - * dma_chans_rebalance - reallocate channels to clients - * - * When the number of DMA channel in the system changes, - * channels need to be rebalanced among clients. + * dma_chans_notify_available - broadcast available channels to the clients */ -static void dma_chans_rebalance(void) +static void dma_clients_notify_available(void) { struct dma_client *client; - struct dma_chan *chan; - unsigned long flags; mutex_lock(&dma_list_mutex); - list_for_each_entry(client, &dma_client_list, global_node) { - while (client->chans_desired > client->chan_count) { - chan = dma_client_chan_alloc(client); - if (!chan) - break; - client->chan_count++; - client->event_callback(client, - chan, - DMA_RESOURCE_ADDED); - } - while (client->chans_desired < client->chan_count) { - spin_lock_irqsave(&client->lock, flags); - chan = list_entry(client->channels.next, - struct dma_chan, - client_node); - list_del_rcu(&chan->client_node); - spin_unlock_irqrestore(&client->lock, flags); - client->chan_count--; - client->event_callback(client, - chan, - DMA_RESOURCE_REMOVED); - dma_client_chan_free(chan); - } - } + list_for_each_entry(client, &dma_client_list, global_node) + dma_client_chan_alloc(client); mutex_unlock(&dma_list_mutex); } /** - * dma_async_client_register - allocate and register a &dma_client - * @event_callback: callback for notification of channel addition/removal + * dma_chans_notify_available - tell the clients that a channel is going away + * @chan: channel on its way out */ -struct dma_client *dma_async_client_register(dma_event_callback event_callback) +static void dma_clients_notify_removed(struct 
dma_chan *chan) { struct dma_client *client; + enum dma_state_client ack; - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return NULL; + mutex_lock(&dma_list_mutex); - INIT_LIST_HEAD(&client->channels); - spin_lock_init(&client->lock); - client->chans_desired = 0; - client->chan_count = 0; - client->event_callback = event_callback; + list_for_each_entry(client, &dma_client_list, global_node) { + ack = client->event_callback(client, chan, + DMA_RESOURCE_REMOVED); + /* client was holding resources for this channel so + * free it + */ + if (ack == DMA_ACK) { + dma_chan_put(chan); + kref_put(&chan->device->refcount, + dma_async_device_cleanup); + } + } + + mutex_unlock(&dma_list_mutex); +} + +/** + * dma_async_client_register - register a &dma_client + * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask' + */ +void dma_async_client_register(struct dma_client *client) +{ mutex_lock(&dma_list_mutex); list_add_tail(&client->global_node, &dma_client_list); mutex_unlock(&dma_list_mutex); - - return client; } EXPORT_SYMBOL(dma_async_client_register); @@ -272,40 +302,42 @@ */ void dma_async_client_unregister(struct dma_client *client) { + struct dma_device *device; struct dma_chan *chan; + enum dma_state_client ack; if (!client) return; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &client->channels, client_node) - dma_client_chan_free(chan); - rcu_read_unlock(); - mutex_lock(&dma_list_mutex); + /* free all channels the client is holding */ + list_for_each_entry(device, &dma_device_list, global_node) + list_for_each_entry(chan, &device->channels, device_node) { + ack = client->event_callback(client, chan, + DMA_RESOURCE_REMOVED); + + if (ack == DMA_ACK) { + dma_chan_put(chan); + kref_put(&chan->device->refcount, + dma_async_device_cleanup); + } + } + list_del(&client->global_node); mutex_unlock(&dma_list_mutex); - - kfree(client); - dma_chans_rebalance(); } EXPORT_SYMBOL(dma_async_client_unregister); /** - * 
dma_async_client_chan_request - request DMA channels - * @client: &dma_client - * @number: count of DMA channels requested - * - * Clients call dma_async_client_chan_request() to specify how many - * DMA channels they need, 0 to free all currently allocated. - * The resulting allocations/frees are indicated to the client via the - * event callback. + * dma_async_client_chan_request - send all available channels to the + * client that satisfy the capability mask + * @client - requester */ -void dma_async_client_chan_request(struct dma_client *client, - unsigned int number) +void dma_async_client_chan_request(struct dma_client *client) { - client->chans_desired = number; - dma_chans_rebalance(); + mutex_lock(&dma_list_mutex); + dma_client_chan_alloc(client); + mutex_unlock(&dma_list_mutex); } EXPORT_SYMBOL(dma_async_client_chan_request); @@ -322,6 +354,25 @@ if (!device) return -ENODEV; + /* validate device routines */ + BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) && + !device->device_prep_dma_memcpy); + BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && + !device->device_prep_dma_xor); + BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && + !device->device_prep_dma_zero_sum); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); + BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && + !device->device_prep_dma_interrupt); + + BUG_ON(!device->device_alloc_chan_resources); + BUG_ON(!device->device_free_chan_resources); + BUG_ON(!device->device_dependency_added); + BUG_ON(!device->device_is_tx_complete); + BUG_ON(!device->device_issue_pending); + BUG_ON(!device->dev); + init_completion(&device->done); kref_init(&device->refcount); device->dev_id = id++; @@ -339,6 +390,9 @@ device->dev_id, chan->chan_id); kref_get(&device->refcount); + kref_init(&chan->refcount); + chan->slow_ref = 0; + INIT_RCU_HEAD(&chan->rcu); class_device_register(&chan->class_dev); } @@ -346,7 +400,7 @@ list_add_tail(&device->global_node, &dma_device_list); 
mutex_unlock(&dma_list_mutex); - dma_chans_rebalance(); + dma_clients_notify_available(); return 0; } @@ -371,32 +425,165 @@ void dma_async_device_unregister(struct dma_device *device) { struct dma_chan *chan; - unsigned long flags; mutex_lock(&dma_list_mutex); list_del(&device->global_node); mutex_unlock(&dma_list_mutex); list_for_each_entry(chan, &device->channels, device_node) { - if (chan->client) { - spin_lock_irqsave(&chan->client->lock, flags); - list_del(&chan->client_node); - chan->client->chan_count--; - spin_unlock_irqrestore(&chan->client->lock, flags); - chan->client->event_callback(chan->client, - chan, - DMA_RESOURCE_REMOVED); - dma_client_chan_free(chan); - } + dma_clients_notify_removed(chan); class_device_unregister(&chan->class_dev); + dma_chan_release(chan); } - dma_chans_rebalance(); kref_put(&device->refcount, dma_async_device_cleanup); wait_for_completion(&device->done); } EXPORT_SYMBOL(dma_async_device_unregister); +/** + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses + * @chan: DMA channel to offload copy to + * @dest: destination address (virtual) + * @src: source address (virtual) + * @len: length + * + * Both @dest and @src must be mappable to a bus address according to the + * DMA mapping API rules for streaming mappings. + * Both @dest and @src must stay memory resident (kernel memory or locked + * user space pages). 
+ */ +dma_cookie_t +dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, + void *src, size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t addr; + dma_cookie_t cookie; + int cpu; + + tx = dev->device_prep_dma_memcpy(chan, len, 0); + if (!tx) + return -ENOMEM; + + tx->ack = 1; + tx->callback = NULL; + addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); + tx->tx_set_src(addr, tx, 0); + addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); + tx->tx_set_dest(addr, tx, 0); + cookie = tx->tx_submit(tx); + + cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy from address to page + * @chan: DMA channel to offload copy to + * @page: destination page + * @offset: offset in page to copy to + * @kdata: source address (virtual) + * @len: length + * + * Both @page/@offset and @kdata must be mappable to a bus address according + * to the DMA mapping API rules for streaming mappings. 
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or + * locked user space pages) + */ +dma_cookie_t +dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, + unsigned int offset, void *kdata, size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t addr; + dma_cookie_t cookie; + int cpu; + + tx = dev->device_prep_dma_memcpy(chan, len, 0); + if (!tx) + return -ENOMEM; + + tx->ack = 1; + tx->callback = NULL; + addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); + tx->tx_set_src(addr, tx, 0); + addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); + tx->tx_set_dest(addr, tx, 0); + cookie = tx->tx_submit(tx); + + cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); + +/** + * dma_async_memcpy_pg_to_pg - offloaded copy from page to page + * @chan: DMA channel to offload copy to + * @dest_pg: destination page + * @dest_off: offset in page to copy to + * @src_pg: source page + * @src_off: offset in page to copy from + * @len: length + * + * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus + * address according to the DMA mapping API rules for streaming mappings. + * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident + * (kernel memory or locked user space pages). 
+ */ +dma_cookie_t +dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, + unsigned int dest_off, struct page *src_pg, unsigned int src_off, + size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t addr; + dma_cookie_t cookie; + int cpu; + + tx = dev->device_prep_dma_memcpy(chan, len, 0); + if (!tx) + return -ENOMEM; + + tx->ack = 1; + tx->callback = NULL; + addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); + tx->tx_set_src(addr, tx, 0); + addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE); + tx->tx_set_dest(addr, tx, 0); + cookie = tx->tx_submit(tx); + + cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); + +void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, + struct dma_chan *chan) +{ + tx->chan = chan; + spin_lock_init(&tx->lock); + INIT_LIST_HEAD(&tx->depend_node); + INIT_LIST_HEAD(&tx->depend_list); +} +EXPORT_SYMBOL(dma_async_tx_descriptor_init); + static int __init dma_bus_init(void) { mutex_init(&dma_list_mutex); diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.c linux-2.6.22-591/drivers/dma/ioatdma.c --- linux-2.6.22-570/drivers/dma/ioatdma.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/dma/ioatdma.c 2007-12-21 15:36:11.000000000 -0500 @@ -39,6 +39,7 @@ #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) #define to_ioat_device(dev) container_of(dev, struct ioat_device, common) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) /* internal functions */ static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -71,13 +72,76 @@ INIT_LIST_HEAD(&ioat_chan->used_desc); /* This should be made common somewhere in 
dmaengine.c */ ioat_chan->common.device = &device->common; - ioat_chan->common.client = NULL; list_add_tail(&ioat_chan->common.device_node, &device->common.channels); } return device->common.chancnt; } +static void +ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index) +{ + struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + + pci_unmap_addr_set(desc, src, addr); + + list_for_each_entry(iter, &desc->group_list, node) { + iter->hw->src_addr = addr; + addr += ioat_chan->xfercap; + } + +} + +static void +ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index) +{ + struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + + pci_unmap_addr_set(desc, dst, addr); + + list_for_each_entry(iter, &desc->group_list, node) { + iter->hw->dst_addr = addr; + addr += ioat_chan->xfercap; + } +} + +static dma_cookie_t +ioat_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); + struct ioat_desc_sw *group_start = list_entry(desc->group_list.next, + struct ioat_desc_sw, node); + int append = 0; + dma_cookie_t cookie; + + spin_lock_bh(&ioat_chan->desc_lock); + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = desc->async_tx.cookie = cookie; + + /* write address into NextDescriptor field of last desc in chain */ + to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = group_start->phys; + list_splice_init(&desc->group_list, ioat_chan->used_desc.prev); + + ioat_chan->pending += desc->group_count; + if (ioat_chan->pending >= 4) { + append = 1; + ioat_chan->pending = 0; + } + spin_unlock_bh(&ioat_chan->desc_lock); + + if (append) + ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, + 
IOAT_CHANCMD_APPEND); + return cookie; +} + static struct ioat_desc_sw *ioat_dma_alloc_descriptor( struct ioat_dma_chan *ioat_chan, gfp_t flags) @@ -99,6 +163,11 @@ } memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); + desc_sw->async_tx.tx_set_src = ioat_set_src; + desc_sw->async_tx.tx_set_dest = ioat_set_dest; + desc_sw->async_tx.tx_submit = ioat_tx_submit; + INIT_LIST_HEAD(&desc_sw->group_list); desc_sw->hw = desc; desc_sw->phys = phys; @@ -215,45 +284,25 @@ ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); } -/** - * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction - * @ioat_chan: IOAT DMA channel handle - * @dest: DMA destination address - * @src: DMA source address - * @len: transaction length in bytes - */ - -static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan, - dma_addr_t dest, - dma_addr_t src, - size_t len) -{ - struct ioat_desc_sw *first; - struct ioat_desc_sw *prev; - struct ioat_desc_sw *new; - dma_cookie_t cookie; +static struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *first, *prev, *new; LIST_HEAD(new_chain); u32 copy; size_t orig_len; - dma_addr_t orig_src, orig_dst; - unsigned int desc_count = 0; - unsigned int append = 0; - - if (!ioat_chan || !dest || !src) - return -EFAULT; + int desc_count = 0; if (!len) - return ioat_chan->common.cookie; + return NULL; orig_len = len; - orig_src = src; - orig_dst = dest; first = NULL; prev = NULL; spin_lock_bh(&ioat_chan->desc_lock); - while (len) { if (!list_empty(&ioat_chan->free_desc)) { new = to_ioat_desc(ioat_chan->free_desc.next); @@ -270,9 +319,8 @@ new->hw->size = copy; new->hw->ctl = 0; - new->hw->src_addr = src; - new->hw->dst_addr = dest; - new->cookie = 0; + new->async_tx.cookie = 0; + new->async_tx.ack = 1; /* chain together the physical address list 
for the HW */ if (!first) @@ -281,130 +329,26 @@ prev->hw->next = (u64) new->phys; prev = new; - len -= copy; - dest += copy; - src += copy; - list_add_tail(&new->node, &new_chain); desc_count++; } - new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - new->hw->next = 0; - /* cookie incr and addition to used_list must be atomic */ + list_splice(&new_chain, &new->group_list); - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->cookie = cookie; + new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + new->hw->next = 0; + new->group_count = desc_count; + new->async_tx.ack = 0; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; - pci_unmap_addr_set(new, src, orig_src); - pci_unmap_addr_set(new, dst, orig_dst); pci_unmap_len_set(new, src_len, orig_len); pci_unmap_len_set(new, dst_len, orig_len); - - /* write address into NextDescriptor field of last desc in chain */ - to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys; - list_splice_init(&new_chain, ioat_chan->used_desc.prev); - - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= 20) { - append = 1; - ioat_chan->pending = 0; - } - spin_unlock_bh(&ioat_chan->desc_lock); - if (append) - ioatdma_chan_write8(ioat_chan, - IOAT_CHANCMD_OFFSET, - IOAT_CHANCMD_APPEND); - return cookie; -} - -/** - * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs - * @chan: IOAT DMA channel handle - * @dest: DMA destination address - * @src: DMA source address - * @len: transaction length in bytes - */ - -static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan, - void *dest, - void *src, - size_t len) -{ - dma_addr_t dest_addr; - dma_addr_t src_addr; - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - dest_addr = pci_map_single(ioat_chan->device->pdev, - dest, len, PCI_DMA_FROMDEVICE); - src_addr = pci_map_single(ioat_chan->device->pdev, - src, len, PCI_DMA_TODEVICE); - - return do_ioat_dma_memcpy(ioat_chan, 
dest_addr, src_addr, len); -} - -/** - * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page - * @chan: IOAT DMA channel handle - * @page: pointer to the page to copy to - * @offset: offset into that page - * @src: DMA source address - * @len: transaction length in bytes - */ - -static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan, - struct page *page, - unsigned int offset, - void *src, - size_t len) -{ - dma_addr_t dest_addr; - dma_addr_t src_addr; - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - dest_addr = pci_map_page(ioat_chan->device->pdev, - page, offset, len, PCI_DMA_FROMDEVICE); - src_addr = pci_map_single(ioat_chan->device->pdev, - src, len, PCI_DMA_TODEVICE); - - return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); + return new ? &new->async_tx : NULL; } -/** - * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages - * @chan: IOAT DMA channel handle - * @dest_pg: pointer to the page to copy to - * @dest_off: offset into that page - * @src_pg: pointer to the page to copy from - * @src_off: offset into that page - * @len: transaction length in bytes. This is guaranteed not to make a copy - * across a page boundary. - */ - -static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan, - struct page *dest_pg, - unsigned int dest_off, - struct page *src_pg, - unsigned int src_off, - size_t len) -{ - dma_addr_t dest_addr; - dma_addr_t src_addr; - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - dest_addr = pci_map_page(ioat_chan->device->pdev, - dest_pg, dest_off, len, PCI_DMA_FROMDEVICE); - src_addr = pci_map_page(ioat_chan->device->pdev, - src_pg, src_off, len, PCI_DMA_TODEVICE); - - return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); -} /** * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw @@ -467,8 +411,8 @@ * exceeding xfercap, perhaps. If so, only the last one will * have a cookie, and require unmapping. 
*/ - if (desc->cookie) { - cookie = desc->cookie; + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; /* yes we are unmapping both _page and _single alloc'd regions with unmap_page. Is this *really* that bad? @@ -484,13 +428,18 @@ } if (desc->phys != phys_complete) { - /* a completed entry, but not the last, so cleanup */ + /* a completed entry, but not the last, so cleanup + * if the client is done with the descriptor + */ + if (desc->async_tx.ack) { list_del(&desc->node); list_add_tail(&desc->node, &chan->free_desc); + } else + desc->async_tx.cookie = 0; } else { /* last used desc. Do not remove, so we can append from it, but don't look at it next time, either */ - desc->cookie = 0; + desc->async_tx.cookie = 0; /* TODO check status bits? */ break; @@ -506,6 +455,17 @@ spin_unlock(&chan->cleanup_lock); } +static void ioat_dma_dependency_added(struct dma_chan *chan) +{ /* device_dependency_added hook: when no descriptors are pending, drop desc_lock and run ioat_dma_memcpy_cleanup; otherwise only drop the lock */ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + spin_lock_bh(&ioat_chan->desc_lock); + if (ioat_chan->pending == 0) { + spin_unlock_bh(&ioat_chan->desc_lock); + ioat_dma_memcpy_cleanup(ioat_chan); + } else + spin_unlock_bh(&ioat_chan->desc_lock); +} + /** * ioat_dma_is_complete - poll the status of a IOAT DMA transaction * @chan: IOAT DMA channel handle @@ -607,6 +567,7 @@ desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; desc->hw->next = 0; + desc->async_tx.ack = 1; list_add_tail(&desc->node, &ioat_chan->used_desc); spin_unlock_bh(&ioat_chan->desc_lock); @@ -633,6 +594,8 @@ u8 *src; u8 *dest; struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t addr; dma_cookie_t cookie; int err = 0; @@ -658,7 +621,15 @@ goto out; } - cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE); + tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0); + async_tx_ack(tx); + addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, + DMA_TO_DEVICE); + ioat_set_src(addr, tx, 0); + addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, + 
DMA_FROM_DEVICE); + ioat_set_dest(addr, tx, 0); + cookie = ioat_tx_submit(tx); ioat_dma_memcpy_issue_pending(dma_chan); msleep(1); @@ -754,13 +725,14 @@ INIT_LIST_HEAD(&device->common.channels); enumerate_dma_channels(device); + dma_cap_set(DMA_MEMCPY, device->common.cap_mask); device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; device->common.device_free_chan_resources = ioat_dma_free_chan_resources; - device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; - device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; - device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; - device->common.device_memcpy_complete = ioat_dma_is_complete; - device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; + device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy; + device->common.device_is_tx_complete = ioat_dma_is_complete; + device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; + device->common.device_dependency_added = ioat_dma_dependency_added; + device->common.dev = &pdev->dev; printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n", device->common.chancnt); diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.h linux-2.6.22-591/drivers/dma/ioatdma.h --- linux-2.6.22-570/drivers/dma/ioatdma.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/dma/ioatdma.h 2007-12-21 15:36:11.000000000 -0500 @@ -30,9 +30,6 @@ #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 -extern struct list_head dma_device_list; -extern struct list_head dma_client_list; - /** * struct ioat_device - internal representation of a IOAT device * @pdev: PCI-Express device @@ -105,15 +102,20 @@ /** * struct ioat_desc_sw - wrapper around hardware descriptor * @hw: hardware DMA descriptor + * @async_tx: * @node: + * @group_list: + * @group_count: * @cookie: * @phys: */ struct ioat_desc_sw { struct ioat_dma_descriptor *hw; + struct dma_async_tx_descriptor async_tx; struct list_head node; - dma_cookie_t 
cookie; + struct list_head group_list; + int group_count; dma_addr_t phys; DECLARE_PCI_UNMAP_ADDR(src) DECLARE_PCI_UNMAP_LEN(src_len) @@ -122,4 +124,3 @@ }; #endif /* IOATDMA_H */ - diff -Nurb linux-2.6.22-570/drivers/dma/iop-adma.c linux-2.6.22-591/drivers/dma/iop-adma.c --- linux-2.6.22-570/drivers/dma/iop-adma.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/dma/iop-adma.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,1465 @@ +/* + * offload engine driver for the Intel Xscale series of i/o processors + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +/* + * This driver supports the asynchronous DMA copy and RAID engines available + * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +/* container_of helpers: map the embedded common / async_tx members back to the driver structures that contain them */ +#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common) +#define to_iop_adma_device(dev) \ + container_of(dev, struct iop_adma_device, common) +#define tx_to_iop_adma_slot(tx) \ + container_of(tx, struct iop_adma_desc_slot, async_tx) + +/** + * iop_adma_free_slots - flags descriptor slots for reuse + * @slot: first slot to free; slots_per_op is cleared on it and on the entries that follow it on the slot_node list, slot->slots_per_op entries in total + * Caller must hold &iop_chan->lock while calling this function + */ +static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) +{ + int stride = slot->slots_per_op; + + while (stride--) { + slot->slots_per_op = 0; + slot = list_entry(slot->slot_node.next, + struct iop_adma_desc_slot, + slot_node); + } +} +/* Completion actions for one descriptor: when it still holds a positive cookie, that cookie replaces the passed-in value, is zeroed, the callback is invoked and the DMA buffers are unmapped; dependent operations are then run. Returns the resulting cookie; a negative cookie is a BUG. */ +static dma_cookie_t +iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *iop_chan, dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + spin_lock_bh(&desc->async_tx.lock); + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) 
+ */ + if (desc->group_head && desc->unmap_len) { + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = + &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + u32 src_cnt = unmap->unmap_src_cnt; + dma_addr_t addr = iop_desc_get_dest_addr(unmap, + iop_chan); + /* NOTE(review): len and src_cnt are read from the group head while the test above checks desc itself; confirm both are the same slot for multi-slot groups */ + dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, + iop_chan, + src_cnt); + dma_unmap_page(dev, addr, len, + DMA_TO_DEVICE); + } + desc->group_head = NULL; + } + } + + /* run dependent operations */ + async_tx_run_dependencies(&desc->async_tx); + spin_unlock_bh(&desc->async_tx.lock); + + return cookie; +} +/* Retire one completed descriptor. Returns 1 when desc is acked and is the last entry on the chain (it is kept and the caller stops), otherwise 0; the slot is unlinked and freed only when it is acked and not last. */ +static int +iop_adma_clean_slot(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *iop_chan) +{ + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!desc->async_tx.ack) + return 0; + + /* leave the last descriptor in the chain + * so we can append to it + */ + if (desc->chain_node.next == &iop_chan->chain) + return 1; + + dev_dbg(iop_chan->device->common.dev, + "\tfree slot: %d slots_per_op: %d\n", + desc->idx, desc->slots_per_op); + + list_del(&desc->chain_node); + iop_adma_free_slots(desc); + + return 0; +} +/* Walk the chain from its oldest entry, run completion actions and free slots, stopping at the descriptor currently loaded in the hardware; records the newest completed cookie in completed_cookie. */ +static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; + dma_cookie_t cookie = 0; + u32 current_desc = iop_chan_get_current_descriptor(iop_chan); + int busy = iop_chan_is_busy(iop_chan); + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); + /* free completed slots from the chain starting with + * the oldest descriptor + */ + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, + chain_node) { + pr_debug("\tcookie: %d slot: %d busy: %d " + "this_desc: %#x next_desc: %#x ack: %d\n", + iter->async_tx.cookie, iter->idx, busy, iter->phys, + iop_desc_get_next_desc(iter), + iter->async_tx.ack); + prefetch(_iter); + 
prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy, or if it appears that the current descriptor + * needs to be re-read (i.e. has been appended to) + */ + if (iter->phys == current_desc) { + BUG_ON(seen_current++); + if (busy || iop_desc_get_next_desc(iter)) + break; + } + + /* detect the start of a group transaction */ + if (!slot_cnt && !slots_per_op) { + slot_cnt = iter->slot_cnt; + slots_per_op = iter->slots_per_op; + if (slot_cnt <= slots_per_op) { /* the operation fits in one descriptor: not a group */ + slot_cnt = 0; + slots_per_op = 0; + } + } + /* inside a group: remember its first member and count down the slots still to be seen */ + if (slot_cnt) { + pr_debug("\tgroup++\n"); + if (!grp_start) + grp_start = iter; + slot_cnt -= slots_per_op; + } + + /* all the members of a group are complete */ + if (slots_per_op != 0 && slot_cnt == 0) { + struct iop_adma_desc_slot *grp_iter, *_grp_iter; + int end_of_chain = 0; + pr_debug("\tgroup end\n"); + + /* collect the total results */ + if (grp_start->xor_check_result) { + u32 zero_sum_result = 0; + slot_cnt = grp_start->slot_cnt; + grp_iter = grp_start; + /* OR together the zero-sum result of every member of the group */ + list_for_each_entry_from(grp_iter, + &iop_chan->chain, chain_node) { + zero_sum_result |= + iop_desc_get_zero_result(grp_iter); + pr_debug("\titer%d result: %d\n", + grp_iter->idx, zero_sum_result); + slot_cnt -= slots_per_op; + if (slot_cnt == 0) + break; + } + pr_debug("\tgrp_start->xor_check_result: %p\n", + grp_start->xor_check_result); + *grp_start->xor_check_result = zero_sum_result; + } + + /* clean up the group */ + slot_cnt = grp_start->slot_cnt; + grp_iter = grp_start; + list_for_each_entry_safe_from(grp_iter, _grp_iter, + &iop_chan->chain, chain_node) { + cookie = iop_adma_run_tx_complete_actions( + grp_iter, iop_chan, cookie); + + slot_cnt -= slots_per_op; + end_of_chain = iop_adma_clean_slot(grp_iter, + iop_chan); + + if (slot_cnt == 0 
|| end_of_chain) + break; + } + + /* the group should be complete at this point */ + BUG_ON(slot_cnt); + + slots_per_op = 0; + grp_start = NULL; + if (end_of_chain) + break; + else + continue; + } else if (slots_per_op) /* wait for group completion */ + continue; + + /* write back zero sum results (single descriptor case) */ + if (iter->xor_check_result && iter->async_tx.cookie) + *iter->xor_check_result = + iop_desc_get_zero_result(iter); + + cookie = iop_adma_run_tx_complete_actions( + iter, iop_chan, cookie); + + if (iop_adma_clean_slot(iter, iop_chan)) + break; + } + + BUG_ON(!seen_current); + + iop_chan_idle(busy, iop_chan); + + if (cookie > 0) { + iop_chan->completed_cookie = cookie; + pr_debug("\tcompleted cookie %d\n", cookie); + } +} +/* locked wrapper: runs __iop_adma_slot_cleanup with iop_chan->lock held and bottom halves disabled */ +static void +iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) +{ + spin_lock_bh(&iop_chan->lock); + __iop_adma_slot_cleanup(iop_chan); + spin_unlock_bh(&iop_chan->lock); +} +/* tasklet body; NOTE(review): calls __iop_adma_slot_cleanup without taking iop_chan->lock, unlike iop_adma_slot_cleanup - confirm this is intended */ +static void iop_adma_tasklet(unsigned long data) +{ + struct iop_adma_chan *chan = (struct iop_adma_chan *) data; + __iop_adma_slot_cleanup(chan); +} +/* Find num_slots free slots that are contiguous on all_slots, build them into a group and return its last descriptor; returns NULL after scheduling irq_tasklet when no such run exists. */ +static struct iop_adma_desc_slot * +iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, + int slots_per_op) +{ + struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; + struct list_head chain = LIST_HEAD_INIT(chain); + int slots_found, retry = 0; + + /* start search from the last allocated descriptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = iop_chan->last_used; + else + iter = list_entry(&iop_chan->all_slots, + struct iop_adma_desc_slot, + slot_node); + + list_for_each_entry_safe_continue( + iter, _iter, &iop_chan->all_slots, slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + /* give up after finding the first busy slot + * on the second pass through the list + */ + if (retry) + break; + + slots_found = 0; + 
continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) { + if (iop_desc_is_aligned(iter, slots_per_op)) + alloc_start = iter; + else { + slots_found = 0; + continue; + } + } + + if (slots_found == num_slots) { + struct iop_adma_desc_slot *alloc_tail = NULL; + struct iop_adma_desc_slot *last_used = NULL; + iter = alloc_start; + while (num_slots) { + int i; + dev_dbg(iop_chan->device->common.dev, + "allocated slot: %d " + "(desc %p phys: %#x) slots_per_op %d\n", + iter->idx, iter->hw_desc, iter->phys, + slots_per_op); + + /* pre-ack all but the last descriptor */ + if (num_slots != slots_per_op) + iter->async_tx.ack = 1; + else + iter->async_tx.ack = 0; + /* queue the first slot of this operation on the local chain, then give every slot it occupies a non-zero slots_per_op so the search treats it as busy */ + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct iop_adma_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->group_list); + iop_chan->last_used = last_used; + iop_desc_clear_next_desc(alloc_start); + iop_desc_clear_next_desc(alloc_tail); + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&iop_chan->irq_tasklet); + + return NULL; +} +/* Advance the channel cookie, restarting at 1 when the increment yields a negative value, and store it in both the channel and desc; returns the new cookie. */ +static dma_cookie_t +iop_desc_assign_cookie(struct iop_adma_chan *iop_chan, + struct iop_adma_desc_slot *desc) +{ + dma_cookie_t cookie = iop_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + iop_chan->common.cookie = desc->async_tx.cookie = cookie; + return cookie; +} +/* Once the pending count reaches the threshold, reset it and call iop_chan_append. */ +static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) +{ + dev_dbg(iop_chan->device->common.dev, "pending: %d\n", + iop_chan->pending); + + if (iop_chan->pending >= 
IOP_ADMA_THRESHOLD) { + iop_chan->pending = 0; + iop_chan_append(iop_chan); + } +} +/* tx_submit hook: assign a cookie, splice the descriptor group onto the tail of the channel chain and link it into the hardware chain; returns the cookie. */ +static dma_cookie_t +iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); + struct iop_adma_desc_slot *grp_start, *old_chain_tail; + int slot_cnt; + int slots_per_op; + dma_cookie_t cookie; + + grp_start = sw_desc->group_head; + slot_cnt = grp_start->slot_cnt; + slots_per_op = grp_start->slots_per_op; /* NOTE(review): assigned but not read again in this function */ + + spin_lock_bh(&iop_chan->lock); + cookie = iop_desc_assign_cookie(iop_chan, sw_desc); + /* the current last entry of the chain; the new group is spliced in right after it */ + old_chain_tail = list_entry(iop_chan->chain.prev, + struct iop_adma_desc_slot, chain_node); + list_splice_init(&sw_desc->group_list, &old_chain_tail->chain_node); + + /* fix up the hardware chain */ + iop_desc_set_next_desc(old_chain_tail, grp_start->phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* increment the pending count by the number of slots + * memcpy operations have a 1:1 (slot:operation) relation + * other operations are heavier and will pop the threshold + * more often. 
+ */ + iop_chan->pending += slot_cnt; + iop_adma_check_threshold(iop_chan); + spin_unlock_bh(&iop_chan->lock); + + dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", + __FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx); + + return cookie; +} +/* tx_set_dest hook: program addr as the destination of the head descriptor of the group; index is not used */ +static void +iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, + int index) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); + + /* to do: support transfer lengths > IOP_ADMA_MAX_BYTE_COUNT */ + iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr); +} +/* forward declarations: iop_adma_alloc_chan_resources calls one of these to start a freshly initialised channel with a null operation */ +static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); +static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); + +/* returns the number of allocated descriptors, or -ENOMEM when the value would not be positive (NOTE(review): the value returned is idx, the slot count sampled at the start of the last loop pass - verify it matches slots_allocated) */ +static int iop_adma_alloc_chan_resources(struct dma_chan *chan) +{ + char *hw_desc; + int idx; + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *slot = NULL; + int init = iop_chan->slots_allocated ? 
0 : 1; + struct iop_adma_platform_data *plat_data = + iop_chan->device->pdev->dev.platform_data; + int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; + + /* Allocate descriptor slots */ + do { + idx = iop_chan->slots_allocated; + if (idx == num_descs_in_pool) + break; + /* one software slot per IOP_ADMA_SLOT_SIZE bytes of the descriptor pool */ + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "IOP ADMA Channel only initialized" + " %d descriptor slots", idx); + break; + } + hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; + /* tx_set_src is not installed here; the prep routines set it per operation type */ + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = iop_adma_tx_submit; + slot->async_tx.tx_set_dest = iop_adma_set_dest; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->group_list); + hw_desc = (char *) iop_chan->device->dma_desc_pool; /* same offset applied to dma_desc_pool, presumably the bus address of the pool - TODO confirm */ + slot->phys = (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; + slot->idx = idx; + + spin_lock_bh(&iop_chan->lock); + iop_chan->slots_allocated++; + list_add_tail(&slot->slot_node, &iop_chan->all_slots); + spin_unlock_bh(&iop_chan->lock); + } while (iop_chan->slots_allocated < num_descs_in_pool); + /* NOTE(review): after a complete first pass idx equals slots_allocated - 1, so a one-slot pool leaves last_used unset here and the function returns -ENOMEM - confirm */ + if (idx && !iop_chan->last_used) + iop_chan->last_used = list_entry(iop_chan->all_slots.next, + struct iop_adma_desc_slot, + slot_node); + + dev_dbg(iop_chan->device->common.dev, + "allocated %d descriptor slots last_used: %p\n", + iop_chan->slots_allocated, iop_chan->last_used); + + /* initialize the channel and the chain with a null operation */ + if (init) { + if (dma_has_cap(DMA_MEMCPY, + iop_chan->device->common.cap_mask)) + iop_chan_start_null_memcpy(iop_chan); + else if (dma_has_cap(DMA_XOR, + iop_chan->device->common.cap_mask)) + iop_chan_start_null_xor(iop_chan); + else + BUG(); + } + + return (idx > 0) ? 
idx : -ENOMEM; +} +/* Prepare an interrupt descriptor. Returns the async_tx descriptor of the allocated group, or NULL when no slots are available. */ +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_interrupt(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_interrupt(grp_start, iop_chan); + grp_start->unmap_len = 0; /* the completion path unmaps only when unmap_len is non-zero */ + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} +/* tx_set_src hook for memcpy: program addr as the source of the head descriptor of the group; index is not used */ +static void +iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, + int index) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_desc_slot *grp_start = sw_desc->group_head; + + iop_desc_set_memcpy_src_addr(grp_start, addr); +} +/* Prepare a memcpy of len bytes; the addresses are supplied later through tx_set_src and tx_set_dest. Returns NULL when len is 0 or no slots are available; len above IOP_ADMA_MAX_BYTE_COUNT is a BUG. */ +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + /* NOTE(review): len is a size_t but is printed with %u below */ + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", + __FUNCTION__, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_memcpy(grp_start, int_en); + iop_desc_set_byte_count(grp_start, iop_chan, len); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? 
&sw_desc->async_tx : NULL; +} +/* Prepare a fill of len bytes with value. Returns NULL when len is 0 or no slots are available; len above IOP_ADMA_MAX_BYTE_COUNT is a BUG. */ +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len, + int int_en) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", + __FUNCTION__, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_memset(grp_start, int_en); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_block_fill_val(grp_start, value); + sw_desc->unmap_src_cnt = 1; /* NOTE(review): no tx_set_src is installed for a fill, yet the completion path unmaps unmap_src_cnt source addresses - confirm */ + sw_desc->unmap_len = len; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} +/* tx_set_src hook for xor: program addr as source number index of the head descriptor of the group */ +static void +iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, + int index) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_desc_slot *grp_start = sw_desc->group_head; + + iop_desc_set_xor_src_addr(grp_start, index, addr); +} +/* Prepare an xor of src_cnt sources, len bytes each. Returns NULL when len is 0 or no slots are available; len above IOP_ADMA_XOR_MAX_BYTE_COUNT is a BUG. */ +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len, + int int_en) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u int_en: %d\n", + __FUNCTION__, src_cnt, len, int_en); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + 
iop_desc_init_xor(grp_start, src_cnt, int_en); + iop_desc_set_byte_count(grp_start, iop_chan, len); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} +/* tx_set_src hook for zero-sum: program addr as source number index of the head descriptor of the group */ +static void +iop_adma_xor_zero_sum_set_src(dma_addr_t addr, + struct dma_async_tx_descriptor *tx, + int index) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_desc_slot *grp_start = sw_desc->group_head; + + iop_desc_set_zero_sum_src_addr(grp_start, index, addr); +} +/* Prepare a zero-sum check over src_cnt sources of len bytes; the pointer result is stored in the group head so the cleanup path can write the outcome through it. Returns NULL when len is 0 or no slots are available. Unlike the other prep routines, len has no upper-bound BUG_ON here. */ +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt, + size_t len, u32 *result, int int_en) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __FUNCTION__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_zero_sum(grp_start, src_cnt, int_en); + iop_desc_set_zero_sum_byte_count(grp_start, len); + grp_start->xor_check_result = result; + pr_debug("\t%s: grp_start->xor_check_result: %p\n", + __FUNCTION__, grp_start->xor_check_result); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? 
&sw_desc->async_tx : NULL; +} +/* schedule the irq_tasklet of the channel */ +static void iop_adma_dependency_added(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + tasklet_schedule(&iop_chan->irq_tasklet); +} +/* Run a final cleanup pass, then unlink every descriptor still on the chain and free all slots of the channel. */ +static void iop_adma_free_chan_resources(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *iter, *_iter; + int in_use_descs = 0; + + iop_adma_slot_cleanup(iop_chan); + + spin_lock_bh(&iop_chan->lock); + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe_reverse( + iter, _iter, &iop_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + iop_chan->slots_allocated--; + } + iop_chan->last_used = NULL; + + dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", + __FUNCTION__, iop_chan->slots_allocated); + spin_unlock_bh(&iop_chan->lock); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", + in_use_descs - 1); +} + +/** + * iop_adma_is_complete - poll the status of an ADMA transaction + * @chan: ADMA channel handle + * @cookie: ADMA transaction identifier; done and used, when non-NULL, receive the last completed and the last issued cookie + */ +static enum dma_status iop_adma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + last_used = chan->cookie; + last_complete = iop_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) + return ret; + /* not complete yet: run a cleanup pass and sample the cookies again */ + iop_adma_slot_cleanup(iop_chan); + + last_used = chan->cookie; + last_complete = iop_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return 
dma_async_is_complete(cookie, last_complete, last_used); +} +/* EOT interrupt handler: schedules irq_tasklet, then clears the EOT status of the channel */ +static irqreturn_t iop_adma_eot_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__); + + tasklet_schedule(&chan->irq_tasklet); + + iop_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} +/* EOC interrupt handler: schedules irq_tasklet, then clears the EOC status of the channel */ +static irqreturn_t iop_adma_eoc_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__); + + tasklet_schedule(&chan->irq_tasklet); + + iop_adma_device_clear_eoc_status(chan); + + return IRQ_HANDLED; +} +/* error interrupt handler: logs which error conditions are flagged in the channel status, clears the error status and then calls BUG() */ +static irqreturn_t iop_adma_err_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + unsigned long status = iop_chan_get_status(chan); + + dev_printk(KERN_ERR, chan->device->common.dev, + "error ( %s%s%s%s%s%s%s)\n", + iop_is_err_int_parity(status, chan) ? "int_parity " : "", + iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", + iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", + iop_is_err_int_mabort(status, chan) ? "int_mabort " : "", + iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", + iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", + iop_is_err_split_tx(status, chan) ? "split_tx " : ""); + + iop_adma_device_clear_err_status(chan); + + BUG(); + + return IRQ_HANDLED; +} +/* Flush pending descriptors regardless of the threshold. NOTE(review): pending is read and cleared here without iop_chan->lock, while iop_adma_tx_submit updates it under that lock - confirm */ +static void iop_adma_issue_pending(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + + if (iop_chan->pending) { + iop_chan->pending = 0; + iop_chan_append(iop_chan); + } +} + +/* + * Perform a transaction to verify the HW works. 
+ */ +#define IOP_ADMA_TEST_SIZE 2000 + +static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) +{ + int i; + void *src, *dest; + dma_addr_t src_dma, dest_dma; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + struct dma_async_tx_descriptor *tx; + int err = 0; + struct iop_adma_chan *iop_chan; + + dev_dbg(device->common.dev, "%s\n", __FUNCTION__); + + src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) + ((u8 *) src)[i] = (u8)i; + + memset(dest, 0, IOP_ADMA_TEST_SIZE); + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1); + dest_dma = dma_map_single(dma_chan->device->dev, dest, + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); + iop_adma_set_dest(dest_dma, tx, 0); + src_dma = dma_map_single(dma_chan->device->dev, src, + IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); + iop_adma_memcpy_set_src(src_dma, tx, 0); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(1); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + iop_chan = to_iop_adma_chan(dma_chan); + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + 
iop_adma_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ +static int __devinit +iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) +{ /* self-test of xor, zero-sum and memset on the device's first channel; returns 0 on success or a negative errno */ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; + struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 zero_sum_result; + int err = 0; + struct iop_adma_chan *iop_chan; + + dev_dbg(device->common.dev, "%s\n", __FUNCTION__); + + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { /* unwind: free every page allocated so far, then fail (also covers src_idx == 0) */ + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { /* src_idx == IOP_ADMA_NUM_SRC_TEST here, so this frees all source pages */ + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST, + PAGE_SIZE, 1); + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + iop_adma_set_dest(dest_dma, tx, 0); + + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { + dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0, + PAGE_SIZE, DMA_TO_DEVICE); +
iop_adma_xor_set_src(dma_addr, tx, i); + } + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + iop_chan = to_iop_adma_chan(dma_chan); + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_TO_DEVICE); + + /* skip zero sum if the capability is not present */ + if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) + goto free_resources; + + /* zero sum the sources with the destintation page */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + zero_sum_srcs[i] = xor_srcs[i]; + zero_sum_srcs[i] = dest; + + zero_sum_result = 1; + + tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1, + PAGE_SIZE, &zero_sum_result, 1); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) { + dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + iop_adma_xor_zero_sum_set_src(dma_addr, tx, i); + } + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test zero sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test zero sum failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + 
/* test memset */ + tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1); + dma_addr = dma_map_page(dma_chan->device->dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + iop_adma_set_dest(dma_addr, tx, 0); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test memset timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test memset failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + zero_sum_result = 0; + tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1, + PAGE_SIZE, &zero_sum_result, 1); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) { + dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + iop_adma_xor_zero_sum_set_src(dma_addr, tx, i); + } + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test non-zero sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 1) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test non-zero sum failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + src_idx = IOP_ADMA_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devexit iop_adma_remove(struct platform_device *dev) +{ + struct iop_adma_device *device = platform_get_drvdata(dev); + struct dma_chan *chan, 
*_chan; + struct iop_adma_chan *iop_chan; + int i; + struct iop_adma_platform_data *plat_data = dev->dev.platform_data; + + dma_async_device_unregister(&device->common); + + for (i = 0; i < 3; i++) { + unsigned int irq; + irq = platform_get_irq(dev, i); + free_irq(irq, device); + } + + dma_free_coherent(&dev->dev, plat_data->pool_size, + device->dma_desc_pool_virt, device->dma_desc_pool); + + do { + struct resource *res; + res = platform_get_resource(dev, IORESOURCE_MEM, 0); + release_mem_region(res->start, res->end - res->start); + } while (0); + + list_for_each_entry_safe(chan, _chan, &device->common.channels, + device_node) { + iop_chan = to_iop_adma_chan(chan); + list_del(&chan->device_node); + kfree(iop_chan); + } + kfree(device); + + return 0; +} + +static int __devinit iop_adma_probe(struct platform_device *pdev) +{ + struct resource *res; + int ret = 0, i; + struct iop_adma_device *adev; + struct iop_adma_chan *iop_chan; + struct dma_device *dma_dev; + struct iop_adma_platform_data *plat_data = pdev->dev.platform_data; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + if (!devm_request_mem_region(&pdev->dev, res->start, + res->end - res->start, pdev->name)) + return -EBUSY; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + dma_dev = &adev->common; + + /* allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL)) == NULL) { + ret = -ENOMEM; + goto err_free_adev; + } + + dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n", + __FUNCTION__, adev->dma_desc_pool_virt, + (void *) adev->dma_desc_pool); + + adev->id = plat_data->hw_id; + + /* discover transaction capabilites from the platform data */ + dma_dev->cap_mask = plat_data->cap_mask; + 
+ adev->pdev = pdev; + platform_set_drvdata(pdev, adev); + + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; + dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; + dma_dev->device_is_tx_complete = iop_adma_is_complete; + dma_dev->device_issue_pending = iop_adma_issue_pending; + dma_dev->device_dependency_added = iop_adma_dependency_added; + dma_dev->dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset; + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->max_xor = iop_adma_get_max_xor(); + dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; + } + if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) + dma_dev->device_prep_dma_zero_sum = + iop_adma_prep_dma_zero_sum; + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) + dma_dev->device_prep_dma_interrupt = + iop_adma_prep_dma_interrupt; + + iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); + if (!iop_chan) { + ret = -ENOMEM; + goto err_free_dma; + } + iop_chan->device = adev; + + iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, + res->end - res->start); + if (!iop_chan->mmr_base) { + ret = -ENOMEM; + goto err_free_iop_chan; + } + tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long) + iop_chan); + + /* clear errors before enabling interrupts */ + iop_adma_device_clear_err_status(iop_chan); + + for (i = 0; i < 3; i++) { + irq_handler_t handler[] = { iop_adma_eot_handler, + iop_adma_eoc_handler, + iop_adma_err_handler }; + int irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = -ENXIO; + goto err_free_iop_chan; + } else { + ret = devm_request_irq(&pdev->dev, irq, + handler[i], 0, pdev->name, iop_chan); + if (ret) + goto err_free_iop_chan; + } + 
} + + spin_lock_init(&iop_chan->lock); + init_timer(&iop_chan->cleanup_watchdog); + iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan; + iop_chan->cleanup_watchdog.function = iop_adma_tasklet; + INIT_LIST_HEAD(&iop_chan->chain); + INIT_LIST_HEAD(&iop_chan->all_slots); + INIT_RCU_HEAD(&iop_chan->common.rcu); + iop_chan->common.device = dma_dev; + list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + ret = iop_adma_memcpy_self_test(adev); + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); + if (ret) + goto err_free_iop_chan; + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + ret = iop_adma_xor_zero_sum_self_test(adev); + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); + if (ret) + goto err_free_iop_chan; + } + + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " + "( %s%s%s%s%s%s%s%s%s%s)\n", + dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", + dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", + dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", + dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? 
"intr " : ""); + + dma_async_device_register(dma_dev); + goto out; + + err_free_iop_chan: + kfree(iop_chan); + err_free_dma: + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + err_free_adev: + kfree(adev); + out: + return ret; +} + +static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *sw_desc, *grp_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + + list_splice_init(&sw_desc->group_list, &iop_chan->chain); + sw_desc->async_tx.ack = 1; + iop_desc_init_memcpy(grp_start, 0); + iop_desc_set_byte_count(grp_start, iop_chan, 0); + iop_desc_set_dest_addr(grp_start, iop_chan, 0); + iop_desc_set_memcpy_src_addr(grp_start, 0); + + cookie = iop_chan->common.cookie; + cookie++; + if (cookie <= 1) + cookie = 2; + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + iop_chan->completed_cookie = cookie - 1; + iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; + + /* channel should not be busy */ + BUG_ON(iop_chan_is_busy(iop_chan)); + + /* clear any prior error-status bits */ + iop_adma_device_clear_err_status(iop_chan); + + /* disable operation */ + iop_chan_disable(iop_chan); + + /* set the descriptor address */ + iop_chan_set_next_descriptor(iop_chan, sw_desc->phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* run the descriptor */ + iop_chan_enable(iop_chan); + } else + dev_printk(KERN_ERR, iop_chan->device->common.dev, + "failed to allocate null descriptor\n"); + spin_unlock_bh(&iop_chan->lock); +} + +static void 
iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *sw_desc, *grp_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + list_splice_init(&sw_desc->group_list, &iop_chan->chain); + sw_desc->async_tx.ack = 1; + iop_desc_init_null_xor(grp_start, 2, 0); + iop_desc_set_byte_count(grp_start, iop_chan, 0); + iop_desc_set_dest_addr(grp_start, iop_chan, 0); + iop_desc_set_xor_src_addr(grp_start, 0, 0); + iop_desc_set_xor_src_addr(grp_start, 1, 0); + + cookie = iop_chan->common.cookie; + cookie++; + if (cookie <= 1) + cookie = 2; + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + iop_chan->completed_cookie = cookie - 1; + iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; + + /* channel should not be busy */ + BUG_ON(iop_chan_is_busy(iop_chan)); + + /* clear any prior error-status bits */ + iop_adma_device_clear_err_status(iop_chan); + + /* disable operation */ + iop_chan_disable(iop_chan); + + /* set the descriptor address */ + iop_chan_set_next_descriptor(iop_chan, sw_desc->phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* run the descriptor */ + iop_chan_enable(iop_chan); + } else + dev_printk(KERN_ERR, iop_chan->device->common.dev, + "failed to allocate null descriptor\n"); + spin_unlock_bh(&iop_chan->lock); +} + +static struct platform_driver iop_adma_driver = { + .probe = iop_adma_probe, + .remove = iop_adma_remove, + .driver = { + .owner = THIS_MODULE, + .name = "iop-adma", + }, +}; + +static int __init iop_adma_init (void) +{ + /* it's currently unsafe to unload this module */ + /* if forced, worst 
case is that rmmod hangs */ + __unsafe(THIS_MODULE); + + return platform_driver_register(&iop_adma_driver); +} + +static void __exit iop_adma_exit (void) +{ + platform_driver_unregister(&iop_adma_driver); + return; +} + +module_init(iop_adma_init); +module_exit(iop_adma_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("IOP ADMA Engine Driver"); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/drivers/edac/edac_mc.c linux-2.6.22-591/drivers/edac/edac_mc.c --- linux-2.6.22-570/drivers/edac/edac_mc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/edac/edac_mc.c 2007-12-21 15:36:11.000000000 -0500 @@ -1906,6 +1906,7 @@ static int edac_kernel_thread(void *arg) { + set_freezable(); while (!kthread_should_stop()) { do_edac_check(); diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.c linux-2.6.22-591/drivers/firmware/dcdbas.c --- linux-2.6.22-570/drivers/firmware/dcdbas.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/firmware/dcdbas.c 2007-12-21 15:36:11.000000000 -0500 @@ -149,8 +149,9 @@ return count; } -static ssize_t smi_data_read(struct kobject *kobj, char *buf, loff_t pos, - size_t count) +static ssize_t smi_data_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { size_t max_read; ssize_t ret; @@ -170,8 +171,9 @@ return ret; } -static ssize_t smi_data_write(struct kobject *kobj, char *buf, loff_t pos, - size_t count) +static ssize_t smi_data_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { ssize_t ret; diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.h linux-2.6.22-591/drivers/firmware/dcdbas.h --- linux-2.6.22-570/drivers/firmware/dcdbas.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/firmware/dcdbas.h 2007-12-21 15:36:11.000000000 -0500 @@ -67,8 +67,7 @@ #define DCDBAS_BIN_ATTR_RW(_name) \ struct bin_attribute bin_attr_##_name = { \ .attr = { .name = __stringify(_name), \ - .mode = 
0600, \ - .owner = THIS_MODULE }, \ + .mode = 0600 }, \ .read = _name##_read, \ .write = _name##_write, \ } diff -Nurb linux-2.6.22-570/drivers/firmware/dell_rbu.c linux-2.6.22-591/drivers/firmware/dell_rbu.c --- linux-2.6.22-570/drivers/firmware/dell_rbu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/firmware/dell_rbu.c 2007-12-21 15:36:11.000000000 -0500 @@ -543,8 +543,9 @@ return ret_count; } -static ssize_t read_rbu_data(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t read_rbu_data(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { ssize_t ret_count = 0; @@ -591,8 +592,9 @@ spin_unlock(&rbu_data.lock); } -static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t read_rbu_image_type(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { int size = 0; if (!pos) @@ -600,8 +602,9 @@ return size; } -static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t write_rbu_image_type(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { int rc = count; int req_firm_rc = 0; @@ -660,8 +663,9 @@ return rc; } -static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t read_rbu_packet_size(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { int size = 0; if (!pos) { @@ -672,8 +676,9 @@ return size; } -static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer, - loff_t pos, size_t count) +static ssize_t write_rbu_packet_size(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) { unsigned long temp; spin_lock(&rbu_data.lock); @@ -687,18 +692,18 @@ } static struct bin_attribute rbu_data_attr = { - .attr = {.name = 
"data",.owner = THIS_MODULE,.mode = 0444}, + .attr = {.name = "data", .mode = 0444}, .read = read_rbu_data, }; static struct bin_attribute rbu_image_type_attr = { - .attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644}, + .attr = {.name = "image_type", .mode = 0644}, .read = read_rbu_image_type, .write = write_rbu_image_type, }; static struct bin_attribute rbu_packet_size_attr = { - .attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644}, + .attr = {.name = "packet_size", .mode = 0644}, .read = read_rbu_packet_size, .write = write_rbu_packet_size, }; diff -Nurb linux-2.6.22-570/drivers/firmware/edd.c linux-2.6.22-591/drivers/firmware/edd.c --- linux-2.6.22-570/drivers/firmware/edd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/firmware/edd.c 2007-12-21 15:36:11.000000000 -0500 @@ -74,7 +74,7 @@ #define EDD_DEVICE_ATTR(_name,_mode,_show,_test) \ struct edd_attribute edd_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .test = _test, \ }; diff -Nurb linux-2.6.22-570/drivers/firmware/efivars.c linux-2.6.22-591/drivers/firmware/efivars.c --- linux-2.6.22-570/drivers/firmware/efivars.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/firmware/efivars.c 2007-12-21 15:36:11.000000000 -0500 @@ -131,21 +131,21 @@ #define EFI_ATTR(_name, _mode, _show, _store) \ struct subsys_attribute efi_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; #define EFIVAR_ATTR(_name, _mode, _show, _store) \ struct efivar_attribute efivar_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; #define VAR_SUBSYS_ATTR(_name, _mode, _show, 
_store) \ struct subsys_attribute var_subsys_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; diff -Nurb linux-2.6.22-570/drivers/i2c/chips/eeprom.c linux-2.6.22-591/drivers/i2c/chips/eeprom.c --- linux-2.6.22-570/drivers/i2c/chips/eeprom.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/i2c/chips/eeprom.c 2007-12-21 15:36:11.000000000 -0500 @@ -110,7 +110,8 @@ mutex_unlock(&data->update_lock); } -static ssize_t eeprom_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj)); struct eeprom_data *data = i2c_get_clientdata(client); @@ -150,7 +151,6 @@ .attr = { .name = "eeprom", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = EEPROM_SIZE, .read = eeprom_read, diff -Nurb linux-2.6.22-570/drivers/i2c/chips/max6875.c linux-2.6.22-591/drivers/i2c/chips/max6875.c --- linux-2.6.22-570/drivers/i2c/chips/max6875.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/i2c/chips/max6875.c 2007-12-21 15:36:11.000000000 -0500 @@ -125,8 +125,9 @@ mutex_unlock(&data->update_lock); } -static ssize_t max6875_read(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t max6875_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct i2c_client *client = kobj_to_i2c_client(kobj); struct max6875_data *data = i2c_get_clientdata(client); @@ -152,7 +153,6 @@ .attr = { .name = "eeprom", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = USER_EEPROM_SIZE, .read = max6875_read, diff -Nurb linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c linux-2.6.22-591/drivers/ieee1394/ieee1394_core.c --- 
linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/ieee1394/ieee1394_core.c 2007-12-21 15:36:11.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1133,8 +1134,6 @@ struct list_head tmp; int may_schedule; - current->flags |= PF_NOFREEZE; - while (!kthread_should_stop()) { INIT_LIST_HEAD(&tmp); diff -Nurb linux-2.6.22-570/drivers/ieee1394/nodemgr.c linux-2.6.22-591/drivers/ieee1394/nodemgr.c --- linux-2.6.22-570/drivers/ieee1394/nodemgr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/ieee1394/nodemgr.c 2007-12-21 15:36:11.000000000 -0500 @@ -1669,6 +1669,7 @@ unsigned int g, generation = 0; int i, reset_cycles = 0; + set_freezable(); /* Setup our device-model entries */ nodemgr_create_host_dev_files(host); diff -Nurb linux-2.6.22-570/drivers/ieee1394/sbp2.c linux-2.6.22-591/drivers/ieee1394/sbp2.c --- linux-2.6.22-570/drivers/ieee1394/sbp2.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/ieee1394/sbp2.c 2007-12-21 15:36:11.000000000 -0500 @@ -1505,69 +1505,6 @@ } } -static void sbp2_prep_command_orb_no_sg(struct sbp2_command_orb *orb, - struct sbp2_fwhost_info *hi, - struct sbp2_command_info *cmd, - struct scatterlist *sgpnt, - u32 orb_direction, - unsigned int scsi_request_bufflen, - void *scsi_request_buffer, - enum dma_data_direction dma_dir) -{ - cmd->dma_dir = dma_dir; - cmd->dma_size = scsi_request_bufflen; - cmd->dma_type = CMD_DMA_SINGLE; - cmd->cmd_dma = dma_map_single(hi->host->device.parent, - scsi_request_buffer, - cmd->dma_size, cmd->dma_dir); - orb->data_descriptor_hi = ORB_SET_NODE_ID(hi->host->node_id); - orb->misc |= ORB_SET_DIRECTION(orb_direction); - - /* handle case where we get a command w/o s/g enabled - * (but check for transfers larger than 64K) */ - if (scsi_request_bufflen <= SBP2_MAX_SG_ELEMENT_LENGTH) { - - orb->data_descriptor_lo = cmd->cmd_dma; - orb->misc |= 
ORB_SET_DATA_SIZE(scsi_request_bufflen); - - } else { - /* The buffer is too large. Turn this into page tables. */ - - struct sbp2_unrestricted_page_table *sg_element = - &cmd->scatter_gather_element[0]; - u32 sg_count, sg_len; - dma_addr_t sg_addr; - - orb->data_descriptor_lo = cmd->sge_dma; - orb->misc |= ORB_SET_PAGE_TABLE_PRESENT(0x1); - - /* fill out our SBP-2 page tables; split up the large buffer */ - sg_count = 0; - sg_len = scsi_request_bufflen; - sg_addr = cmd->cmd_dma; - while (sg_len) { - sg_element[sg_count].segment_base_lo = sg_addr; - if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) { - sg_element[sg_count].length_segment_base_hi = - PAGE_TABLE_SET_SEGMENT_LENGTH(SBP2_MAX_SG_ELEMENT_LENGTH); - sg_addr += SBP2_MAX_SG_ELEMENT_LENGTH; - sg_len -= SBP2_MAX_SG_ELEMENT_LENGTH; - } else { - sg_element[sg_count].length_segment_base_hi = - PAGE_TABLE_SET_SEGMENT_LENGTH(sg_len); - sg_len = 0; - } - sg_count++; - } - - orb->misc |= ORB_SET_DATA_SIZE(sg_count); - - sbp2util_cpu_to_be32_buffer(sg_element, - (sizeof(struct sbp2_unrestricted_page_table)) * - sg_count); - } -} - static void sbp2_create_command_orb(struct sbp2_lu *lu, struct sbp2_command_info *cmd, unchar *scsi_cmd, @@ -1611,13 +1548,9 @@ orb->data_descriptor_hi = 0x0; orb->data_descriptor_lo = 0x0; orb->misc |= ORB_SET_DIRECTION(1); - } else if (scsi_use_sg) + } else sbp2_prep_command_orb_sg(orb, hi, cmd, scsi_use_sg, sgpnt, orb_direction, dma_dir); - else - sbp2_prep_command_orb_no_sg(orb, hi, cmd, sgpnt, orb_direction, - scsi_request_bufflen, - scsi_request_buffer, dma_dir); sbp2util_cpu_to_be32_buffer(orb, sizeof(*orb)); @@ -1706,15 +1639,15 @@ void (*done)(struct scsi_cmnd *)) { unchar *scsi_cmd = (unchar *)SCpnt->cmnd; - unsigned int request_bufflen = SCpnt->request_bufflen; + unsigned int request_bufflen = scsi_bufflen(SCpnt); struct sbp2_command_info *cmd; cmd = sbp2util_allocate_command_orb(lu, SCpnt, done); if (!cmd) return -EIO; - sbp2_create_command_orb(lu, cmd, scsi_cmd, SCpnt->use_sg, - 
request_bufflen, SCpnt->request_buffer, + sbp2_create_command_orb(lu, cmd, scsi_cmd, scsi_sg_count(SCpnt), + request_bufflen, scsi_sglist(SCpnt), SCpnt->sc_data_direction); sbp2_link_orb_command(lu, cmd); diff -Nurb linux-2.6.22-570/drivers/infiniband/core/addr.c linux-2.6.22-591/drivers/infiniband/core/addr.c --- linux-2.6.22-570/drivers/infiniband/core/addr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/core/addr.c 2007-12-21 15:36:14.000000000 -0500 @@ -110,7 +110,7 @@ __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; int ret; - dev = ip_dev_find(ip); + dev = ip_dev_find(&init_net, ip); if (!dev) return -EADDRNOTAVAIL; @@ -157,6 +157,7 @@ u32 dst_ip = dst_in->sin_addr.s_addr; memset(&fl, 0, sizeof fl); + fl.fl_net = &init_net; fl.nl_u.ip4_u.daddr = dst_ip; if (ip_route_output_key(&rt, &fl)) return; @@ -178,6 +179,7 @@ int ret; memset(&fl, 0, sizeof fl); + fl.fl_net = &init_net; fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; ret = ip_route_output_key(&rt, &fl); @@ -262,7 +264,7 @@ __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; - dev = ip_dev_find(dst_ip); + dev = ip_dev_find(&init_net, dst_ip); if (!dev) return -EADDRNOTAVAIL; diff -Nurb linux-2.6.22-570/drivers/infiniband/core/cma.c linux-2.6.22-591/drivers/infiniband/core/cma.c --- linux-2.6.22-570/drivers/infiniband/core/cma.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/core/cma.c 2007-12-21 15:36:14.000000000 -0500 @@ -1267,7 +1267,7 @@ atomic_inc(&conn_id->dev_remove); conn_id->state = CMA_CONNECT; - dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; cma_enable_remove(conn_id); @@ -1880,18 +1880,18 @@ if (ret) goto err1; - if (port > sysctl_local_port_range[1]) { - if (next_port != sysctl_local_port_range[0]) { + if (port > init_net.sysctl_local_port_range[1]) { + if (next_port != 
init_net.sysctl_local_port_range[0]) { idr_remove(ps, port); - next_port = sysctl_local_port_range[0]; + next_port = init_net.sysctl_local_port_range[0]; goto retry; } ret = -EADDRNOTAVAIL; goto err2; } - if (port == sysctl_local_port_range[1]) - next_port = sysctl_local_port_range[0]; + if (port == init_net.sysctl_local_port_range[1]) + next_port = init_net.sysctl_local_port_range[0]; else next_port = port + 1; @@ -2774,8 +2774,9 @@ get_random_bytes(&next_port, sizeof next_port); next_port = ((unsigned int) next_port % - (sysctl_local_port_range[1] - sysctl_local_port_range[0])) + - sysctl_local_port_range[0]; + (init_net.sysctl_local_port_range[1] - + init_net.sysctl_local_port_range[0])) + + init_net.sysctl_local_port_range[0]; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) return -ENOMEM; diff -Nurb linux-2.6.22-570/drivers/infiniband/core/sysfs.c linux-2.6.22-591/drivers/infiniband/core/sysfs.c --- linux-2.6.22-570/drivers/infiniband/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/core/sysfs.c 2007-12-21 15:36:11.000000000 -0500 @@ -479,7 +479,6 @@ element->attr.attr.name = element->name; element->attr.attr.mode = S_IRUGO; - element->attr.attr.owner = THIS_MODULE; element->attr.show = show; element->index = i; diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.c --- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.c 2007-12-21 15:36:11.000000000 -0500 @@ -134,19 +134,9 @@ { struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; - struct scsi_cmnd *sc = ctask->sc; iser_ctask->command_sent = 0; iser_ctask->iser_conn = iser_conn; - - if (sc->sc_data_direction == DMA_TO_DEVICE) { - BUG_ON(ctask->total_length == 0); - - debug_scsi("cmd [itt %x total %d imm %d unsol_data 
%d\n", - ctask->itt, ctask->total_length, ctask->imm_count, - ctask->unsol_count); - } - iser_ctask_rdma_init(iser_ctask); } @@ -219,6 +209,14 @@ struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; int error = 0; + if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) { + BUG_ON(scsi_bufflen(ctask->sc) == 0); + + debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", + ctask->itt, scsi_bufflen(ctask->sc), + ctask->imm_count, ctask->unsol_count); + } + debug_scsi("ctask deq [cid %d itt 0x%x]\n", conn->id, ctask->itt); @@ -375,6 +373,7 @@ static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_transport *iscsit, struct scsi_transport_template *scsit, + uint16_t cmds_max, uint16_t qdepth, uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; @@ -386,7 +385,13 @@ struct iscsi_iser_cmd_task *iser_ctask; struct iser_desc *desc; + /* + * we do not support setting can_queue cmd_per_lun from userspace yet + * because we preallocate so many resources + */ cls_session = iscsi_session_setup(iscsit, scsit, + ISCSI_DEF_XMIT_CMDS_MAX, + ISCSI_MAX_CMD_PER_LUN, sizeof(struct iscsi_iser_cmd_task), sizeof(struct iser_desc), initial_cmdsn, &hn); @@ -545,7 +550,7 @@ static struct scsi_host_template iscsi_iser_sht = { .name = "iSCSI Initiator over iSER, v." 
DRV_VER, .queuecommand = iscsi_queuecommand, - .can_queue = ISCSI_XMIT_CMDS_MAX - 1, + .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, @@ -574,8 +579,12 @@ ISCSI_EXP_STATSN | ISCSI_PERSISTENT_PORT | ISCSI_PERSISTENT_ADDRESS | - ISCSI_TARGET_NAME | - ISCSI_TPGT, + ISCSI_TARGET_NAME | ISCSI_TPGT | + ISCSI_USERNAME | ISCSI_PASSWORD | + ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN, + .host_param_mask = ISCSI_HOST_HWADDRESS | + ISCSI_HOST_NETDEV_NAME | + ISCSI_HOST_INITIATOR_NAME, .host_template = &iscsi_iser_sht, .conndata_size = sizeof(struct iscsi_conn), .max_lun = ISCSI_ISER_MAX_LUN, @@ -592,6 +601,9 @@ .get_session_param = iscsi_session_get_param, .start_conn = iscsi_iser_conn_start, .stop_conn = iscsi_conn_stop, + /* iscsi host params */ + .get_host_param = iscsi_host_get_param, + .set_host_param = iscsi_host_set_param, /* IO */ .send_pdu = iscsi_conn_send_pdu, .get_stats = iscsi_iser_conn_get_stats, diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.h --- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.h 2007-12-21 15:36:11.000000000 -0500 @@ -98,7 +98,7 @@ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * * SCSI_TMFUNC(2), LOGOUT(1) */ -#define ISER_QP_MAX_RECV_DTOS (ISCSI_XMIT_CMDS_MAX + \ +#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ ISER_MAX_RX_MISC_PDUS + \ ISER_MAX_TX_MISC_PDUS) @@ -110,7 +110,7 @@ #define ISER_INFLIGHT_DATAOUTS 8 -#define ISER_QP_MAX_REQ_DTOS (ISCSI_XMIT_CMDS_MAX * \ +#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \ (1 + ISER_INFLIGHT_DATAOUTS) + \ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_initiator.c --- 
linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_initiator.c 2007-12-21 15:36:11.000000000 -0500 @@ -351,18 +351,12 @@ else data_buf = &iser_ctask->data[ISER_DIR_OUT]; - if (sc->use_sg) { /* using a scatter list */ - data_buf->buf = sc->request_buffer; - data_buf->size = sc->use_sg; - } else if (sc->request_bufflen) { - /* using a single buffer - convert it into one entry SG */ - sg_init_one(&data_buf->sg_single, - sc->request_buffer, sc->request_bufflen); - data_buf->buf = &data_buf->sg_single; - data_buf->size = 1; + if (scsi_sg_count(sc)) { /* using a scatter list */ + data_buf->buf = scsi_sglist(sc); + data_buf->size = scsi_sg_count(sc); } - data_buf->data_len = sc->request_bufflen; + data_buf->data_len = scsi_bufflen(sc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { err = iser_prepare_read_cmd(ctask, edtl); diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_verbs.c --- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_verbs.c 2007-12-21 15:36:11.000000000 -0500 @@ -155,8 +155,8 @@ params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ - params.pool_size = ISCSI_XMIT_CMDS_MAX * 2; - params.dirty_watermark = ISCSI_XMIT_CMDS_MAX; + params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2; + params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX; params.cache = 0; params.flush_function = NULL; params.access = (IB_ACCESS_LOCAL_WRITE | diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.c --- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.c 
2007-12-21 15:36:11.000000000 -0500 @@ -455,10 +455,7 @@ struct srp_target_port *target, struct srp_request *req) { - struct scatterlist *scat; - int nents; - - if (!scmnd->request_buffer || + if (!scsi_sglist(scmnd) || (scmnd->sc_data_direction != DMA_TO_DEVICE && scmnd->sc_data_direction != DMA_FROM_DEVICE)) return; @@ -468,20 +465,8 @@ req->fmr = NULL; } - /* - * This handling of non-SG commands can be killed when the - * SCSI midlayer no longer generates non-SG commands. - */ - if (likely(scmnd->use_sg)) { - nents = scmnd->use_sg; - scat = scmnd->request_buffer; - } else { - nents = 1; - scat = &req->fake_sg; - } - - ib_dma_unmap_sg(target->srp_host->dev->dev, scat, nents, - scmnd->sc_data_direction); + ib_dma_unmap_sg(target->srp_host->dev->dev, scsi_sglist(scmnd), + scsi_sg_count(scmnd), scmnd->sc_data_direction); } static void srp_remove_req(struct srp_target_port *target, struct srp_request *req) @@ -595,6 +580,7 @@ int ret; struct srp_device *dev = target->srp_host->dev; struct ib_device *ibdev = dev->dev; + struct scatterlist *sg; if (!dev->fmr_pool) return -ENODEV; @@ -604,16 +590,16 @@ return -EINVAL; len = page_cnt = 0; - for (i = 0; i < sg_cnt; ++i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); + scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { + unsigned int dma_len = ib_sg_dma_len(ibdev, sg); - if (ib_sg_dma_address(ibdev, &scat[i]) & ~dev->fmr_page_mask) { + if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) { if (i > 0) return -EINVAL; else ++page_cnt; } - if ((ib_sg_dma_address(ibdev, &scat[i]) + dma_len) & + if ((ib_sg_dma_address(ibdev, sg) + dma_len) & ~dev->fmr_page_mask) { if (i < sg_cnt - 1) return -EINVAL; @@ -633,12 +619,12 @@ return -ENOMEM; page_cnt = 0; - for (i = 0; i < sg_cnt; ++i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); + scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { + unsigned int dma_len = ib_sg_dma_len(ibdev, sg); for (j = 0; j < dma_len; j += dev->fmr_page_size) dma_pages[page_cnt++] = - 
(ib_sg_dma_address(ibdev, &scat[i]) & + (ib_sg_dma_address(ibdev, sg) & dev->fmr_page_mask) + j; } @@ -673,7 +659,7 @@ struct srp_device *dev; struct ib_device *ibdev; - if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) + if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) return sizeof (struct srp_cmd); if (scmnd->sc_data_direction != DMA_FROM_DEVICE && @@ -683,18 +669,8 @@ return -EINVAL; } - /* - * This handling of non-SG commands can be killed when the - * SCSI midlayer no longer generates non-SG commands. - */ - if (likely(scmnd->use_sg)) { - nents = scmnd->use_sg; - scat = scmnd->request_buffer; - } else { - nents = 1; - scat = &req->fake_sg; - sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen); - } + nents = scsi_sg_count(scmnd); + scat = scsi_sglist(scmnd); dev = target->srp_host->dev; ibdev = dev->dev; @@ -724,6 +700,7 @@ * descriptor. */ struct srp_indirect_buf *buf = (void *) cmd->add_data; + struct scatterlist *sg; u32 datalen = 0; int i; @@ -732,11 +709,11 @@ sizeof (struct srp_indirect_buf) + count * sizeof (struct srp_direct_buf); - for (i = 0; i < count; ++i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); + scsi_for_each_sg(scmnd, sg, count, i) { + unsigned int dma_len = ib_sg_dma_len(ibdev, sg); buf->desc_list[i].va = - cpu_to_be64(ib_sg_dma_address(ibdev, &scat[i])); + cpu_to_be64(ib_sg_dma_address(ibdev, sg)); buf->desc_list[i].key = cpu_to_be32(dev->mr->rkey); buf->desc_list[i].len = cpu_to_be32(dma_len); @@ -802,9 +779,9 @@ } if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) - scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt); + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) - scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt); + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); if (!req->tsk_mgmt) { scmnd->host_scribble = (void *) -1L; diff -Nurb 
linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.h --- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.h 2007-12-21 15:36:11.000000000 -0500 @@ -106,11 +106,6 @@ struct srp_iu *cmd; struct srp_iu *tsk_mgmt; struct ib_pool_fmr *fmr; - /* - * Fake scatterlist used when scmnd->use_sg==0. Can be killed - * when the SCSI midlayer no longer generates non-SG commands. - */ - struct scatterlist fake_sg; struct completion done; short index; u8 cmd_done; diff -Nurb linux-2.6.22-570/drivers/input/gameport/gameport.c linux-2.6.22-591/drivers/input/gameport/gameport.c --- linux-2.6.22-570/drivers/input/gameport/gameport.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/input/gameport/gameport.c 2007-12-21 15:36:11.000000000 -0500 @@ -445,6 +445,7 @@ static int gameport_thread(void *nothing) { + set_freezable(); do { gameport_handle_event(); wait_event_interruptible(gameport_wait, diff -Nurb linux-2.6.22-570/drivers/input/mouse/psmouse.h linux-2.6.22-591/drivers/input/mouse/psmouse.h --- linux-2.6.22-570/drivers/input/mouse/psmouse.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/input/mouse/psmouse.h 2007-12-21 15:36:11.000000000 -0500 @@ -118,7 +118,6 @@ .attr = { \ .name = __stringify(_name), \ .mode = _mode, \ - .owner = THIS_MODULE, \ }, \ .show = psmouse_attr_show_helper, \ .store = psmouse_attr_set_helper, \ diff -Nurb linux-2.6.22-570/drivers/input/serio/serio.c linux-2.6.22-591/drivers/input/serio/serio.c --- linux-2.6.22-570/drivers/input/serio/serio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/input/serio/serio.c 2007-12-21 15:36:11.000000000 -0500 @@ -384,6 +384,7 @@ static int serio_thread(void *nothing) { + set_freezable(); do { serio_handle_event(); wait_event_interruptible(serio_wait, diff -Nurb linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c 
linux-2.6.22-591/drivers/input/touchscreen/ucb1400_ts.c --- linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/input/touchscreen/ucb1400_ts.c 2007-12-21 15:36:11.000000000 -0500 @@ -292,6 +292,7 @@ sched_setscheduler(tsk, SCHED_FIFO, &param); + set_freezable(); while (!kthread_should_stop()) { unsigned int x, y, p; long timeout; diff -Nurb linux-2.6.22-570/drivers/isdn/divert/divert_procfs.c linux-2.6.22-591/drivers/isdn/divert/divert_procfs.c --- linux-2.6.22-570/drivers/isdn/divert/divert_procfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/isdn/divert/divert_procfs.c 2007-12-21 15:36:14.000000000 -0500 @@ -17,6 +17,7 @@ #include #endif #include +#include #include "isdn_divert.h" @@ -284,12 +285,12 @@ init_waitqueue_head(&rd_queue); #ifdef CONFIG_PROC_FS - isdn_proc_entry = proc_mkdir("net/isdn", NULL); + isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net); if (!isdn_proc_entry) return (-1); isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry); if (!isdn_divert_entry) { - remove_proc_entry("net/isdn", NULL); + remove_proc_entry("isdn", init_net.proc_net); return (-1); } isdn_divert_entry->proc_fops = &isdn_fops; @@ -309,7 +310,7 @@ #ifdef CONFIG_PROC_FS remove_proc_entry("divert", isdn_proc_entry); - remove_proc_entry("net/isdn", NULL); + remove_proc_entry("isdn", init_net.proc_net); #endif /* CONFIG_PROC_FS */ return (0); diff -Nurb linux-2.6.22-570/drivers/isdn/hardware/eicon/diva_didd.c linux-2.6.22-591/drivers/isdn/hardware/eicon/diva_didd.c --- linux-2.6.22-570/drivers/isdn/hardware/eicon/diva_didd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/isdn/hardware/eicon/diva_didd.c 2007-12-21 15:36:14.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include #include "platform.h" #include "di_defs.h" @@ -86,7 +87,7 @@ static int DIVA_INIT_FUNCTION create_proc(void) { - proc_net_eicon = proc_mkdir("net/eicon",
NULL); + proc_net_eicon = proc_mkdir("eicon", init_net.proc_net); if (proc_net_eicon) { if ((proc_didd = @@ -102,7 +103,7 @@ static void remove_proc(void) { remove_proc_entry(DRIVERLNAME, proc_net_eicon); - remove_proc_entry("net/eicon", NULL); + remove_proc_entry("eicon", init_net.proc_net); } static int DIVA_INIT_FUNCTION divadidd_init(void) diff -Nurb linux-2.6.22-570/drivers/isdn/hysdn/hysdn_procconf.c linux-2.6.22-591/drivers/isdn/hysdn/hysdn_procconf.c --- linux-2.6.22-570/drivers/isdn/hysdn/hysdn_procconf.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/isdn/hysdn/hysdn_procconf.c 2007-12-21 15:36:14.000000000 -0500 @@ -392,7 +392,7 @@ hysdn_card *card; unsigned char conf_name[20]; - hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net); + hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, init_net.proc_net); if (!hysdn_proc_entry) { printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n"); return (-1); @@ -437,5 +437,5 @@ card = card->next; /* point to next card */ } - remove_proc_entry(PROC_SUBDIR_NAME, proc_net); + remove_proc_entry(PROC_SUBDIR_NAME, init_net.proc_net); } diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_adt746x.c linux-2.6.22-591/drivers/macintosh/therm_adt746x.c --- linux-2.6.22-570/drivers/macintosh/therm_adt746x.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/macintosh/therm_adt746x.c 2007-12-21 15:36:11.000000000 -0500 @@ -335,6 +335,7 @@ { struct thermostat* th = arg; + set_freezable(); while(!kthread_should_stop()) { try_to_freeze(); msleep_interruptible(2000); diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_pm72.c linux-2.6.22-591/drivers/macintosh/therm_pm72.c --- linux-2.6.22-570/drivers/macintosh/therm_pm72.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/macintosh/therm_pm72.c 2007-12-21 15:36:11.000000000 -0500 @@ -1770,7 +1770,8 @@ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return 
call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } diff -Nurb linux-2.6.22-570/drivers/macintosh/windfarm_core.c linux-2.6.22-591/drivers/macintosh/windfarm_core.c --- linux-2.6.22-570/drivers/macintosh/windfarm_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/macintosh/windfarm_core.c 2007-12-21 15:36:11.000000000 -0500 @@ -80,7 +80,8 @@ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } EXPORT_SYMBOL_GPL(wf_critical_overtemp); @@ -92,6 +93,7 @@ DBG("wf: thread started\n"); + set_freezable(); while(!kthread_should_stop()) { if (time_after_eq(jiffies, next)) { wf_notify(WF_EVENT_TICK, NULL); @@ -212,7 +214,6 @@ list_add(&new_ct->link, &wf_controls); new_ct->attr.attr.name = new_ct->name; - new_ct->attr.attr.owner = THIS_MODULE; new_ct->attr.attr.mode = 0644; new_ct->attr.show = wf_show_control; new_ct->attr.store = wf_store_control; @@ -325,7 +326,6 @@ list_add(&new_sr->link, &wf_sensors); new_sr->attr.attr.name = new_sr->name; - new_sr->attr.attr.owner = THIS_MODULE; new_sr->attr.attr.mode = 0444; new_sr->attr.show = wf_show_sensor; new_sr->attr.store = NULL; diff -Nurb linux-2.6.22-570/drivers/md/Kconfig linux-2.6.22-591/drivers/md/Kconfig --- linux-2.6.22-570/drivers/md/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/md/Kconfig 2007-12-21 15:36:11.000000000 -0500 @@ -109,6 +109,8 @@ config MD_RAID456 tristate "RAID-4/RAID-5/RAID-6 mode" depends on BLK_DEV_MD + select ASYNC_MEMCPY + select ASYNC_XOR ---help--- A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure @@ -271,6 +273,11 @@ If unsure, say N. +config DM_NETLINK + bool "DM netlink events (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Generate netlink events for DM events. 
endmenu endif diff -Nurb linux-2.6.22-570/drivers/md/Makefile linux-2.6.22-591/drivers/md/Makefile --- linux-2.6.22-570/drivers/md/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/md/Makefile 2007-12-21 15:36:11.000000000 -0500 @@ -17,7 +17,7 @@ hostprogs-y := mktables # Note: link order is important. All raid personalities -# and xor.o must come before md.o, as they each initialise +# and must come before md.o, as they each initialise # themselves, and md.o may use the personalities when it # auto-initialised. @@ -25,7 +25,7 @@ obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID10) += raid10.o -obj-$(CONFIG_MD_RAID456) += raid456.o xor.o +obj-$(CONFIG_MD_RAID456) += raid456.o obj-$(CONFIG_MD_MULTIPATH) += multipath.o obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_BLK_DEV_MD) += md-mod.o @@ -46,6 +46,10 @@ altivec_flags := -maltivec -mabi=altivec endif +ifeq ($(CONFIG_DM_NETLINK),y) +dm-mod-objs += dm-netlink.o +endif + targets += raid6int1.c $(obj)/raid6int1.c: UNROLL := 1 $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.c linux-2.6.22-591/drivers/md/dm-netlink.c --- linux-2.6.22-570/drivers/md/dm-netlink.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/md/dm-netlink.c 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,103 @@ +/* + * Device Mapper Netlink Support (dm-netlink) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2005, 2006 + * Author: Mike Anderson + */ +#include +#include +#include +#include +#include +#include +#include + +#include "dm.h" +#include "dm-netlink.h" + +#define DM_MSG_PREFIX "netlink" + +#define DM_EVENT_SKB_SIZE NLMSG_GOODSIZE + +struct dm_event_cache { + struct kmem_cache *cache; + unsigned skb_size; +}; + +static struct dm_event_cache _dme_cache; + +static int dme_cache_init(struct dm_event_cache *dc, unsigned skb_size) +{ + dc->skb_size = skb_size; + + dc->cache = KMEM_CACHE(dm_event, 0); + if (!dc->cache) + return -ENOMEM; + + return 0; +} + +static void dme_cache_destroy(struct dm_event_cache *dc) +{ + kmem_cache_destroy(dc->cache); +} + +static void dme_cache_event_put(struct dm_event *evt) +{ + struct dm_event_cache *dc = evt->cdata; + + kmem_cache_free(dc->cache, evt); +} + +static struct dm_event *dme_cache_event_get(struct dm_event_cache *dc, + struct mapped_device *md) +{ + struct dm_event *evt; + + evt = kmem_cache_alloc(dc->cache, GFP_ATOMIC); + if (!evt) + return NULL; + + INIT_LIST_HEAD(&evt->elist); + evt->cdata = dc; + evt->md = md; + evt->skb = alloc_skb(dc->skb_size, GFP_ATOMIC); + if (!evt->skb) + goto cache_err; + + return evt; + +cache_err: + dme_cache_event_put(evt); + return NULL; +} + +int __init dm_netlink_init(void) +{ + int r; + + r = dme_cache_init(&_dme_cache, DM_EVENT_SKB_SIZE); + if (!r) + DMINFO("version 1.0.0 loaded"); + + return r; +} + +void dm_netlink_exit(void) +{ + dme_cache_destroy(&_dme_cache); +} diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.h linux-2.6.22-591/drivers/md/dm-netlink.h --- linux-2.6.22-570/drivers/md/dm-netlink.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/md/dm-netlink.h 2007-12-21 15:36:11.000000000 -0500 @@ -0,0 +1,50 @@ +/* + 
* Device Mapper Netlink Support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2005, 2006 + * Author: Mike Anderson + */ +#ifndef DM_NETLINK_H +#define DM_NETLINK_H + +struct dm_event_cache; +struct mapped_device; +struct dm_event { + struct dm_event_cache *cdata; + struct mapped_device *md; + struct sk_buff *skb; + struct list_head elist; +}; + +#ifdef CONFIG_DM_NETLINK + +int dm_netlink_init(void); +void dm_netlink_exit(void); + +#else /* CONFIG_DM_NETLINK */ + +static inline int __init dm_netlink_init(void) +{ + return 0; +} +static inline void dm_netlink_exit(void) +{ +} + +#endif /* CONFIG_DM_NETLINK */ + +#endif /* DM_NETLINK_H */ diff -Nurb linux-2.6.22-570/drivers/md/dm.c linux-2.6.22-591/drivers/md/dm.c --- linux-2.6.22-570/drivers/md/dm.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/drivers/md/dm.c 2007-12-21 15:36:12.000000000 -0500 @@ -7,6 +7,7 @@ #include "dm.h" #include "dm-bio-list.h" +#include "dm-netlink.h" #include #include @@ -180,6 +181,7 @@ dm_linear_init, dm_stripe_init, dm_interface_init, + dm_netlink_init, }; void (*_exits[])(void) = { @@ -188,6 +190,7 @@ dm_linear_exit, dm_stripe_exit, dm_interface_exit, + dm_netlink_exit, }; static int __init dm_init(void) diff -Nurb linux-2.6.22-570/drivers/md/md.c 
linux-2.6.22-591/drivers/md/md.c --- linux-2.6.22-570/drivers/md/md.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/md/md.c 2007-12-21 15:36:12.000000000 -0500 @@ -4642,7 +4642,6 @@ * many dirty RAID5 blocks. */ - current->flags |= PF_NOFREEZE; allow_signal(SIGKILL); while (!kthread_should_stop()) { @@ -5814,7 +5813,7 @@ } } -module_init(md_init) +subsys_initcall(md_init); module_exit(md_exit) static int get_ro(char *buffer, struct kernel_param *kp) diff -Nurb linux-2.6.22-570/drivers/md/raid5.c linux-2.6.22-591/drivers/md/raid5.c --- linux-2.6.22-570/drivers/md/raid5.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/md/raid5.c 2007-12-21 15:36:12.000000000 -0500 @@ -52,6 +52,7 @@ #include "raid6.h" #include +#include /* * Stripe cache @@ -80,7 +81,6 @@ /* * The following can be used to debug the driver */ -#define RAID5_DEBUG 0 #define RAID5_PARANOIA 1 #if RAID5_PARANOIA && defined(CONFIG_SMP) # define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock) @@ -88,8 +88,7 @@ # define CHECK_DEVLOCK() #endif -#define PRINTK(x...) 
((void)(RAID5_DEBUG && printk(x))) -#if RAID5_DEBUG +#ifdef DEBUG #define inline #define __inline__ #endif @@ -125,6 +124,7 @@ } md_wakeup_thread(conf->mddev->thread); } else { + BUG_ON(sh->ops.pending); if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { atomic_dec(&conf->preread_active_stripes); if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) @@ -152,7 +152,8 @@ static inline void remove_hash(struct stripe_head *sh) { - PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); + pr_debug("remove_hash(), stripe %llu\n", + (unsigned long long)sh->sector); hlist_del_init(&sh->hash); } @@ -161,7 +162,8 @@ { struct hlist_head *hp = stripe_hash(conf, sh->sector); - PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); + pr_debug("insert_hash(), stripe %llu\n", + (unsigned long long)sh->sector); CHECK_DEVLOCK(); hlist_add_head(&sh->hash, hp); @@ -224,9 +226,10 @@ BUG_ON(atomic_read(&sh->count) != 0); BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); + BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete); CHECK_DEVLOCK(); - PRINTK("init_stripe called, stripe %llu\n", + pr_debug("init_stripe called, stripe %llu\n", (unsigned long long)sh->sector); remove_hash(sh); @@ -240,11 +243,11 @@ for (i = sh->disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (dev->toread || dev->towrite || dev->written || + if (dev->toread || dev->read || dev->towrite || dev->written || test_bit(R5_LOCKED, &dev->flags)) { - printk("sector=%llx i=%d %p %p %p %d\n", + printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n", (unsigned long long)sh->sector, i, dev->toread, - dev->towrite, dev->written, + dev->read, dev->towrite, dev->written, test_bit(R5_LOCKED, &dev->flags)); BUG(); } @@ -260,11 +263,11 @@ struct hlist_node *hn; CHECK_DEVLOCK(); - PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); + pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) 
if (sh->sector == sector && sh->disks == disks) return sh; - PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); + pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); return NULL; } @@ -276,7 +279,7 @@ { struct stripe_head *sh; - PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector); + pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); spin_lock_irq(&conf->device_lock); @@ -324,179 +327,762 @@ return sh; } -static int grow_one_stripe(raid5_conf_t *conf) +/* test_and_ack_op() ensures that we only dequeue an operation once */ +#define test_and_ack_op(op, pend) \ +do { \ + if (test_bit(op, &sh->ops.pending) && \ + !test_bit(op, &sh->ops.complete)) { \ + if (test_and_set_bit(op, &sh->ops.ack)) \ + clear_bit(op, &pend); \ + else \ + ack++; \ + } else \ + clear_bit(op, &pend); \ +} while (0) + +/* find new work to run, do not resubmit work that is already + * in flight + */ +static unsigned long get_stripe_work(struct stripe_head *sh) +{ + unsigned long pending; + int ack = 0; + + pending = sh->ops.pending; + + test_and_ack_op(STRIPE_OP_BIOFILL, pending); + test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending); + test_and_ack_op(STRIPE_OP_PREXOR, pending); + test_and_ack_op(STRIPE_OP_BIODRAIN, pending); + test_and_ack_op(STRIPE_OP_POSTXOR, pending); + test_and_ack_op(STRIPE_OP_CHECK, pending); + if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending)) + ack++; + + sh->ops.count -= ack; + BUG_ON(sh->ops.count < 0); + + return pending; +} + +static int +raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); +static int +raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error); + +static void ops_run_io(struct stripe_head *sh) { - struct stripe_head *sh; - sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); - if (!sh) - return 0; - memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); - sh->raid_conf = conf; - spin_lock_init(&sh->lock); + raid5_conf_t *conf = 
sh->raid_conf; + int i, disks = sh->disks; - if (grow_buffers(sh, conf->raid_disks)) { - shrink_buffers(sh, conf->raid_disks); - kmem_cache_free(conf->slab_cache, sh); - return 0; - } - sh->disks = conf->raid_disks; - /* we just created an active stripe so... */ - atomic_set(&sh->count, 1); - atomic_inc(&conf->active_stripes); - INIT_LIST_HEAD(&sh->lru); - release_stripe(sh); - return 1; -} + might_sleep(); -static int grow_stripes(raid5_conf_t *conf, int num) -{ - struct kmem_cache *sc; - int devs = conf->raid_disks; + for (i = disks; i--; ) { + int rw; + struct bio *bi; + mdk_rdev_t *rdev; + if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) + rw = WRITE; + else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) + rw = READ; + else + continue; - sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); - sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); - conf->active_name = 0; - sc = kmem_cache_create(conf->cache_name[conf->active_name], - sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), - 0, 0, NULL, NULL); - if (!sc) - return 1; - conf->slab_cache = sc; - conf->pool_size = devs; - while (num--) - if (!grow_one_stripe(conf)) - return 1; - return 0; + bi = &sh->dev[i].req; + + bi->bi_rw = rw; + if (rw == WRITE) + bi->bi_end_io = raid5_end_write_request; + else + bi->bi_end_io = raid5_end_read_request; + + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && test_bit(Faulty, &rdev->flags)) + rdev = NULL; + if (rdev) + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + + if (rdev) { + if (test_bit(STRIPE_SYNCING, &sh->state) || + test_bit(STRIPE_EXPAND_SOURCE, &sh->state) || + test_bit(STRIPE_EXPAND_READY, &sh->state)) + md_sync_acct(rdev->bdev, STRIPE_SECTORS); + + bi->bi_bdev = rdev->bdev; + pr_debug("%s: for %llu schedule op %ld on disc %d\n", + __FUNCTION__, (unsigned long long)sh->sector, + bi->bi_rw, i); + atomic_inc(&sh->count); + bi->bi_sector = sh->sector + rdev->data_offset; + 
bi->bi_flags = 1 << BIO_UPTODATE; + bi->bi_vcnt = 1; + bi->bi_max_vecs = 1; + bi->bi_idx = 0; + bi->bi_io_vec = &sh->dev[i].vec; + bi->bi_io_vec[0].bv_len = STRIPE_SIZE; + bi->bi_io_vec[0].bv_offset = 0; + bi->bi_size = STRIPE_SIZE; + bi->bi_next = NULL; + if (rw == WRITE && + test_bit(R5_ReWrite, &sh->dev[i].flags)) + atomic_add(STRIPE_SECTORS, + &rdev->corrected_errors); + generic_make_request(bi); + } else { + if (rw == WRITE) + set_bit(STRIPE_DEGRADED, &sh->state); + pr_debug("skip op %ld on disc %d for sector %llu\n", + bi->bi_rw, i, (unsigned long long)sh->sector); + clear_bit(R5_LOCKED, &sh->dev[i].flags); + set_bit(STRIPE_HANDLE, &sh->state); + } + } } -#ifdef CONFIG_MD_RAID5_RESHAPE -static int resize_stripes(raid5_conf_t *conf, int newsize) +static struct dma_async_tx_descriptor * +async_copy_data(int frombio, struct bio *bio, struct page *page, + sector_t sector, struct dma_async_tx_descriptor *tx) { - /* Make all the stripes able to hold 'newsize' devices. - * New slots in each stripe get 'page' set to a new page. - * - * This happens in stages: - * 1/ create a new kmem_cache and allocate the required number of - * stripe_heads. - * 2/ gather all the old stripe_heads and tranfer the pages across - * to the new stripe_heads. This will have the side effect of - * freezing the array as once all stripe_heads have been collected, - * no IO will be possible. Old stripe heads are freed once their - * pages have been transferred over, and the old kmem_cache is - * freed when all stripes are done. - * 3/ reallocate conf->disks to be suitable bigger. If this fails, - * we simple return a failre status - no need to clean anything up. - * 4/ allocate new pages for the new slots in the new stripe_heads. - * If this fails, we don't bother trying the shrink the - * stripe_heads down again, we just leave them as they are. - * As each stripe_head is processed the new one is released into - * active service. 
- * - * Once step2 is started, we cannot afford to wait for a write, - * so we use GFP_NOIO allocations. - */ - struct stripe_head *osh, *nsh; - LIST_HEAD(newstripes); - struct disk_info *ndisks; - int err = 0; - struct kmem_cache *sc; + struct bio_vec *bvl; + struct page *bio_page; int i; + int page_offset; - if (newsize <= conf->pool_size) - return 0; /* never bother to shrink */ + if (bio->bi_sector >= sector) + page_offset = (signed)(bio->bi_sector - sector) * 512; + else + page_offset = (signed)(sector - bio->bi_sector) * -512; + bio_for_each_segment(bvl, bio, i) { + int len = bio_iovec_idx(bio, i)->bv_len; + int clen; + int b_offset = 0; - md_allow_write(conf->mddev); + if (page_offset < 0) { + b_offset = -page_offset; + page_offset += b_offset; + len -= b_offset; + } - /* Step 1 */ - sc = kmem_cache_create(conf->cache_name[1-conf->active_name], - sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), - 0, 0, NULL, NULL); - if (!sc) - return -ENOMEM; + if (len > 0 && page_offset + len > STRIPE_SIZE) + clen = STRIPE_SIZE - page_offset; + else + clen = len; - for (i = conf->max_nr_stripes; i; i--) { - nsh = kmem_cache_alloc(sc, GFP_KERNEL); - if (!nsh) + if (clen > 0) { + b_offset += bio_iovec_idx(bio, i)->bv_offset; + bio_page = bio_iovec_idx(bio, i)->bv_page; + if (frombio) + tx = async_memcpy(page, bio_page, page_offset, + b_offset, clen, + ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC, + tx, NULL, NULL); + else + tx = async_memcpy(bio_page, page, b_offset, + page_offset, clen, + ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST, + tx, NULL, NULL); + } + if (clen < len) /* hit end of page */ break; + page_offset += len; + } - memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); + return tx; +} - nsh->raid_conf = conf; - spin_lock_init(&nsh->lock); +static void ops_complete_biofill(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + struct bio *return_bi = NULL, *bi; + raid5_conf_t *conf = sh->raid_conf; + int i, more_to_read = 0; - 
list_add(&nsh->lru, &newstripes); - } - if (i) { - /* didn't get enough, give up */ - while (!list_empty(&newstripes)) { - nsh = list_entry(newstripes.next, struct stripe_head, lru); - list_del(&nsh->lru); - kmem_cache_free(sc, nsh); - } - kmem_cache_destroy(sc); - return -ENOMEM; - } - /* Step 2 - Must use GFP_NOIO now. - * OK, we have enough stripes, start collecting inactive - * stripes and copying them over + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + /* clear completed biofills */ + for (i = sh->disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + /* check if this stripe has new incoming reads */ + if (dev->toread) + more_to_read++; + + /* acknowledge completion of a biofill operation */ + /* and check if we need to reply to a read request */ - list_for_each_entry(nsh, &newstripes, lru) { + if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) { + struct bio *rbi, *rbi2; + clear_bit(R5_Wantfill, &dev->flags); + + /* The access to dev->read is outside of the + * spin_lock_irq(&conf->device_lock), but is protected + * by the STRIPE_OP_BIOFILL pending bit + */ + BUG_ON(!dev->read); + rbi = dev->read; + dev->read = NULL; + while (rbi && rbi->bi_sector < + dev->sector + STRIPE_SECTORS) { + rbi2 = r5_next_bio(rbi, dev->sector); spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_stripe, - !list_empty(&conf->inactive_list), - conf->device_lock, - unplug_slaves(conf->mddev) - ); - osh = get_free_stripe(conf); + if (--rbi->bi_phys_segments == 0) { + rbi->bi_next = return_bi; + return_bi = rbi; + } spin_unlock_irq(&conf->device_lock); - atomic_set(&nsh->count, 1); - for(i=0; i<conf->pool_size; i++) - nsh->dev[i].page = osh->dev[i].page; - for( ; i<newsize; i++) - nsh->dev[i].page = NULL; - kmem_cache_free(conf->slab_cache, osh); + rbi = rbi2; } - kmem_cache_destroy(conf->slab_cache); + } + } + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending); - /* Step 3.
- * At this point, we are holding all the stripes so the array - * is completely stalled, so now is a good time to resize - * conf->disks. - */ - ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); - if (ndisks) { - for (i=0; i<conf->raid_disks; i++) - ndisks[i] = conf->disks[i]; - kfree(conf->disks); - conf->disks = ndisks; - } else - err = -ENOMEM; + bi = return_bi; + while (bi) { + int bytes = bi->bi_size; - /* Step 4, return new stripes to service */ - while(!list_empty(&newstripes)) { - nsh = list_entry(newstripes.next, struct stripe_head, lru); - list_del_init(&nsh->lru); - for (i=conf->raid_disks; i < newsize; i++) - if (nsh->dev[i].page == NULL) { - struct page *p = alloc_page(GFP_NOIO); - nsh->dev[i].page = p; - if (!p) - err = -ENOMEM; - } - release_stripe(nsh); + return_bi = bi->bi_next; + bi->bi_next = NULL; + bi->bi_size = 0; + bi->bi_end_io(bi, bytes, + test_bit(BIO_UPTODATE, &bi->bi_flags) ? 0 : -EIO); + bi = return_bi; } - /* critical section pass, GFP_NOIO no longer needed */ - conf->slab_cache = sc; - conf->active_name = 1-conf->active_name; - conf->pool_size = newsize; - return err; + if (more_to_read) + set_bit(STRIPE_HANDLE, &sh->state); + release_stripe(sh); } -#endif -static int drop_one_stripe(raid5_conf_t *conf) +static void ops_run_biofill(struct stripe_head *sh) { - struct stripe_head *sh; + struct dma_async_tx_descriptor *tx = NULL; + raid5_conf_t *conf = sh->raid_conf; + int i; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + for (i = sh->disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (test_bit(R5_Wantfill, &dev->flags)) { + struct bio *rbi; + spin_lock_irq(&conf->device_lock); + dev->read = rbi = dev->toread; + dev->toread = NULL; + spin_unlock_irq(&conf->device_lock); + while (rbi && rbi->bi_sector < + dev->sector + STRIPE_SECTORS) { + tx = async_copy_data(0, rbi, dev->page, + dev->sector, tx); + rbi = r5_next_bio(rbi, dev->sector); + } + } + } + + atomic_inc(&sh->count); +
async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + ops_complete_biofill, sh); +} + +static void ops_complete_compute5(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + int target = sh->ops.target; + struct r5dev *tgt = &sh->dev[target]; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + set_bit(R5_UPTODATE, &tgt->flags); + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + clear_bit(R5_Wantcompute, &tgt->flags); + set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + set_bit(STRIPE_HANDLE, &sh->state); + release_stripe(sh); +} + +static struct dma_async_tx_descriptor * +ops_run_compute5(struct stripe_head *sh, unsigned long pending) +{ + /* kernel stack size limits the total number of disks */ + int disks = sh->disks; + struct page *xor_srcs[disks]; + int target = sh->ops.target; + struct r5dev *tgt = &sh->dev[target]; + struct page *xor_dest = tgt->page; + int count = 0; + struct dma_async_tx_descriptor *tx; + int i; + + pr_debug("%s: stripe %llu block: %d\n", + __FUNCTION__, (unsigned long long)sh->sector, target); + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + + for (i = disks; i--; ) + if (i != target) + xor_srcs[count++] = sh->dev[i].page; + + atomic_inc(&sh->count); + + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, + 0, NULL, ops_complete_compute5, sh); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute5, sh); + + /* ack now if postxor is not set to be run */ + if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending)) + async_tx_ack(tx); + + return tx; +} + +static void ops_complete_prexor(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); +} + +static struct dma_async_tx_descriptor * +ops_run_prexor(struct stripe_head *sh, struct 
dma_async_tx_descriptor *tx) +{ + /* kernel stack size limits the total number of disks */ + int disks = sh->disks; + struct page *xor_srcs[disks]; + int count = 0, pd_idx = sh->pd_idx, i; + + /* existing parity data subtracted */ + struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + /* Only process blocks that are known to be uptodate */ + if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags)) + xor_srcs[count++] = dev->page; + } + + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); + + return tx; +} + +static struct dma_async_tx_descriptor * +ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +{ + int disks = sh->disks; + int pd_idx = sh->pd_idx, i; + + /* check if prexor is active which means only process blocks + * that are part of a read-modify-write (Wantprexor) + */ + int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + struct bio *chosen; + int towrite; + + towrite = 0; + if (prexor) { /* rmw */ + if (dev->towrite && + test_bit(R5_Wantprexor, &dev->flags)) + towrite = 1; + } else { /* rcw */ + if (i != pd_idx && dev->towrite && + test_bit(R5_LOCKED, &dev->flags)) + towrite = 1; + } + + if (towrite) { + struct bio *wbi; + + spin_lock(&sh->lock); + chosen = dev->towrite; + dev->towrite = NULL; + BUG_ON(dev->written); + wbi = dev->written = chosen; + spin_unlock(&sh->lock); + + while (wbi && wbi->bi_sector < + dev->sector + STRIPE_SECTORS) { + tx = async_copy_data(1, wbi, dev->page, + dev->sector, tx); + wbi = r5_next_bio(wbi, dev->sector); + } + } + } + + return tx; +} + +static void ops_complete_postxor(void *stripe_head_ref) +{ + 
struct stripe_head *sh = stripe_head_ref; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); + set_bit(STRIPE_HANDLE, &sh->state); + release_stripe(sh); +} + +static void ops_complete_write(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + int disks = sh->disks, i, pd_idx = sh->pd_idx; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (dev->written || i == pd_idx) + set_bit(R5_UPTODATE, &dev->flags); + } + + set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); + set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); + + set_bit(STRIPE_HANDLE, &sh->state); + release_stripe(sh); +} + +static void +ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +{ + /* kernel stack size limits the total number of disks */ + int disks = sh->disks; + struct page *xor_srcs[disks]; + + int count = 0, pd_idx = sh->pd_idx, i; + struct page *xor_dest; + int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); + unsigned long flags; + dma_async_tx_callback callback; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + /* check if prexor is active which means only process blocks + * that are part of a read-modify-write (written) + */ + if (prexor) { + xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (dev->written) + xor_srcs[count++] = dev->page; + } + } else { + xor_dest = sh->dev[pd_idx].page; + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (i != pd_idx) + xor_srcs[count++] = dev->page; + } + } + + /* check whether this postxor is part of a write */ + callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ? 
+ ops_complete_write : ops_complete_postxor; + + /* 1/ if we prexor'd then the dest is reused as a source + * 2/ if we did not prexor then we are redoing the parity + * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST + * for the synchronous xor case + */ + flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); + + atomic_inc(&sh->count); + + if (unlikely(count == 1)) { + flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, + flags, tx, callback, sh); + } else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + flags, tx, callback, sh); +} + +static void ops_complete_check(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + int pd_idx = sh->pd_idx; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && + sh->ops.zero_sum_result == 0) + set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); + + set_bit(STRIPE_OP_CHECK, &sh->ops.complete); + set_bit(STRIPE_HANDLE, &sh->state); + release_stripe(sh); +} + +static void ops_run_check(struct stripe_head *sh) +{ + /* kernel stack size limits the total number of disks */ + int disks = sh->disks; + struct page *xor_srcs[disks]; + struct dma_async_tx_descriptor *tx; + + int count = 0, pd_idx = sh->pd_idx, i; + struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; + + pr_debug("%s: stripe %llu\n", __FUNCTION__, + (unsigned long long)sh->sector); + + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (i != pd_idx) + xor_srcs[count++] = dev->page; + } + + tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + + if (tx) + set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); + else + clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); + + atomic_inc(&sh->count); + tx = 
async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + ops_complete_check, sh); +} + +static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) +{ + int overlap_clear = 0, i, disks = sh->disks; + struct dma_async_tx_descriptor *tx = NULL; + + if (test_bit(STRIPE_OP_BIOFILL, &pending)) { + ops_run_biofill(sh); + overlap_clear++; + } + + if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending)) + tx = ops_run_compute5(sh, pending); + + if (test_bit(STRIPE_OP_PREXOR, &pending)) + tx = ops_run_prexor(sh, tx); + + if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { + tx = ops_run_biodrain(sh, tx); + overlap_clear++; + } + + if (test_bit(STRIPE_OP_POSTXOR, &pending)) + ops_run_postxor(sh, tx); + + if (test_bit(STRIPE_OP_CHECK, &pending)) + ops_run_check(sh); + + if (test_bit(STRIPE_OP_IO, &pending)) + ops_run_io(sh); + + if (overlap_clear) + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (test_and_clear_bit(R5_Overlap, &dev->flags)) + wake_up(&sh->raid_conf->wait_for_overlap); + } +} + +static int grow_one_stripe(raid5_conf_t *conf) +{ + struct stripe_head *sh; + sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); + if (!sh) + return 0; + memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); + sh->raid_conf = conf; + spin_lock_init(&sh->lock); + + if (grow_buffers(sh, conf->raid_disks)) { + shrink_buffers(sh, conf->raid_disks); + kmem_cache_free(conf->slab_cache, sh); + return 0; + } + sh->disks = conf->raid_disks; + /* we just created an active stripe so... 
*/ + atomic_set(&sh->count, 1); + atomic_inc(&conf->active_stripes); + INIT_LIST_HEAD(&sh->lru); + release_stripe(sh); + return 1; +} + +static int grow_stripes(raid5_conf_t *conf, int num) +{ + struct kmem_cache *sc; + int devs = conf->raid_disks; + + sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); + sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); + conf->active_name = 0; + sc = kmem_cache_create(conf->cache_name[conf->active_name], + sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), + 0, 0, NULL, NULL); + if (!sc) + return 1; + conf->slab_cache = sc; + conf->pool_size = devs; + while (num--) + if (!grow_one_stripe(conf)) + return 1; + return 0; +} + +#ifdef CONFIG_MD_RAID5_RESHAPE +static int resize_stripes(raid5_conf_t *conf, int newsize) +{ + /* Make all the stripes able to hold 'newsize' devices. + * New slots in each stripe get 'page' set to a new page. + * + * This happens in stages: + * 1/ create a new kmem_cache and allocate the required number of + * stripe_heads. + * 2/ gather all the old stripe_heads and tranfer the pages across + * to the new stripe_heads. This will have the side effect of + * freezing the array as once all stripe_heads have been collected, + * no IO will be possible. Old stripe heads are freed once their + * pages have been transferred over, and the old kmem_cache is + * freed when all stripes are done. + * 3/ reallocate conf->disks to be suitable bigger. If this fails, + * we simple return a failre status - no need to clean anything up. + * 4/ allocate new pages for the new slots in the new stripe_heads. + * If this fails, we don't bother trying the shrink the + * stripe_heads down again, we just leave them as they are. + * As each stripe_head is processed the new one is released into + * active service. + * + * Once step2 is started, we cannot afford to wait for a write, + * so we use GFP_NOIO allocations. 
+ */ + struct stripe_head *osh, *nsh; + LIST_HEAD(newstripes); + struct disk_info *ndisks; + int err = 0; + struct kmem_cache *sc; + int i; + + if (newsize <= conf->pool_size) + return 0; /* never bother to shrink */ + + md_allow_write(conf->mddev); + + /* Step 1 */ + sc = kmem_cache_create(conf->cache_name[1-conf->active_name], + sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), + 0, 0, NULL, NULL); + if (!sc) + return -ENOMEM; + + for (i = conf->max_nr_stripes; i; i--) { + nsh = kmem_cache_alloc(sc, GFP_KERNEL); + if (!nsh) + break; + + memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); + + nsh->raid_conf = conf; + spin_lock_init(&nsh->lock); + + list_add(&nsh->lru, &newstripes); + } + if (i) { + /* didn't get enough, give up */ + while (!list_empty(&newstripes)) { + nsh = list_entry(newstripes.next, struct stripe_head, lru); + list_del(&nsh->lru); + kmem_cache_free(sc, nsh); + } + kmem_cache_destroy(sc); + return -ENOMEM; + } + /* Step 2 - Must use GFP_NOIO now. + * OK, we have enough stripes, start collecting inactive + * stripes and copying them over + */ + list_for_each_entry(nsh, &newstripes, lru) { + spin_lock_irq(&conf->device_lock); + wait_event_lock_irq(conf->wait_for_stripe, + !list_empty(&conf->inactive_list), + conf->device_lock, + unplug_slaves(conf->mddev) + ); + osh = get_free_stripe(conf); + spin_unlock_irq(&conf->device_lock); + atomic_set(&nsh->count, 1); + for(i=0; i<conf->pool_size; i++) + nsh->dev[i].page = osh->dev[i].page; + for( ; i<newsize; i++) + nsh->dev[i].page = NULL; + kmem_cache_free(conf->slab_cache, osh); + } + kmem_cache_destroy(conf->slab_cache); + + /* Step 3. + * At this point, we are holding all the stripes so the array + * is completely stalled, so now is a good time to resize + * conf->disks.
+ */ + ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); + if (ndisks) { + for (i=0; i<conf->raid_disks; i++) + ndisks[i] = conf->disks[i]; + kfree(conf->disks); + conf->disks = ndisks; + } else + err = -ENOMEM; + + /* Step 4, return new stripes to service */ + while(!list_empty(&newstripes)) { + nsh = list_entry(newstripes.next, struct stripe_head, lru); + list_del_init(&nsh->lru); + for (i=conf->raid_disks; i < newsize; i++) + if (nsh->dev[i].page == NULL) { + struct page *p = alloc_page(GFP_NOIO); + nsh->dev[i].page = p; + if (!p) + err = -ENOMEM; + } + release_stripe(nsh); + } + /* critical section pass, GFP_NOIO no longer needed */ + + conf->slab_cache = sc; + conf->active_name = 1-conf->active_name; + conf->pool_size = newsize; + return err; +} +#endif + +static int drop_one_stripe(raid5_conf_t *conf) +{ + struct stripe_head *sh; spin_lock_irq(&conf->device_lock); sh = get_free_stripe(conf); @@ -537,7 +1123,7 @@ if (bi == &sh->dev[i].req) break; - PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", + pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), uptodate); if (i == disks) { @@ -613,7 +1199,7 @@ if (bi == &sh->dev[i].req) break; - PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", + pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), uptodate); if (i == disks) { @@ -658,7 +1244,7 @@ { char b[BDEVNAME_SIZE]; raid5_conf_t *conf = (raid5_conf_t *) mddev->private; - PRINTK("raid5: error called\n"); + pr_debug("raid5: error called\n"); if (!test_bit(Faulty, &rdev->flags)) { set_bit(MD_CHANGE_DEVS, &mddev->flags); @@ -918,135 +1504,11 @@ #define check_xor() do { \ if (count == MAX_XOR_BLOCKS) { \ - xor_block(count, STRIPE_SIZE, ptr); \ - count = 1; \ + xor_blocks(count, STRIPE_SIZE, dest, ptr);\ + count = 0; \ } \ } while(0) - -static void compute_block(struct stripe_head *sh, int dd_idx)
-{ - int i, count, disks = sh->disks; - void *ptr[MAX_XOR_BLOCKS], *p; - - PRINTK("compute_block, stripe %llu, idx %d\n", - (unsigned long long)sh->sector, dd_idx); - - ptr[0] = page_address(sh->dev[dd_idx].page); - memset(ptr[0], 0, STRIPE_SIZE); - count = 1; - for (i = disks ; i--; ) { - if (i == dd_idx) - continue; - p = page_address(sh->dev[i].page); - if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) - ptr[count++] = p; - else - printk(KERN_ERR "compute_block() %d, stripe %llu, %d" - " not present\n", dd_idx, - (unsigned long long)sh->sector, i); - - check_xor(); - } - if (count != 1) - xor_block(count, STRIPE_SIZE, ptr); - set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); -} - -static void compute_parity5(struct stripe_head *sh, int method) -{ - raid5_conf_t *conf = sh->raid_conf; - int i, pd_idx = sh->pd_idx, disks = sh->disks, count; - void *ptr[MAX_XOR_BLOCKS]; - struct bio *chosen; - - PRINTK("compute_parity5, stripe %llu, method %d\n", - (unsigned long long)sh->sector, method); - - count = 1; - ptr[0] = page_address(sh->dev[pd_idx].page); - switch(method) { - case READ_MODIFY_WRITE: - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags)); - for (i=disks ; i-- ;) { - if (i==pd_idx) - continue; - if (sh->dev[i].towrite && - test_bit(R5_UPTODATE, &sh->dev[i].flags)) { - ptr[count++] = page_address(sh->dev[i].page); - chosen = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - BUG_ON(sh->dev[i].written); - sh->dev[i].written = chosen; - check_xor(); - } - } - break; - case RECONSTRUCT_WRITE: - memset(ptr[0], 0, STRIPE_SIZE); - for (i= disks; i-- ;) - if (i!=pd_idx && sh->dev[i].towrite) { - chosen = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - BUG_ON(sh->dev[i].written); - sh->dev[i].written = chosen; - } - break; - case CHECK_PARITY: - break; - } - if (count>1) { - 
xor_block(count, STRIPE_SIZE, ptr); - count = 1; - } - - for (i = disks; i--;) - if (sh->dev[i].written) { - sector_t sector = sh->dev[i].sector; - struct bio *wbi = sh->dev[i].written; - while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { - copy_data(1, wbi, sh->dev[i].page, sector); - wbi = r5_next_bio(wbi, sector); - } - - set_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(R5_UPTODATE, &sh->dev[i].flags); - } - - switch(method) { - case RECONSTRUCT_WRITE: - case CHECK_PARITY: - for (i=disks; i--;) - if (i != pd_idx) { - ptr[count++] = page_address(sh->dev[i].page); - check_xor(); - } - break; - case READ_MODIFY_WRITE: - for (i = disks; i--;) - if (sh->dev[i].written) { - ptr[count++] = page_address(sh->dev[i].page); - check_xor(); - } - } - if (count != 1) - xor_block(count, STRIPE_SIZE, ptr); - - if (method != CHECK_PARITY) { - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); - } else - clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); -} - static void compute_parity6(struct stripe_head *sh, int method) { raid6_conf_t *conf = sh->raid_conf; @@ -1058,7 +1520,7 @@ qd_idx = raid6_next_disk(pd_idx, disks); d0_idx = raid6_next_disk(qd_idx, disks); - PRINTK("compute_parity, stripe %llu, method %d\n", + pr_debug("compute_parity, stripe %llu, method %d\n", (unsigned long long)sh->sector, method); switch(method) { @@ -1132,20 +1594,20 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) { int i, count, disks = sh->disks; - void *ptr[MAX_XOR_BLOCKS], *p; + void *ptr[MAX_XOR_BLOCKS], *dest, *p; int pd_idx = sh->pd_idx; int qd_idx = raid6_next_disk(pd_idx, disks); - PRINTK("compute_block_1, stripe %llu, idx %d\n", + pr_debug("compute_block_1, stripe %llu, idx %d\n", (unsigned long long)sh->sector, dd_idx); if ( dd_idx == qd_idx ) { /* We're actually computing the Q drive */ compute_parity6(sh, UPDATE_PARITY); } else { - ptr[0] = page_address(sh->dev[dd_idx].page); - if (!nozero) memset(ptr[0], 0, 
STRIPE_SIZE); - count = 1; + dest = page_address(sh->dev[dd_idx].page); + if (!nozero) memset(dest, 0, STRIPE_SIZE); + count = 0; for (i = disks ; i--; ) { if (i == dd_idx || i == qd_idx) continue; @@ -1159,8 +1621,8 @@ check_xor(); } - if (count != 1) - xor_block(count, STRIPE_SIZE, ptr); + if (count) + xor_blocks(count, STRIPE_SIZE, dest, ptr); if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); } @@ -1183,7 +1645,7 @@ BUG_ON(faila == failb); if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } - PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", + pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); if ( failb == disks-1 ) { @@ -1229,7 +1691,79 @@ } } +static int +handle_write_operations5(struct stripe_head *sh, int rcw, int expand) +{ + int i, pd_idx = sh->pd_idx, disks = sh->disks; + int locked = 0; + + if (rcw) { + /* if we are not expanding this is a proper write request, and + * there will be bios with new data to be drained into the + * stripe cache + */ + if (!expand) { + set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); + sh->ops.count++; + } + + set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); + sh->ops.count++; + + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + + if (dev->towrite) { + set_bit(R5_LOCKED, &dev->flags); + if (!expand) + clear_bit(R5_UPTODATE, &dev->flags); + locked++; + } + } + } else { + BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || + test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); + + set_bit(STRIPE_OP_PREXOR, &sh->ops.pending); + set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); + set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); + + sh->ops.count += 3; + + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (i == pd_idx) + continue; + + /* For a read-modify write there may be blocks that are + * locked for reading while others are ready to be + * 
written so we distinguish these blocks by the + * R5_Wantprexor bit + */ + if (dev->towrite && + (test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags))) { + set_bit(R5_Wantprexor, &dev->flags); + set_bit(R5_LOCKED, &dev->flags); + clear_bit(R5_UPTODATE, &dev->flags); + locked++; + } + } + } + + /* keep the parity disk locked while asynchronous operations + * are in flight + */ + set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); + locked++; + + pr_debug("%s: stripe %llu locked: %d pending: %lx\n", + __FUNCTION__, (unsigned long long)sh->sector, + locked, sh->ops.pending); + return locked; +} /* * Each stripe/dev can have one or more bion attached. @@ -1242,7 +1776,7 @@ raid5_conf_t *conf = sh->raid_conf; int firstwrite=0; - PRINTK("adding bh b#%llu to stripe s#%llu\n", + pr_debug("adding bh b#%llu to stripe s#%llu\n", (unsigned long long)bi->bi_sector, (unsigned long long)sh->sector); @@ -1271,7 +1805,7 @@ spin_unlock_irq(&conf->device_lock); spin_unlock(&sh->lock); - PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n", + pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", (unsigned long long)bi->bi_sector, (unsigned long long)sh->sector, dd_idx); @@ -1326,116 +1860,14 @@ return pd_idx; } - -/* - * handle_stripe - do things to a stripe. - * - * We lock the stripe and then examine the state of various bits - * to see what needs to be done. - * Possible results: - * return some read request which now have data - * return some write requests which are safely on disc - * schedule a read on some buffers - * schedule a write of some buffers - * return confirmation of parity correctness - * - * Parity calculations are done inside the stripe lock - * buffers are taken off read_list or write_list, and bh_cache buffers - * get BH_Lock set before the stripe lock is released. 
- * - */ - -static void handle_stripe5(struct stripe_head *sh) +static void +handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, + struct stripe_head_state *s, int disks, + struct bio **return_bi) { - raid5_conf_t *conf = sh->raid_conf; - int disks = sh->disks; - struct bio *return_bi= NULL; - struct bio *bi; int i; - int syncing, expanding, expanded; - int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; - int non_overwrite = 0; - int failed_num=0; - struct r5dev *dev; - - PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n", - (unsigned long long)sh->sector, atomic_read(&sh->count), - sh->pd_idx); - - spin_lock(&sh->lock); - clear_bit(STRIPE_HANDLE, &sh->state); - clear_bit(STRIPE_DELAYED, &sh->state); - - syncing = test_bit(STRIPE_SYNCING, &sh->state); - expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); - /* Now to look around and see what can be done */ - - rcu_read_lock(); - for (i=disks; i--; ) { - mdk_rdev_t *rdev; - dev = &sh->dev[i]; - clear_bit(R5_Insync, &dev->flags); - - PRINTK("check %d: state 0x%lx read %p write %p written %p\n", - i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { - struct bio *rbi, *rbi2; - PRINTK("Return read for disc %d\n", i); - spin_lock_irq(&conf->device_lock); - rbi = dev->toread; - dev->toread = NULL; - if (test_and_clear_bit(R5_Overlap, &dev->flags)) - wake_up(&conf->wait_for_overlap); - spin_unlock_irq(&conf->device_lock); - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { - copy_data(0, rbi, dev->page, dev->sector); - rbi2 = r5_next_bio(rbi, dev->sector); - spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - spin_unlock_irq(&conf->device_lock); - rbi = rbi2; - } - } - - /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) 
locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; - - - if (dev->toread) to_read++; - if (dev->towrite) { - to_write++; - if (!test_bit(R5_OVERWRITE, &dev->flags)) - non_overwrite++; - } - if (dev->written) written++; - rdev = rcu_dereference(conf->disks[i].rdev); - if (!rdev || !test_bit(In_sync, &rdev->flags)) { - /* The ReadError flag will just be confusing now */ - clear_bit(R5_ReadError, &dev->flags); - clear_bit(R5_ReWrite, &dev->flags); - } - if (!rdev || !test_bit(In_sync, &rdev->flags) - || test_bit(R5_ReadError, &dev->flags)) { - failed++; - failed_num = i; - } else - set_bit(R5_Insync, &dev->flags); - } - rcu_read_unlock(); - PRINTK("locked=%d uptodate=%d to_read=%d" - " to_write=%d failed=%d failed_num=%d\n", - locked, uptodate, to_read, to_write, failed, failed_num); - /* check if the array has lost two devices and, if so, some requests might - * need to be failed - */ - if (failed > 1 && to_read+to_write+written) { - for (i=disks; i--; ) { + for (i = disks; i--; ) { + struct bio *bi; int bitmap_end = 0; if (test_bit(R5_ReadError, &sh->dev[i].flags)) { @@ -1447,23 +1879,26 @@ md_error(conf->mddev, rdev); rcu_read_unlock(); } - spin_lock_irq(&conf->device_lock); /* fail all writes first */ bi = sh->dev[i].towrite; sh->dev[i].towrite = NULL; - if (bi) { to_write--; bitmap_end = 1; } + if (bi) { + s->to_write--; + bitmap_end = 1; + } if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) wake_up(&conf->wait_for_overlap); - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ + while (bi && bi->bi_sector < + sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); if (--bi->bi_phys_segments == 0) { md_write_end(conf->mddev); - bi->bi_next = return_bi; - return_bi = bi; + bi->bi_next = *return_bi; + *return_bi = bi; } bi = nextbi; } @@ -1471,78 +1906,235 @@ bi = sh->dev[i].written; sh->dev[i].written = NULL; if (bi) bitmap_end = 1; - while (bi && 
bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { + while (bi && bi->bi_sector < + sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); if (--bi->bi_phys_segments == 0) { md_write_end(conf->mddev); - bi->bi_next = return_bi; - return_bi = bi; + bi->bi_next = *return_bi; + *return_bi = bi; + } + bi = bi2; + } + + /* fail any reads if this device is non-operational and + * the data has not reached the cache yet. + */ + if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && + (!test_bit(R5_Insync, &sh->dev[i].flags) || + test_bit(R5_ReadError, &sh->dev[i].flags))) { + bi = sh->dev[i].toread; + sh->dev[i].toread = NULL; + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + wake_up(&conf->wait_for_overlap); + if (bi) s->to_read--; + while (bi && bi->bi_sector < + sh->dev[i].sector + STRIPE_SECTORS) { + struct bio *nextbi = + r5_next_bio(bi, sh->dev[i].sector); + clear_bit(BIO_UPTODATE, &bi->bi_flags); + if (--bi->bi_phys_segments == 0) { + bi->bi_next = *return_bi; + *return_bi = bi; + } + bi = nextbi; + } + } + spin_unlock_irq(&conf->device_lock); + if (bitmap_end) + bitmap_endwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, 0, 0); + } + +} + +/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks + * to process + */ +static int __handle_issuing_new_read_requests5(struct stripe_head *sh, + struct stripe_head_state *s, int disk_idx, int disks) +{ + struct r5dev *dev = &sh->dev[disk_idx]; + struct r5dev *failed_dev = &sh->dev[s->failed_num]; + + /* don't schedule compute operations or reads on the parity block while + * a check is in flight + */ + if ((disk_idx == sh->pd_idx) && + test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) + return ~0; + + /* is the data in this block needed, and can we get it? 
*/ + if (!test_bit(R5_LOCKED, &dev->flags) && + !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || + (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || + s->syncing || s->expanding || (s->failed && + (failed_dev->toread || (failed_dev->towrite && + !test_bit(R5_OVERWRITE, &failed_dev->flags) + ))))) { + /* 1/ We would like to get this block, possibly by computing it, + * but we might not be able to. + * + * 2/ Since parity check operations potentially make the parity + * block !uptodate it will need to be refreshed before any + * compute operations on data disks are scheduled. + * + * 3/ We hold off parity block re-reads until check operations + * have quiesced. + */ + if ((s->uptodate == disks - 1) && + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { + set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + set_bit(R5_Wantcompute, &dev->flags); + sh->ops.target = disk_idx; + s->req_compute = 1; + sh->ops.count++; + /* Careful: from this point on 'uptodate' is in the eye + * of raid5_run_ops which services 'compute' operations + * before writes. R5_Wantcompute flags a block that will + * be R5_UPTODATE by the time it is needed for a + * subsequent operation. 
+ */ + s->uptodate++; + return 0; /* uptodate + compute == disks */ + } else if ((s->uptodate < disks - 1) && + test_bit(R5_Insync, &dev->flags)) { + /* Note: we hold off compute operations while checks are + * in flight, but we still prefer 'compute' over 'read' + * hence we only read if (uptodate < disks-1) + */ + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + s->locked++; + pr_debug("Reading block %d (sync=%d)\n", disk_idx, + s->syncing); } - bi = bi2; } - /* fail any reads if this device is non-operational */ - if (!test_bit(R5_Insync, &sh->dev[i].flags) || - test_bit(R5_ReadError, &sh->dev[i].flags)) { - bi = sh->dev[i].toread; - sh->dev[i].toread = NULL; - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - if (bi) to_read--; - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { - bi->bi_next = return_bi; - return_bi = bi; - } - bi = nextbi; + return ~0; +} + +static void handle_issuing_new_read_requests5(struct stripe_head *sh, + struct stripe_head_state *s, int disks) +{ + int i; + + /* Clear completed compute operations. 
Parity recovery + * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled + * later on in this routine + */ + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && + !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + } + + /* look for blocks to read/compute, skip this if a compute + * is already in flight, or if the stripe contents are in the + * midst of changing due to a write + */ + if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && + !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) && + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { + for (i = disks; i--; ) + if (__handle_issuing_new_read_requests5( + sh, s, i, disks) == 0) + break; } + set_bit(STRIPE_HANDLE, &sh->state); +} + +static void handle_issuing_new_read_requests6(struct stripe_head *sh, + struct stripe_head_state *s, struct r6_state *r6s, + int disks) +{ + int i; + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (!test_bit(R5_LOCKED, &dev->flags) && + !test_bit(R5_UPTODATE, &dev->flags) && + (dev->toread || (dev->towrite && + !test_bit(R5_OVERWRITE, &dev->flags)) || + s->syncing || s->expanding || + (s->failed >= 1 && + (sh->dev[r6s->failed_num[0]].toread || + s->to_write)) || + (s->failed >= 2 && + (sh->dev[r6s->failed_num[1]].toread || + s->to_write)))) { + /* we would like to get this block, possibly + * by computing it, but we might not be able to + */ + if (s->uptodate == disks-1) { + pr_debug("Computing stripe %llu block %d\n", + (unsigned long long)sh->sector, i); + compute_block_1(sh, i, 0); + s->uptodate++; + } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { + /* Computing 2-failure is *very* expensive; only + * do it if failed >= 2 + */ + int other; + for (other = disks; other--; ) { + if (other == i) + continue; + if (!test_bit(R5_UPTODATE, + &sh->dev[other].flags)) + break; } - 
spin_unlock_irq(&conf->device_lock); - if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); + BUG_ON(other < 0); + pr_debug("Computing stripe %llu blocks %d,%d\n", + (unsigned long long)sh->sector, + i, other); + compute_block_2(sh, i, other); + s->uptodate += 2; + } else if (test_bit(R5_Insync, &dev->flags)) { + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + s->locked++; + pr_debug("Reading block %d (sync=%d)\n", + i, s->syncing); } } - if (failed > 1 && syncing) { - md_done_sync(conf->mddev, STRIPE_SECTORS,0); - clear_bit(STRIPE_SYNCING, &sh->state); - syncing = 0; } + set_bit(STRIPE_HANDLE, &sh->state); +} - /* might be able to return some write requests if the parity block - * is safe, or on a failed drive - */ - dev = &sh->dev[sh->pd_idx]; - if ( written && - ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && - test_bit(R5_UPTODATE, &dev->flags)) - || (failed == 1 && failed_num == sh->pd_idx)) - ) { - /* any written block on an uptodate or failed drive can be returned. + +/* handle_completed_write_requests + * any written block on an uptodate or failed drive can be returned. * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but * never LOCKED, so we don't need to test 'failed' directly. 
*/ - for (i=disks; i--; ) +static void handle_completed_write_requests(raid5_conf_t *conf, + struct stripe_head *sh, int disks, struct bio **return_bi) +{ + int i; + struct r5dev *dev; + + for (i = disks; i--; ) if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) && - test_bit(R5_UPTODATE, &dev->flags) ) { + test_bit(R5_UPTODATE, &dev->flags)) { /* We can return any write requests */ struct bio *wbi, *wbi2; int bitmap_end = 0; - PRINTK("Return write for disc %d\n", i); + pr_debug("Return write for disc %d\n", i); spin_lock_irq(&conf->device_lock); wbi = dev->written; dev->written = NULL; - while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { + while (wbi && wbi->bi_sector < + dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) { md_write_end(conf->mddev); - wbi->bi_next = return_bi; - return_bi = wbi; + wbi->bi_next = *return_bi; + *return_bi = wbi; } wbi = wbi2; } @@ -1550,89 +2142,63 @@ bitmap_end = 1; spin_unlock_irq(&conf->device_lock); if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, + bitmap_endwrite(conf->mddev->bitmap, + sh->sector, STRIPE_SECTORS, - !test_bit(STRIPE_DEGRADED, &sh->state), 0); - } - } - } - - /* Now we might consider reading some blocks, either to check/generate - * parity, or to satisfy requests - * or to load a block that is being partially written. 
- */ - if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) { - for (i=disks; i--;) { - dev = &sh->dev[i]; - if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || - syncing || - expanding || - (failed && (sh->dev[failed_num].toread || - (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags)))) - ) - ) { - /* we would like to get this block, possibly - * by computing it, but we might not be able to - */ - if (uptodate == disks-1) { - PRINTK("Computing block %d\n", i); - compute_block(sh, i); - uptodate++; - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - locked++; - PRINTK("Reading block %d (sync=%d)\n", - i, syncing); - } + !test_bit(STRIPE_DEGRADED, &sh->state), + 0); } } - set_bit(STRIPE_HANDLE, &sh->state); - } +} - /* now to consider writing and what else, if anything should be read */ - if (to_write) { - int rmw=0, rcw=0; - for (i=disks ; i--;) { +static void handle_issuing_new_write_requests5(raid5_conf_t *conf, + struct stripe_head *sh, struct stripe_head_state *s, int disks) +{ + int rmw = 0, rcw = 0, i; + for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ - dev = &sh->dev[i]; + struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && - (!test_bit(R5_LOCKED, &dev->flags) - ) && - !test_bit(R5_UPTODATE, &dev->flags)) { - if (test_bit(R5_Insync, &dev->flags) -/* && !(!mddev->insync && i == sh->pd_idx) */ - ) + !test_bit(R5_LOCKED, &dev->flags) && + !(test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags))) { + if (test_bit(R5_Insync, &dev->flags)) rmw++; - else rmw += 2*disks; /* cannot read it */ + else + rmw += 2*disks; /* cannot read it */ } /* Would I have to read this buffer for reconstruct_write */ if (!test_bit(R5_OVERWRITE, &dev->flags) && i != 
sh->pd_idx && - (!test_bit(R5_LOCKED, &dev->flags) - ) && - !test_bit(R5_UPTODATE, &dev->flags)) { + !test_bit(R5_LOCKED, &dev->flags) && + !(test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags))) { if (test_bit(R5_Insync, &dev->flags)) rcw++; - else rcw += 2*disks; + else + rcw += 2*disks; } } - PRINTK("for sector %llu, rmw=%d rcw=%d\n", + pr_debug("for sector %llu, rmw=%d rcw=%d\n", (unsigned long long)sh->sector, rmw, rcw); set_bit(STRIPE_HANDLE, &sh->state); if (rmw < rcw && rmw > 0) /* prefer read-modify-write, but need to get some data */ - for (i=disks; i--;) { - dev = &sh->dev[i]; + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && - !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && + !test_bit(R5_LOCKED, &dev->flags) && + !(test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags)) && test_bit(R5_Insync, &dev->flags)) { - if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - { - PRINTK("Read_old block %d for r-m-w\n", i); + if ( + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + pr_debug("Read_old block " + "%d for r-m-w\n", i); set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); - locked++; + if (!test_and_set_bit( + STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + s->locked++; } else { set_bit(STRIPE_DELAYED, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); @@ -1641,165 +2207,367 @@ } if (rcw <= rmw && rcw > 0) /* want reconstruct write, but need to get some data */ - for (i=disks; i--;) { - dev = &sh->dev[i]; - if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && - !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (!test_bit(R5_OVERWRITE, &dev->flags) && + i != sh->pd_idx && + !test_bit(R5_LOCKED, &dev->flags) && + !(test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags)) && 
test_bit(R5_Insync, &dev->flags)) { - if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - { - PRINTK("Read_old block %d for Reconstruct\n", i); + if ( + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + pr_debug("Read_old block " + "%d for Reconstruct\n", i); set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); - locked++; + if (!test_and_set_bit( + STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + s->locked++; + } else { + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); + } + } + } + /* now if nothing is locked, and if we have enough data, + * we can start a write request + */ + /* since handle_stripe can be called at any time we need to handle the + * case where a compute block operation has been submitted and then a + * subsequent call wants to start a write request. raid5_run_ops only + * handles the case where compute block and postxor are requested + * simultaneously. If this is not the case then new writes need to be + * held off until the compute completes. 
+ */ + if ((s->req_compute || + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) && + (s->locked == 0 && (rcw == 0 || rmw == 0) && + !test_bit(STRIPE_BIT_DELAY, &sh->state))) + s->locked += handle_write_operations5(sh, rcw == 0, 0); +} + +static void handle_issuing_new_write_requests6(raid5_conf_t *conf, + struct stripe_head *sh, struct stripe_head_state *s, + struct r6_state *r6s, int disks) +{ + int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; + int qd_idx = r6s->qd_idx; + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + /* Would I have to read this buffer for reconstruct_write */ + if (!test_bit(R5_OVERWRITE, &dev->flags) + && i != pd_idx && i != qd_idx + && (!test_bit(R5_LOCKED, &dev->flags) + ) && + !test_bit(R5_UPTODATE, &dev->flags)) { + if (test_bit(R5_Insync, &dev->flags)) rcw++; + else { + pr_debug("raid6: must_compute: " + "disk %d flags=%#lx\n", i, dev->flags); + must_compute++; + } + } + } + pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", + (unsigned long long)sh->sector, rcw, must_compute); + set_bit(STRIPE_HANDLE, &sh->state); + + if (rcw > 0) + /* want reconstruct write, but need to get some data */ + for (i = disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + if (!test_bit(R5_OVERWRITE, &dev->flags) + && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) + && !test_bit(R5_LOCKED, &dev->flags) && + !test_bit(R5_UPTODATE, &dev->flags) && + test_bit(R5_Insync, &dev->flags)) { + if ( + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + pr_debug("Read_old stripe %llu " + "block %d for Reconstruct\n", + (unsigned long long)sh->sector, i); + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + s->locked++; } else { + pr_debug("Request delayed stripe %llu " + "block %d for Reconstruct\n", + (unsigned long long)sh->sector, i); set_bit(STRIPE_DELAYED, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); } } } - /* now if nothing is locked, and if we have enough data, we can start a write request */ - if (locked == 0 
&& (rcw == 0 ||rmw == 0) && + /* now if nothing is locked, and if we have enough data, we can start a + * write request + */ + if (s->locked == 0 && rcw == 0 && !test_bit(STRIPE_BIT_DELAY, &sh->state)) { - PRINTK("Computing parity...\n"); - compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); + if (must_compute > 0) { + /* We have failed blocks and need to compute them */ + switch (s->failed) { + case 0: + BUG(); + case 1: + compute_block_1(sh, r6s->failed_num[0], 0); + break; + case 2: + compute_block_2(sh, r6s->failed_num[0], + r6s->failed_num[1]); + break; + default: /* This request should have been failed? */ + BUG(); + } + } + + pr_debug("Computing parity for stripe %llu\n", + (unsigned long long)sh->sector); + compute_parity6(sh, RECONSTRUCT_WRITE); /* now every locked buffer is ready to be written */ - for (i=disks; i--;) + for (i = disks; i--; ) if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { - PRINTK("Writing block %d\n", i); - locked++; + pr_debug("Writing stripe %llu block %d\n", + (unsigned long long)sh->sector, i); + s->locked++; set_bit(R5_Wantwrite, &sh->dev[i].flags); - if (!test_bit(R5_Insync, &sh->dev[i].flags) - || (i==sh->pd_idx && failed == 0)) - set_bit(STRIPE_INSYNC, &sh->state); } + /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ + set_bit(STRIPE_INSYNC, &sh->state); + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) + if (atomic_read(&conf->preread_active_stripes) < + IO_THRESHOLD) md_wakeup_thread(conf->mddev->thread); } } - } +} - /* maybe we need to check and possibly fix the parity for this stripe - * Any reads will already have been scheduled, so we just see if enough data - * is available - */ - if (syncing && locked == 0 && - !test_bit(STRIPE_INSYNC, &sh->state)) { +static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, + struct stripe_head_state *s, int disks) +{ 
set_bit(STRIPE_HANDLE, &sh->state); - if (failed == 0) { - BUG_ON(uptodate != disks); - compute_parity5(sh, CHECK_PARITY); - uptodate--; - if (page_is_zero(sh->dev[sh->pd_idx].page)) { - /* parity is correct (on disc, not in buffer any more) */ + /* Take one of the following actions: + * 1/ start a check parity operation if (uptodate == disks) + * 2/ finish a check parity operation and act on the result + * 3/ skip to the writeback section if we previously + * initiated a recovery operation + */ + if (s->failed == 0 && + !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { + BUG_ON(s->uptodate != disks); + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); + sh->ops.count++; + s->uptodate--; + } else if ( + test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); + + if (sh->ops.zero_sum_result == 0) + /* parity is correct (on disc, + * not in buffer any more) + */ set_bit(STRIPE_INSYNC, &sh->state); - } else { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + else { + conf->mddev->resync_mismatches += + STRIPE_SECTORS; + if (test_bit( + MD_RECOVERY_CHECK, &conf->mddev->recovery)) /* don't try to repair!! 
*/ set_bit(STRIPE_INSYNC, &sh->state); else { - compute_block(sh, sh->pd_idx); - uptodate++; + set_bit(STRIPE_OP_COMPUTE_BLK, + &sh->ops.pending); + set_bit(STRIPE_OP_MOD_REPAIR_PD, + &sh->ops.pending); + set_bit(R5_Wantcompute, + &sh->dev[sh->pd_idx].flags); + sh->ops.target = sh->pd_idx; + sh->ops.count++; + s->uptodate++; + } + } } } + + /* check if we can clear a parity disk reconstruct */ + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); } - if (!test_bit(STRIPE_INSYNC, &sh->state)) { + + /* Wait for check parity and compute block operations to complete + * before write-back + */ + if (!test_bit(STRIPE_INSYNC, &sh->state) && + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { + struct r5dev *dev; /* either failed parity check, or recovery is happening */ - if (failed==0) - failed_num = sh->pd_idx; - dev = &sh->dev[failed_num]; + if (s->failed == 0) + s->failed_num = sh->pd_idx; + dev = &sh->dev[s->failed_num]; BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); - BUG_ON(uptodate != disks); + BUG_ON(s->uptodate != disks); set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + clear_bit(STRIPE_DEGRADED, &sh->state); - locked++; + s->locked++; set_bit(STRIPE_INSYNC, &sh->state); } +} + + +static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, + struct stripe_head_state *s, + struct r6_state *r6s, struct page *tmp_page, + int disks) +{ + int update_p = 0, update_q = 0; + struct r5dev *dev; + int pd_idx = sh->pd_idx; + int qd_idx = r6s->qd_idx; + + set_bit(STRIPE_HANDLE, &sh->state); + + BUG_ON(s->failed > 2); + 
BUG_ON(s->uptodate < disks); + /* Want to check and possibly repair P and Q. + * However there could be one 'failed' device, in which + * case we can only check one of them, possibly using the + * other to generate missing data + */ + + /* If !tmp_page, we cannot do the calculations, + * but as we have set STRIPE_HANDLE, we will soon be called + * by stripe_handle with a tmp_page - just wait until then. + */ + if (tmp_page) { + if (s->failed == r6s->q_failed) { + /* The only possible failed device holds 'Q', so it + * makes sense to check P (If anything else were failed, + * we would have used P to recreate it). + */ + compute_block_1(sh, pd_idx, 1); + if (!page_is_zero(sh->dev[pd_idx].page)) { + compute_block_1(sh, pd_idx, 0); + update_p = 1; + } } - if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS,1); - clear_bit(STRIPE_SYNCING, &sh->state); + if (!r6s->q_failed && s->failed < 2) { + /* q is not failed, and we didn't use it to generate + * anything, so it makes sense to check it + */ + memcpy(page_address(tmp_page), + page_address(sh->dev[qd_idx].page), + STRIPE_SIZE); + compute_parity6(sh, UPDATE_PARITY); + if (memcmp(page_address(tmp_page), + page_address(sh->dev[qd_idx].page), + STRIPE_SIZE) != 0) { + clear_bit(STRIPE_INSYNC, &sh->state); + update_q = 1; + } + } + if (update_p || update_q) { + conf->mddev->resync_mismatches += STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! */ + update_p = update_q = 0; } - /* If the failed drive is just a ReadError, then we might need to progress - * the repair/check process + /* now write out any block on a failed drive, + * or P or Q if they need it */ - if (failed == 1 && ! 
conf->mddev->ro && - test_bit(R5_ReadError, &sh->dev[failed_num].flags) - && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags) - && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags) - ) { - dev = &sh->dev[failed_num]; - if (!test_bit(R5_ReWrite, &dev->flags)) { + + if (s->failed == 2) { + dev = &sh->dev[r6s->failed_num[1]]; + s->locked++; + set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_ReWrite, &dev->flags); + } + if (s->failed >= 1) { + dev = &sh->dev[r6s->failed_num[0]]; + s->locked++; set_bit(R5_LOCKED, &dev->flags); - locked++; - } else { - /* let's read it back */ - set_bit(R5_Wantread, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + } + + if (update_p) { + dev = &sh->dev[pd_idx]; + s->locked++; set_bit(R5_LOCKED, &dev->flags); - locked++; + set_bit(R5_Wantwrite, &dev->flags); } + if (update_q) { + dev = &sh->dev[qd_idx]; + s->locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); } + clear_bit(STRIPE_DEGRADED, &sh->state); - if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { - /* Need to write out all blocks after computing parity */ - sh->disks = conf->raid_disks; - sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks); - compute_parity5(sh, RECONSTRUCT_WRITE); - for (i= conf->raid_disks; i--;) { - set_bit(R5_LOCKED, &sh->dev[i].flags); - locked++; - set_bit(R5_Wantwrite, &sh->dev[i].flags); - } - clear_bit(STRIPE_EXPANDING, &sh->state); - } else if (expanded) { - clear_bit(STRIPE_EXPAND_READY, &sh->state); - atomic_dec(&conf->reshape_stripes); - wake_up(&conf->wait_for_overlap); - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); + set_bit(STRIPE_INSYNC, &sh->state); } +} + +static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, + struct r6_state *r6s) +{ + int i; - if (expanding && locked == 0) { /* We have read all the blocks in this stripe and now we need to * copy some of them into a target stripe for expand. 
*/ + struct dma_async_tx_descriptor *tx = NULL; clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); - for (i=0; i< sh->disks; i++) - if (i != sh->pd_idx) { + for (i = 0; i < sh->disks; i++) + if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) { int dd_idx, pd_idx, j; struct stripe_head *sh2; sector_t bn = compute_blocknr(sh, i); sector_t s = raid5_compute_sector(bn, conf->raid_disks, - conf->raid_disks-1, - &dd_idx, &pd_idx, conf); - sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1); + conf->raid_disks - + conf->max_degraded, &dd_idx, + &pd_idx, conf); + sh2 = get_active_stripe(conf, s, conf->raid_disks, + pd_idx, 1); if (sh2 == NULL) /* so far only the early blocks of this stripe * have been requested. When later blocks * get requested, we will try again */ continue; - if(!test_bit(STRIPE_EXPANDING, &sh2->state) || + if (!test_bit(STRIPE_EXPANDING, &sh2->state) || test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { /* must have already done this block */ release_stripe(sh2); continue; } - memcpy(page_address(sh2->dev[dd_idx].page), - page_address(sh->dev[i].page), - STRIPE_SIZE); + + /* place all the copies on one channel */ + tx = async_memcpy(sh2->dev[dd_idx].page, + sh->dev[i].page, 0, 0, STRIPE_SIZE, + ASYNC_TX_DEP_ACK, tx, NULL, NULL); + set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); - for (j=0; j<conf->raid_disks; j++) + for (j = 0; j < conf->raid_disks; j++) if (j != sh2->pd_idx && + (r6s && j != r6s->qd_idx) && !test_bit(R5_Expanded, &sh2->dev[j].flags)) break; if (j == conf->raid_disks) { @@ -1807,153 +2575,91 @@ set_bit(STRIPE_HANDLE, &sh2->state); } release_stripe(sh2); - } - } - - spin_unlock(&sh->lock); - - while ((bi=return_bi)) { - int bytes = bi->bi_size; - - return_bi = bi->bi_next; - bi->bi_next = NULL; - bi->bi_size = 0; - bi->bi_end_io(bi, bytes, - test_bit(BIO_UPTODATE, &bi->bi_flags) - ? 
0 : -EIO); - } - for (i=disks; i-- ;) { - int rw; - struct bio *bi; - mdk_rdev_t *rdev; - if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) - rw = WRITE; - else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) - rw = READ; - else - continue; - - bi = &sh->dev[i].req; - - bi->bi_rw = rw; - if (rw == WRITE) - bi->bi_end_io = raid5_end_write_request; - else - bi->bi_end_io = raid5_end_read_request; - - rcu_read_lock(); - rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && test_bit(Faulty, &rdev->flags)) - rdev = NULL; - if (rdev) - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - - if (rdev) { - if (syncing || expanding || expanded) - md_sync_acct(rdev->bdev, STRIPE_SECTORS); - bi->bi_bdev = rdev->bdev; - PRINTK("for %llu schedule op %ld on disc %d\n", - (unsigned long long)sh->sector, bi->bi_rw, i); - atomic_inc(&sh->count); - bi->bi_sector = sh->sector + rdev->data_offset; - bi->bi_flags = 1 << BIO_UPTODATE; - bi->bi_vcnt = 1; - bi->bi_max_vecs = 1; - bi->bi_idx = 0; - bi->bi_io_vec = &sh->dev[i].vec; - bi->bi_io_vec[0].bv_len = STRIPE_SIZE; - bi->bi_io_vec[0].bv_offset = 0; - bi->bi_size = STRIPE_SIZE; - bi->bi_next = NULL; - if (rw == WRITE && - test_bit(R5_ReWrite, &sh->dev[i].flags)) - atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); - generic_make_request(bi); - } else { - if (rw == WRITE) - set_bit(STRIPE_DEGRADED, &sh->state); - PRINTK("skip op %ld on disc %d for sector %llu\n", - bi->bi_rw, i, (unsigned long long)sh->sector); - clear_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(STRIPE_HANDLE, &sh->state); + /* done submitting copies, wait for them to complete */ + if (i + 1 >= sh->disks) { + async_tx_ack(tx); + dma_wait_for_async_tx(tx); } } } -static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) +/* + * handle_stripe - do things to a stripe. + * + * We lock the stripe and then examine the state of various bits + * to see what needs to be done. 
+ * Possible results: + * return some read request which now have data + * return some write requests which are safely on disc + * schedule a read on some buffers + * schedule a write of some buffers + * return confirmation of parity correctness + * + * buffers are taken off read_list or write_list, and bh_cache buffers + * get BH_Lock set before the stripe lock is released. + * + */ + +static void handle_stripe5(struct stripe_head *sh) { - raid6_conf_t *conf = sh->raid_conf; - int disks = sh->disks; - struct bio *return_bi= NULL; - struct bio *bi; - int i; - int syncing, expanding, expanded; - int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; - int non_overwrite = 0; - int failed_num[2] = {0, 0}; - struct r5dev *dev, *pdev, *qdev; - int pd_idx = sh->pd_idx; - int qd_idx = raid6_next_disk(pd_idx, disks); - int p_failed, q_failed; + raid5_conf_t *conf = sh->raid_conf; + int disks = sh->disks, i; + struct bio *return_bi = NULL, *bi; + struct stripe_head_state s; + struct r5dev *dev; + unsigned long pending = 0; - PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n", - (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count), - pd_idx, qd_idx); + memset(&s, 0, sizeof(s)); + pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " + "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state, + atomic_read(&sh->count), sh->pd_idx, + sh->ops.pending, sh->ops.ack, sh->ops.complete); spin_lock(&sh->lock); clear_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); - syncing = test_bit(STRIPE_SYNCING, &sh->state); - expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + s.syncing = test_bit(STRIPE_SYNCING, &sh->state); + s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); + s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); /* Now to look around and see what can be done */ rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; 
- dev = &sh->dev[i]; + struct r5dev *dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); - PRINTK("check %d: state 0x%lx read %p write %p written %p\n", - i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { - struct bio *rbi, *rbi2; - PRINTK("Return read for disc %d\n", i); - spin_lock_irq(&conf->device_lock); - rbi = dev->toread; - dev->toread = NULL; - if (test_and_clear_bit(R5_Overlap, &dev->flags)) - wake_up(&conf->wait_for_overlap); - spin_unlock_irq(&conf->device_lock); - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { - copy_data(0, rbi, dev->page, dev->sector); - rbi2 = r5_next_bio(rbi, dev->sector); - spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - spin_unlock_irq(&conf->device_lock); - rbi = rbi2; - } - } - - /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; + pr_debug("check %d: state 0x%lx toread %p read %p write %p " + "written %p\n", i, dev->flags, dev->toread, dev->read, + dev->towrite, dev->written); + /* maybe we can request a biofill operation + * + * new wantfill requests are only permitted while + * STRIPE_OP_BIOFILL is clear + */ + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && + !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) + set_bit(R5_Wantfill, &dev->flags); - if (dev->toread) to_read++; + /* now count some things */ + if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; + if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; + if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; + + if (test_bit(R5_Wantfill, &dev->flags)) + s.to_fill++; + else if (dev->toread) + s.to_read++; if (dev->towrite) { - to_write++; + s.to_write++; if (!test_bit(R5_OVERWRITE, &dev->flags)) - non_overwrite++; + s.non_overwrite++; } - if (dev->written) written++; + if (dev->written) 
+ s.written++; rdev = rcu_dereference(conf->disks[i].rdev); if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ @@ -1962,376 +2668,361 @@ } if (!rdev || !test_bit(In_sync, &rdev->flags) || test_bit(R5_ReadError, &dev->flags)) { - if ( failed < 2 ) - failed_num[failed] = i; - failed++; + s.failed++; + s.failed_num = i; } else set_bit(R5_Insync, &dev->flags); } rcu_read_unlock(); - PRINTK("locked=%d uptodate=%d to_read=%d" - " to_write=%d failed=%d failed_num=%d,%d\n", - locked, uptodate, to_read, to_write, failed, - failed_num[0], failed_num[1]); - /* check if the array has lost >2 devices and, if so, some requests might - * need to be failed - */ - if (failed > 2 && to_read+to_write+written) { - for (i=disks; i--; ) { - int bitmap_end = 0; - - if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - mdk_rdev_t *rdev; - rcu_read_lock(); - rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && test_bit(In_sync, &rdev->flags)) - /* multiple read failures in one stripe */ - md_error(conf->mddev, rdev); - rcu_read_unlock(); - } - - spin_lock_irq(&conf->device_lock); - /* fail all writes first */ - bi = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - if (bi) { to_write--; bitmap_end = 1; } - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { - md_write_end(conf->mddev); - bi->bi_next = return_bi; - return_bi = bi; - } - bi = nextbi; - } - /* and fail all 'written' */ - bi = sh->dev[i].written; - sh->dev[i].written = NULL; - if (bi) bitmap_end = 1; - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { - struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { - md_write_end(conf->mddev); - 
bi->bi_next = return_bi; - return_bi = bi; - } - bi = bi2; - } + if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) + sh->ops.count++; - /* fail any reads if this device is non-operational */ - if (!test_bit(R5_Insync, &sh->dev[i].flags) || - test_bit(R5_ReadError, &sh->dev[i].flags)) { - bi = sh->dev[i].toread; - sh->dev[i].toread = NULL; - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - if (bi) to_read--; - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { - bi->bi_next = return_bi; - return_bi = bi; - } - bi = nextbi; - } - } - spin_unlock_irq(&conf->device_lock); - if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); - } - } - if (failed > 2 && syncing) { + pr_debug("locked=%d uptodate=%d to_read=%d" + " to_write=%d failed=%d failed_num=%d\n", + s.locked, s.uptodate, s.to_read, s.to_write, + s.failed, s.failed_num); + /* check if the array has lost two devices and, if so, some requests might + * need to be failed + */ + if (s.failed > 1 && s.to_read+s.to_write+s.written) + handle_requests_to_failed_array(conf, sh, &s, disks, + &return_bi); + if (s.failed > 1 && s.syncing) { md_done_sync(conf->mddev, STRIPE_SECTORS,0); clear_bit(STRIPE_SYNCING, &sh->state); - syncing = 0; + s.syncing = 0; } - /* - * might be able to return some write requests if the parity blocks - * are safe, or on a failed drive - */ - pdev = &sh->dev[pd_idx]; - p_failed = (failed >= 1 && failed_num[0] == pd_idx) - || (failed >= 2 && failed_num[1] == pd_idx); - qdev = &sh->dev[qd_idx]; - q_failed = (failed >= 1 && failed_num[0] == qd_idx) - || (failed >= 2 && failed_num[1] == qd_idx); - - if ( written && - ( p_failed || ((test_bit(R5_Insync, &pdev->flags) - && !test_bit(R5_LOCKED, &pdev->flags) - && test_bit(R5_UPTODATE, &pdev->flags))) ) 
&& - ( q_failed || ((test_bit(R5_Insync, &qdev->flags) - && !test_bit(R5_LOCKED, &qdev->flags) - && test_bit(R5_UPTODATE, &qdev->flags))) ) ) { - /* any written block on an uptodate or failed drive can be - * returned. Note that if we 'wrote' to a failed drive, - * it will be UPTODATE, but never LOCKED, so we don't need - * to test 'failed' directly. + /* might be able to return some write requests if the parity block + * is safe, or on a failed drive */ - for (i=disks; i--; ) - if (sh->dev[i].written) { - dev = &sh->dev[i]; - if (!test_bit(R5_LOCKED, &dev->flags) && - test_bit(R5_UPTODATE, &dev->flags) ) { - /* We can return any write requests */ - int bitmap_end = 0; - struct bio *wbi, *wbi2; - PRINTK("Return write for stripe %llu disc %d\n", - (unsigned long long)sh->sector, i); - spin_lock_irq(&conf->device_lock); - wbi = dev->written; - dev->written = NULL; - while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); - if (--wbi->bi_phys_segments == 0) { - md_write_end(conf->mddev); - wbi->bi_next = return_bi; - return_bi = wbi; - } - wbi = wbi2; - } - if (dev->towrite == NULL) - bitmap_end = 1; - spin_unlock_irq(&conf->device_lock); - if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, - !test_bit(STRIPE_DEGRADED, &sh->state), 0); - } - } - } + dev = &sh->dev[sh->pd_idx]; + if ( s.written && + ((test_bit(R5_Insync, &dev->flags) && + !test_bit(R5_LOCKED, &dev->flags) && + test_bit(R5_UPTODATE, &dev->flags)) || + (s.failed == 1 && s.failed_num == sh->pd_idx))) + handle_completed_write_requests(conf, sh, disks, &return_bi); /* Now we might consider reading some blocks, either to check/generate * parity, or to satisfy requests * or to load a block that is being partially written. 
*/ - if (to_read || non_overwrite || (to_write && failed) || - (syncing && (uptodate < disks)) || expanding) { - for (i=disks; i--;) { - dev = &sh->dev[i]; - if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || - syncing || - expanding || - (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) || - (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write)) - ) - ) { - /* we would like to get this block, possibly - * by computing it, but we might not be able to + if (s.to_read || s.non_overwrite || + (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding || + test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) + handle_issuing_new_read_requests5(sh, &s, disks); + + /* Now we check to see if any write operations have recently + * completed */ - if (uptodate == disks-1) { - PRINTK("Computing stripe %llu block %d\n", - (unsigned long long)sh->sector, i); - compute_block_1(sh, i, 0); - uptodate++; - } else if ( uptodate == disks-2 && failed >= 2 ) { - /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */ - int other; - for (other=disks; other--;) { - if ( other == i ) - continue; - if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) ) - break; - } - BUG_ON(other < 0); - PRINTK("Computing stripe %llu blocks %d,%d\n", - (unsigned long long)sh->sector, i, other); - compute_block_2(sh, i, other); - uptodate += 2; - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - locked++; - PRINTK("Reading block %d (sync=%d)\n", - i, syncing); - } - } - } - set_bit(STRIPE_HANDLE, &sh->state); + + /* leave prexor set until postxor is done, allows us to distinguish + * a rmw from a rcw during biodrain + */ + if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && + test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { + + clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); + 
clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); + clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); + + for (i = disks; i--; ) + clear_bit(R5_Wantprexor, &sh->dev[i].flags); } - /* now to consider writing and what else, if anything should be read */ - if (to_write) { - int rcw=0, must_compute=0; - for (i=disks ; i--;) { + /* if only POSTXOR is set then this is an 'expand' postxor */ + if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) && + test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { + + clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); + clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack); + clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); + + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); + + /* All the 'written' buffers and the parity block are ready to + * be written back to disk + */ + BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); + for (i = disks; i--; ) { dev = &sh->dev[i]; - /* Would I have to read this buffer for reconstruct_write */ - if (!test_bit(R5_OVERWRITE, &dev->flags) - && i != pd_idx && i != qd_idx - && (!test_bit(R5_LOCKED, &dev->flags) - ) && - !test_bit(R5_UPTODATE, &dev->flags)) { - if (test_bit(R5_Insync, &dev->flags)) rcw++; - else { - PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags); - must_compute++; + if (test_bit(R5_LOCKED, &dev->flags) && + (i == sh->pd_idx || dev->written)) { + pr_debug("Writing block %d\n", i); + set_bit(R5_Wantwrite, &dev->flags); + if (!test_and_set_bit( + STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + if (!test_bit(R5_Insync, &dev->flags) || + (i == sh->pd_idx && s.failed == 0)) + set_bit(STRIPE_INSYNC, &sh->state); + } } + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + atomic_dec(&conf->preread_active_stripes); + if (atomic_read(&conf->preread_active_stripes) < + IO_THRESHOLD) + md_wakeup_thread(conf->mddev->thread); } } - PRINTK("for sector %llu, rcw=%d, 
must_compute=%d\n", - (unsigned long long)sh->sector, rcw, must_compute); - set_bit(STRIPE_HANDLE, &sh->state); - if (rcw > 0) - /* want reconstruct write, but need to get some data */ - for (i=disks; i--;) { - dev = &sh->dev[i]; - if (!test_bit(R5_OVERWRITE, &dev->flags) - && !(failed == 0 && (i == pd_idx || i == qd_idx)) - && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && - test_bit(R5_Insync, &dev->flags)) { - if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - { - PRINTK("Read_old stripe %llu block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); + /* Now to consider new write requests and what else, if anything + * should be read. We do not handle new writes when: + * 1/ A 'write' operation (copy+xor) is already in flight. + * 2/ A 'check' operation is in flight, as it may clobber the parity + * block. + */ + if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) && + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) + handle_issuing_new_write_requests5(conf, sh, &s, disks); + + /* maybe we need to check and possibly fix the parity for this stripe + * Any reads will already have been scheduled, so we just see if enough + * data is available. The parity check is held off while parity + * dependent operations are in flight. 
+ */ + if ((s.syncing && s.locked == 0 && + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && + !test_bit(STRIPE_INSYNC, &sh->state)) || + test_bit(STRIPE_OP_CHECK, &sh->ops.pending) || + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) + handle_parity_checks5(conf, sh, &s, disks); + + if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { + md_done_sync(conf->mddev, STRIPE_SECTORS,1); + clear_bit(STRIPE_SYNCING, &sh->state); + } + + /* If the failed drive is just a ReadError, then we might need to progress + * the repair/check process + */ + if (s.failed == 1 && !conf->mddev->ro && + test_bit(R5_ReadError, &sh->dev[s.failed_num].flags) + && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags) + && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags) + ) { + dev = &sh->dev[s.failed_num]; + if (!test_bit(R5_ReWrite, &dev->flags)) { + set_bit(R5_Wantwrite, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + set_bit(R5_ReWrite, &dev->flags); set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - locked++; + s.locked++; } else { - PRINTK("Request delayed stripe %llu block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); + /* let's read it back */ + set_bit(R5_Wantread, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + set_bit(R5_LOCKED, &dev->flags); + s.locked++; } } + + /* Finish postxor operations initiated by the expansion + * process + */ + if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) && + !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) { + + clear_bit(STRIPE_EXPANDING, &sh->state); + + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); + + for (i = conf->raid_disks; i--; ) { + set_bit(R5_Wantwrite, &sh->dev[i].flags); + if (!test_and_set_bit(STRIPE_OP_IO, 
&sh->ops.pending)) + sh->ops.count++; } - /* now if nothing is locked, and if we have enough data, we can start a write request */ - if (locked == 0 && rcw == 0 && - !test_bit(STRIPE_BIT_DELAY, &sh->state)) { - if ( must_compute > 0 ) { - /* We have failed blocks and need to compute them */ - switch ( failed ) { - case 0: BUG(); - case 1: compute_block_1(sh, failed_num[0], 0); break; - case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break; - default: BUG(); /* This request should have been failed? */ } + + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { + /* Need to write out all blocks after computing parity */ + sh->disks = conf->raid_disks; + sh->pd_idx = stripe_to_pdidx(sh->sector, conf, + conf->raid_disks); + s.locked += handle_write_operations5(sh, 0, 1); + } else if (s.expanded && + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { + clear_bit(STRIPE_EXPAND_READY, &sh->state); + atomic_dec(&conf->reshape_stripes); + wake_up(&conf->wait_for_overlap); + md_done_sync(conf->mddev, STRIPE_SECTORS, 1); } - PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector); - compute_parity6(sh, RECONSTRUCT_WRITE); - /* now every locked buffer is ready to be written */ - for (i=disks; i--;) - if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { - PRINTK("Writing stripe %llu block %d\n", - (unsigned long long)sh->sector, i); - locked++; - set_bit(R5_Wantwrite, &sh->dev[i].flags); + if (s.expanding && s.locked == 0) + handle_stripe_expansion(conf, sh, NULL); + + if (sh->ops.count) + pending = get_stripe_work(sh); + + spin_unlock(&sh->lock); + + if (pending) + raid5_run_ops(sh, pending); + + while ((bi=return_bi)) { + int bytes = bi->bi_size; + + return_bi = bi->bi_next; + bi->bi_next = NULL; + bi->bi_size = 0; + bi->bi_end_io(bi, bytes, + test_bit(BIO_UPTODATE, &bi->bi_flags) + ? 
0 : -EIO); } - /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ - set_bit(STRIPE_INSYNC, &sh->state); +} - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); +static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) +{ + raid6_conf_t *conf = sh->raid_conf; + int disks = sh->disks; + struct bio *return_bi = NULL; + struct bio *bi; + int i, pd_idx = sh->pd_idx; + struct stripe_head_state s; + struct r6_state r6s; + struct r5dev *dev, *pdev, *qdev; + + r6s.qd_idx = raid6_next_disk(pd_idx, disks); + pr_debug("handling stripe %llu, state=%#lx cnt=%d, " + "pd_idx=%d, qd_idx=%d\n", + (unsigned long long)sh->sector, sh->state, + atomic_read(&sh->count), pd_idx, r6s.qd_idx); + memset(&s, 0, sizeof(s)); + + spin_lock(&sh->lock); + clear_bit(STRIPE_HANDLE, &sh->state); + clear_bit(STRIPE_DELAYED, &sh->state); + + s.syncing = test_bit(STRIPE_SYNCING, &sh->state); + s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); + s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + /* Now to look around and see what can be done */ + + rcu_read_lock(); + for (i=disks; i--; ) { + mdk_rdev_t *rdev; + dev = &sh->dev[i]; + clear_bit(R5_Insync, &dev->flags); + + pr_debug("check %d: state 0x%lx read %p write %p written %p\n", + i, dev->flags, dev->toread, dev->towrite, dev->written); + /* maybe we can reply to a read */ + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { + struct bio *rbi, *rbi2; + pr_debug("Return read for disc %d\n", i); + spin_lock_irq(&conf->device_lock); + rbi = dev->toread; + dev->toread = NULL; + if (test_and_clear_bit(R5_Overlap, &dev->flags)) + wake_up(&conf->wait_for_overlap); + spin_unlock_irq(&conf->device_lock); + while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { + copy_data(0, rbi, dev->page, dev->sector); + rbi2 = r5_next_bio(rbi, dev->sector); + 
spin_lock_irq(&conf->device_lock); + if (--rbi->bi_phys_segments == 0) { + rbi->bi_next = return_bi; + return_bi = rbi; } + spin_unlock_irq(&conf->device_lock); + rbi = rbi2; } } - /* maybe we need to check and possibly fix the parity for this stripe - * Any reads will already have been scheduled, so we just see if enough data - * is available - */ - if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) { - int update_p = 0, update_q = 0; - struct r5dev *dev; - - set_bit(STRIPE_HANDLE, &sh->state); + /* now count some things */ + if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; + if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; - BUG_ON(failed>2); - BUG_ON(uptodate < disks); - /* Want to check and possibly repair P and Q. - * However there could be one 'failed' device, in which - * case we can only check one of them, possibly using the - * other to generate missing data - */ - /* If !tmp_page, we cannot do the calculations, - * but as we have set STRIPE_HANDLE, we will soon be called - * by stripe_handle with a tmp_page - just wait until then. - */ - if (tmp_page) { - if (failed == q_failed) { - /* The only possible failed device holds 'Q', so it makes - * sense to check P (If anything else were failed, we would - * have used P to recreate it). 
- */ - compute_block_1(sh, pd_idx, 1); - if (!page_is_zero(sh->dev[pd_idx].page)) { - compute_block_1(sh,pd_idx,0); - update_p = 1; - } + if (dev->toread) + s.to_read++; + if (dev->towrite) { + s.to_write++; + if (!test_bit(R5_OVERWRITE, &dev->flags)) + s.non_overwrite++; } - if (!q_failed && failed < 2) { - /* q is not failed, and we didn't use it to generate - * anything, so it makes sense to check it - */ - memcpy(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE); - compute_parity6(sh, UPDATE_PARITY); - if (memcmp(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE)!= 0) { - clear_bit(STRIPE_INSYNC, &sh->state); - update_q = 1; + if (dev->written) + s.written++; + rdev = rcu_dereference(conf->disks[i].rdev); + if (!rdev || !test_bit(In_sync, &rdev->flags)) { + /* The ReadError flag will just be confusing now */ + clear_bit(R5_ReadError, &dev->flags); + clear_bit(R5_ReWrite, &dev->flags); } + if (!rdev || !test_bit(In_sync, &rdev->flags) + || test_bit(R5_ReadError, &dev->flags)) { + if (s.failed < 2) + r6s.failed_num[s.failed] = i; + s.failed++; + } else + set_bit(R5_Insync, &dev->flags); } - if (update_p || update_q) { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) - /* don't try to repair!! 
*/ - update_p = update_q = 0; + rcu_read_unlock(); + pr_debug("locked=%d uptodate=%d to_read=%d" + " to_write=%d failed=%d failed_num=%d,%d\n", + s.locked, s.uptodate, s.to_read, s.to_write, s.failed, + r6s.failed_num[0], r6s.failed_num[1]); + /* check if the array has lost >2 devices and, if so, some requests + * might need to be failed + */ + if (s.failed > 2 && s.to_read+s.to_write+s.written) + handle_requests_to_failed_array(conf, sh, &s, disks, + &return_bi); + if (s.failed > 2 && s.syncing) { + md_done_sync(conf->mddev, STRIPE_SECTORS,0); + clear_bit(STRIPE_SYNCING, &sh->state); + s.syncing = 0; } - /* now write out any block on a failed drive, - * or P or Q if they need it + /* + * might be able to return some write requests if the parity blocks + * are safe, or on a failed drive */ + pdev = &sh->dev[pd_idx]; + r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) + || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); + qdev = &sh->dev[r6s.qd_idx]; + r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx) + || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx); - if (failed == 2) { - dev = &sh->dev[failed_num[1]]; - locked++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } - if (failed >= 1) { - dev = &sh->dev[failed_num[0]]; - locked++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } + if ( s.written && + ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) + && !test_bit(R5_LOCKED, &pdev->flags) + && test_bit(R5_UPTODATE, &pdev->flags)))) && + ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) + && !test_bit(R5_LOCKED, &qdev->flags) + && test_bit(R5_UPTODATE, &qdev->flags))))) + handle_completed_write_requests(conf, sh, disks, &return_bi); - if (update_p) { - dev = &sh->dev[pd_idx]; - locked ++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } - if (update_q) { - dev = &sh->dev[qd_idx]; - locked++; - set_bit(R5_LOCKED, &dev->flags); - 
set_bit(R5_Wantwrite, &dev->flags); - } - clear_bit(STRIPE_DEGRADED, &sh->state); + /* Now we might consider reading some blocks, either to check/generate + * parity, or to satisfy requests + * or to load a block that is being partially written. + */ + if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || + (s.syncing && (s.uptodate < disks)) || s.expanding) + handle_issuing_new_read_requests6(sh, &s, &r6s, disks); - set_bit(STRIPE_INSYNC, &sh->state); - } - } + /* now to consider writing and what else, if anything should be read */ + if (s.to_write) + handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks); + + /* maybe we need to check and possibly fix the parity for this stripe + * Any reads will already have been scheduled, so we just see if enough + * data is available + */ + if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) + handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); - if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { + if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS,1); clear_bit(STRIPE_SYNCING, &sh->state); } @@ -2339,9 +3030,9 @@ /* If the failed drives are just a ReadError, then we might need * to progress the repair/check process */ - if (failed <= 2 && ! 
conf->mddev->ro) - for (i=0; i<failed;i++) { - dev = &sh->dev[failed_num[i]]; + if (s.failed <= 2 && !conf->mddev->ro) + for (i = 0; i < s.failed; i++) { + dev = &sh->dev[r6s.failed_num[i]]; if (test_bit(R5_ReadError, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && test_bit(R5_UPTODATE, &dev->flags) @@ -2358,7 +3049,7 @@ } } - if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { /* Need to write out all blocks after computing P&Q */ sh->disks = conf->raid_disks; sh->pd_idx = stripe_to_pdidx(sh->sector, conf, @@ -2366,69 +3057,19 @@ compute_parity6(sh, RECONSTRUCT_WRITE); for (i = conf->raid_disks ; i-- ; ) { set_bit(R5_LOCKED, &sh->dev[i].flags); - locked++; + s.locked++; set_bit(R5_Wantwrite, &sh->dev[i].flags); } clear_bit(STRIPE_EXPANDING, &sh->state); - } else if (expanded) { + } else if (s.expanded) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); wake_up(&conf->wait_for_overlap); md_done_sync(conf->mddev, STRIPE_SECTORS, 1); } - if (expanding && locked == 0) { - /* We have read all the blocks in this stripe and now we need to - * copy some of them into a target stripe for expand. - */ - clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); - for (i = 0; i < sh->disks ; i++) - if (i != pd_idx && i != qd_idx) { - int dd_idx2, pd_idx2, j; - struct stripe_head *sh2; - - sector_t bn = compute_blocknr(sh, i); - sector_t s = raid5_compute_sector( - bn, conf->raid_disks, - conf->raid_disks - conf->max_degraded, - &dd_idx2, &pd_idx2, conf); - sh2 = get_active_stripe(conf, s, - conf->raid_disks, - pd_idx2, 1); - if (sh2 == NULL) - /* so for only the early blocks of - * this stripe have been requests. 
- * When later blocks get requests, we - * will try again - */ - continue; - if (!test_bit(STRIPE_EXPANDING, &sh2->state) || - test_bit(R5_Expanded, - &sh2->dev[dd_idx2].flags)) { - /* must have already done this block */ - release_stripe(sh2); - continue; - } - memcpy(page_address(sh2->dev[dd_idx2].page), - page_address(sh->dev[i].page), - STRIPE_SIZE); - set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags); - set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags); - for (j = 0 ; j < conf->raid_disks ; j++) - if (j != sh2->pd_idx && - j != raid6_next_disk(sh2->pd_idx, - sh2->disks) && - !test_bit(R5_Expanded, - &sh2->dev[j].flags)) - break; - if (j == conf->raid_disks) { - set_bit(STRIPE_EXPAND_READY, - &sh2->state); - set_bit(STRIPE_HANDLE, &sh2->state); - } - release_stripe(sh2); - } - } + if (s.expanding && s.locked == 0) + handle_stripe_expansion(conf, sh, &r6s); spin_unlock(&sh->lock); @@ -2470,11 +3111,11 @@ rcu_read_unlock(); if (rdev) { - if (syncing || expanding || expanded) + if (s.syncing || s.expanding || s.expanded) md_sync_acct(rdev->bdev, STRIPE_SECTORS); bi->bi_bdev = rdev->bdev; - PRINTK("for %llu schedule op %ld on disc %d\n", + pr_debug("for %llu schedule op %ld on disc %d\n", (unsigned long long)sh->sector, bi->bi_rw, i); atomic_inc(&sh->count); bi->bi_sector = sh->sector + rdev->data_offset; @@ -2494,7 +3135,7 @@ } else { if (rw == WRITE) set_bit(STRIPE_DEGRADED, &sh->state); - PRINTK("skip op %ld on disc %d for sector %llu\n", + pr_debug("skip op %ld on disc %d for sector %llu\n", bi->bi_rw, i, (unsigned long long)sh->sector); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2738,7 +3379,7 @@ } - PRINTK("raid5_align_endio : io error...handing IO for a retry\n"); + pr_debug("raid5_align_endio : io error...handing IO for a retry\n"); add_bio_to_retry(raid_bi, conf); return 0; @@ -2776,7 +3417,7 @@ mdk_rdev_t *rdev; if (!in_chunk_boundary(mddev, raid_bio)) { - PRINTK("chunk_aligned_read : non aligned\n"); + 
pr_debug("chunk_aligned_read : non aligned\n"); return 0; } /* @@ -2900,7 +3541,7 @@ new_sector = raid5_compute_sector(logical_sector, disks, data_disks, &dd_idx, &pd_idx, conf); - PRINTK("raid5: make_request, sector %llu logical %llu\n", + pr_debug("raid5: make_request, sector %llu logical %llu\n", (unsigned long long)new_sector, (unsigned long long)logical_sector); @@ -3273,7 +3914,7 @@ raid5_conf_t *conf = mddev_to_conf(mddev); int handled; - PRINTK("+++ raid5d active\n"); + pr_debug("+++ raid5d active\n"); md_check_recovery(mddev); @@ -3308,8 +3949,10 @@ handled++; } - if (list_empty(&conf->handle_list)) + if (list_empty(&conf->handle_list)) { + async_tx_issue_pending_all(); break; + } first = conf->handle_list.next; sh = list_entry(first, struct stripe_head, lru); @@ -3325,13 +3968,13 @@ spin_lock_irq(&conf->device_lock); } - PRINTK("%d stripes handled\n", handled); + pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); unplug_slaves(mddev); - PRINTK("--- raid5d inactive\n"); + pr_debug("--- raid5d inactive\n"); } static ssize_t @@ -3507,7 +4150,7 @@ atomic_set(&conf->preread_active_stripes, 0); atomic_set(&conf->active_aligned_reads, 0); - PRINTK("raid5: run(%s) called.\n", mdname(mddev)); + pr_debug("raid5: run(%s) called.\n", mdname(mddev)); ITERATE_RDEV(mddev,rdev,tmp) { raid_disk = rdev->raid_disk; @@ -3690,7 +4333,7 @@ return 0; } -#if RAID5_DEBUG +#ifdef DEBUG static void print_sh (struct seq_file *seq, struct stripe_head *sh) { int i; @@ -3737,7 +4380,7 @@ conf->disks[i].rdev && test_bit(In_sync, &conf->disks[i].rdev->flags) ? 
"U" : "_"); seq_printf (seq, "]"); -#if RAID5_DEBUG +#ifdef DEBUG seq_printf (seq, "\n"); printall(seq, conf); #endif diff -Nurb linux-2.6.22-570/drivers/md/xor.c linux-2.6.22-591/drivers/md/xor.c --- linux-2.6.22-570/drivers/md/xor.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/md/xor.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,154 +0,0 @@ -/* - * xor.c : Multiple Devices driver for Linux - * - * Copyright (C) 1996, 1997, 1998, 1999, 2000, - * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. - * - * Dispatch optimized RAID-5 checksumming functions. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define BH_TRACE 0 -#include -#include -#include -#include - -/* The xor routines to use. */ -static struct xor_block_template *active_template; - -void -xor_block(unsigned int count, unsigned int bytes, void **ptr) -{ - unsigned long *p0, *p1, *p2, *p3, *p4; - - p0 = (unsigned long *) ptr[0]; - p1 = (unsigned long *) ptr[1]; - if (count == 2) { - active_template->do_2(bytes, p0, p1); - return; - } - - p2 = (unsigned long *) ptr[2]; - if (count == 3) { - active_template->do_3(bytes, p0, p1, p2); - return; - } - - p3 = (unsigned long *) ptr[3]; - if (count == 4) { - active_template->do_4(bytes, p0, p1, p2, p3); - return; - } - - p4 = (unsigned long *) ptr[4]; - active_template->do_5(bytes, p0, p1, p2, p3, p4); -} - -/* Set of all registered templates. 
*/ -static struct xor_block_template *template_list; - -#define BENCH_SIZE (PAGE_SIZE) - -static void -do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) -{ - int speed; - unsigned long now; - int i, count, max; - - tmpl->next = template_list; - template_list = tmpl; - - /* - * Count the number of XORs done during a whole jiffy, and use - * this to calculate the speed of checksumming. We use a 2-page - * allocation to have guaranteed color L1-cache layout. - */ - max = 0; - for (i = 0; i < 5; i++) { - now = jiffies; - count = 0; - while (jiffies == now) { - mb(); - tmpl->do_2(BENCH_SIZE, b1, b2); - mb(); - count++; - mb(); - } - if (count > max) - max = count; - } - - speed = max * (HZ * BENCH_SIZE / 1024); - tmpl->speed = speed; - - printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name, - speed / 1000, speed % 1000); -} - -static int -calibrate_xor_block(void) -{ - void *b1, *b2; - struct xor_block_template *f, *fastest; - - b1 = (void *) __get_free_pages(GFP_KERNEL, 2); - if (! b1) { - printk("raid5: Yikes! 
No memory available.\n"); - return -ENOMEM; - } - b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; - - /* - * If this arch/cpu has a short-circuited selection, don't loop through all - * the possible functions, just test the best one - */ - - fastest = NULL; - -#ifdef XOR_SELECT_TEMPLATE - fastest = XOR_SELECT_TEMPLATE(fastest); -#endif - -#define xor_speed(templ) do_xor_speed((templ), b1, b2) - - if (fastest) { - printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n", - fastest->name); - xor_speed(fastest); - } else { - printk(KERN_INFO "raid5: measuring checksumming speed\n"); - XOR_TRY_TEMPLATES; - fastest = template_list; - for (f = fastest; f; f = f->next) - if (f->speed > fastest->speed) - fastest = f; - } - - printk("raid5: using function: %s (%d.%03d MB/sec)\n", - fastest->name, fastest->speed / 1000, fastest->speed % 1000); - -#undef xor_speed - - free_pages((unsigned long)b1, 2); - - active_template = fastest; - return 0; -} - -static __exit void xor_exit(void) { } - -EXPORT_SYMBOL(xor_block); -MODULE_LICENSE("GPL"); - -module_init(calibrate_xor_block); -module_exit(xor_exit); diff -Nurb linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c linux-2.6.22-591/drivers/media/dvb/dvb-core/dvb_frontend.c --- linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-12-21 15:36:12.000000000 -0500 @@ -523,6 +523,7 @@ dvb_frontend_init(fe); + set_freezable(); while (1) { up(&fepriv->sem); /* is locked when we enter the thread... 
*/ restart: diff -Nurb linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c linux-2.6.22-591/drivers/media/video/cx88/cx88-tvaudio.c --- linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/media/video/cx88/cx88-tvaudio.c 2007-12-21 15:36:12.000000000 -0500 @@ -906,6 +906,7 @@ u32 mode = 0; dprintk("cx88: tvaudio thread started\n"); + set_freezable(); for (;;) { msleep_interruptible(1000); if (kthread_should_stop()) diff -Nurb linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c linux-2.6.22-591/drivers/media/video/msp3400-kthreads.c --- linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/media/video/msp3400-kthreads.c 2007-12-21 15:36:12.000000000 -0500 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -468,6 +469,7 @@ v4l_dbg(1, msp_debug, client, "msp3400 daemon started\n"); + set_freezable(); for (;;) { v4l_dbg(2, msp_debug, client, "msp3400 thread: sleep\n"); msp_sleep(state, -1); @@ -646,7 +648,7 @@ int val, i, std, count; v4l_dbg(1, msp_debug, client, "msp3410 daemon started\n"); - + set_freezable(); for (;;) { v4l_dbg(2, msp_debug, client, "msp3410 thread: sleep\n"); msp_sleep(state,-1); @@ -940,7 +942,7 @@ int val, i; v4l_dbg(1, msp_debug, client, "msp34xxg daemon started\n"); - + set_freezable(); for (;;) { v4l_dbg(2, msp_debug, client, "msp34xxg thread: sleep\n"); msp_sleep(state, -1); diff -Nurb linux-2.6.22-570/drivers/media/video/tvaudio.c linux-2.6.22-591/drivers/media/video/tvaudio.c --- linux-2.6.22-570/drivers/media/video/tvaudio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/media/video/tvaudio.c 2007-12-21 15:36:12.000000000 -0500 @@ -271,7 +271,7 @@ struct CHIPDESC *desc = chiplist + chip->type; v4l_dbg(1, debug, &chip->c, "%s: thread started\n", chip->c.name); - + set_freezable(); for (;;) { set_current_state(TASK_INTERRUPTIBLE); if 
(!kthread_should_stop()) diff -Nurb linux-2.6.22-570/drivers/media/video/video-buf-dvb.c linux-2.6.22-591/drivers/media/video/video-buf-dvb.c --- linux-2.6.22-570/drivers/media/video/video-buf-dvb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/media/video/video-buf-dvb.c 2007-12-21 15:36:12.000000000 -0500 @@ -47,6 +47,7 @@ int err; dprintk("dvb thread started\n"); + set_freezable(); videobuf_read_start(&dvb->dvbq); for (;;) { diff -Nurb linux-2.6.22-570/drivers/media/video/vivi.c linux-2.6.22-591/drivers/media/video/vivi.c --- linux-2.6.22-570/drivers/media/video/vivi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/media/video/vivi.c 2007-12-21 15:36:12.000000000 -0500 @@ -573,6 +573,7 @@ dprintk(1,"thread started\n"); mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT); + set_freezable(); for (;;) { vivi_sleep(dma_q); diff -Nurb linux-2.6.22-570/drivers/message/fusion/linux_compat.h linux-2.6.22-591/drivers/message/fusion/linux_compat.h --- linux-2.6.22-570/drivers/message/fusion/linux_compat.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/linux_compat.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,9 +0,0 @@ -/* drivers/message/fusion/linux_compat.h */ - -#ifndef FUSION_LINUX_COMPAT_H -#define FUSION_LINUX_COMPAT_H - -#include -#include - -#endif /* _LINUX_COMPAT_H */ diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi.h --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,12 +1,12 @@ /* - * Copyright (c) 2000-2006 LSI Logic Corporation. + * Copyright (c) 2000-2007 LSI Logic Corporation. 
* * * Name: mpi.h * Title: MPI Message independent structures and definitions * Creation Date: July 27, 2000 * - * mpi.h Version: 01.05.12 + * mpi.h Version: 01.05.13 * * Version History * --------------- @@ -78,6 +78,7 @@ * 08-30-05 01.05.10 Added 2 new IOCStatus codes for Target. * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT. * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT. + * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT. * -------------------------------------------------------------------------- */ @@ -108,7 +109,7 @@ /* Note: The major versions of 0xe0 through 0xff are reserved */ /* versioning for this MPI header set */ -#define MPI_HEADER_VERSION_UNIT (0x0E) +#define MPI_HEADER_VERSION_UNIT (0x10) #define MPI_HEADER_VERSION_DEV (0x00) #define MPI_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI_HEADER_VERSION_UNIT_SHIFT (8) diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_cnfg.h --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_cnfg.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,12 +1,12 @@ /* - * Copyright (c) 2000-2006 LSI Logic Corporation. + * Copyright (c) 2000-2007 LSI Logic Corporation. * * * Name: mpi_cnfg.h * Title: MPI Config message, structures, and Pages * Creation Date: July 27, 2000 * - * mpi_cnfg.h Version: 01.05.13 + * mpi_cnfg.h Version: 01.05.15 * * Version History * --------------- @@ -293,6 +293,21 @@ * Added more AccessStatus values for SAS Device Page 0. * Added bit for SATA Asynchronous Notification Support in * Flags field of SAS Device Page 0. + * 02-28-07 01.05.14 Added ExtFlags field to Manufacturing Page 4. + * Added Disable SMART Polling for CapabilitiesFlags of + * IOC Page 6. + * Added Disable SMART Polling to DeviceSettings of BIOS + * Page 1. + * Added Multi-Port Domain bit for DiscoveryStatus field + * of SAS IO Unit Page. 
+ * Added Multi-Port Domain Illegal flag for SAS IO Unit + * Page 1 AdditionalControlFlags field. + * 05-24-07 01.05.15 Added Hide Physical Disks with Non-Integrated RAID + * Metadata bit to Manufacturing Page 4 ExtFlags field. + * Added Internal Connector to End Device Present bit to + * Expander Page 0 Flags field. + * Fixed define for + * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED. * -------------------------------------------------------------------------- */ @@ -639,7 +654,7 @@ U8 InfoSize1; /* 0Bh */ U8 InquirySize; /* 0Ch */ U8 Flags; /* 0Dh */ - U16 Reserved2; /* 0Eh */ + U16 ExtFlags; /* 0Eh */ U8 InquiryData[56]; /* 10h */ U32 ISVolumeSettings; /* 48h */ U32 IMEVolumeSettings; /* 4Ch */ @@ -658,7 +673,7 @@ } CONFIG_PAGE_MANUFACTURING_4, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_4, ManufacturingPage4_t, MPI_POINTER pManufacturingPage4_t; -#define MPI_MANUFACTURING4_PAGEVERSION (0x04) +#define MPI_MANUFACTURING4_PAGEVERSION (0x05) /* defines for the Flags field */ #define MPI_MANPAGE4_FORCE_BAD_BLOCK_TABLE (0x80) @@ -670,6 +685,12 @@ #define MPI_MANPAGE4_IM_RESYNC_CACHE_ENABLE (0x02) #define MPI_MANPAGE4_IR_NO_MIX_SAS_SATA (0x01) +/* defines for the ExtFlags field */ +#define MPI_MANPAGE4_EXTFLAGS_HIDE_NON_IR_METADATA (0x0008) +#define MPI_MANPAGE4_EXTFLAGS_SAS_CACHE_DISABLE (0x0004) +#define MPI_MANPAGE4_EXTFLAGS_SATA_CACHE_DISABLE (0x0002) +#define MPI_MANPAGE4_EXTFLAGS_LEGACY_MODE (0x0001) + #ifndef MPI_MANPAGE5_NUM_FORCEWWID #define MPI_MANPAGE5_NUM_FORCEWWID (1) @@ -781,7 +802,7 @@ } CONFIG_PAGE_MANUFACTURING_9, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_9, ManufacturingPage9_t, MPI_POINTER pManufacturingPage9_t; -#define MPI_MANUFACTURING6_PAGEVERSION (0x00) +#define MPI_MANUFACTURING9_PAGEVERSION (0x00) typedef struct _CONFIG_PAGE_MANUFACTURING_10 @@ -1138,6 +1159,8 @@ /* IOC Page 6 Capabilities Flags */ +#define MPI_IOCPAGE6_CAP_FLAGS_DISABLE_SMART_POLLING (0x00000008) + #define MPI_IOCPAGE6_CAP_FLAGS_MASK_METADATA_SIZE (0x00000006) #define 
MPI_IOCPAGE6_CAP_FLAGS_64MB_METADATA_SIZE (0x00000000) #define MPI_IOCPAGE6_CAP_FLAGS_512MB_METADATA_SIZE (0x00000002) @@ -1208,6 +1231,7 @@ #define MPI_BIOSPAGE1_IOCSET_ALTERNATE_CHS (0x00000008) /* values for the DeviceSettings field */ +#define MPI_BIOSPAGE1_DEVSET_DISABLE_SMART_POLLING (0x00000010) #define MPI_BIOSPAGE1_DEVSET_DISABLE_SEQ_LUN (0x00000008) #define MPI_BIOSPAGE1_DEVSET_DISABLE_RM_LUN (0x00000004) #define MPI_BIOSPAGE1_DEVSET_DISABLE_NON_RM_LUN (0x00000002) @@ -2281,11 +2305,11 @@ typedef struct _CONFIG_PAGE_RAID_VOL_1 { CONFIG_PAGE_HEADER Header; /* 00h */ - U8 VolumeID; /* 01h */ - U8 VolumeBus; /* 02h */ - U8 VolumeIOC; /* 03h */ - U8 Reserved0; /* 04h */ - U8 GUID[24]; /* 05h */ + U8 VolumeID; /* 04h */ + U8 VolumeBus; /* 05h */ + U8 VolumeIOC; /* 06h */ + U8 Reserved0; /* 07h */ + U8 GUID[24]; /* 08h */ U8 Name[32]; /* 20h */ U64 WWID; /* 40h */ U32 Reserved1; /* 48h */ @@ -2340,7 +2364,7 @@ } RAID_PHYS_DISK0_STATUS, MPI_POINTER PTR_RAID_PHYS_DISK0_STATUS, RaidPhysDiskStatus_t, MPI_POINTER pRaidPhysDiskStatus_t; -/* RAID Volume 2 IM Physical Disk DiskStatus flags */ +/* RAID Physical Disk PhysDiskStatus flags */ #define MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC (0x01) #define MPI_PHYSDISK0_STATUS_FLAG_QUIESCED (0x02) @@ -2544,6 +2568,7 @@ #define MPI_SAS_IOUNIT0_DS_TABLE_LINK (0x00000400) #define MPI_SAS_IOUNIT0_DS_UNSUPPORTED_DEVICE (0x00000800) #define MPI_SAS_IOUNIT0_DS_MAX_SATA_TARGETS (0x00001000) +#define MPI_SAS_IOUNIT0_DS_MULTI_PORT_DOMAIN (0x00002000) typedef struct _MPI_SAS_IO_UNIT1_PHY_DATA @@ -2607,6 +2632,7 @@ #define MPI_SAS_IOUNIT1_CONTROL_CLEAR_AFFILIATION (0x0001) /* values for SAS IO Unit Page 1 AdditionalControlFlags */ +#define MPI_SAS_IOUNIT1_ACONTROL_MULTI_PORT_DOMAIN_ILLEGAL (0x0080) #define MPI_SAS_IOUNIT1_ACONTROL_SATA_ASYNCHROUNOUS_NOTIFICATION (0x0040) #define MPI_SAS_IOUNIT1_ACONTROL_HIDE_NONZERO_ATTACHED_PHY_IDENT (0x0020) #define MPI_SAS_IOUNIT1_ACONTROL_PORT_ENABLE_ONLY_SATA_LINK_RESET (0x0010) @@ -2734,6 +2760,7 @@ 
#define MPI_SAS_EXPANDER0_DS_UNSUPPORTED_DEVICE (0x00000800) /* values for SAS Expander Page 0 Flags field */ +#define MPI_SAS_EXPANDER0_FLAGS_CONNECTOR_END_DEVICE (0x04) #define MPI_SAS_EXPANDER0_FLAGS_ROUTE_TABLE_CONFIG (0x02) #define MPI_SAS_EXPANDER0_FLAGS_CONFIG_IN_PROGRESS (0x01) @@ -2774,7 +2801,7 @@ /* see mpi_sas.h for values for SAS Expander Page 1 AttachedDeviceInfo values */ /* values for SAS Expander Page 1 DiscoveryInfo field */ -#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY DISABLED (0x04) +#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED (0x04) #define MPI_SAS_EXPANDER1_DISCINFO_LINK_STATUS_CHANGE (0x02) #define MPI_SAS_EXPANDER1_DISCINFO_NO_ROUTING_ENTRIES (0x01) @@ -2895,11 +2922,11 @@ U8 AttachedPhyIdentifier; /* 16h */ U8 Reserved2; /* 17h */ U32 AttachedDeviceInfo; /* 18h */ - U8 ProgrammedLinkRate; /* 20h */ - U8 HwLinkRate; /* 21h */ - U8 ChangeCount; /* 22h */ - U8 Flags; /* 23h */ - U32 PhyInfo; /* 24h */ + U8 ProgrammedLinkRate; /* 1Ch */ + U8 HwLinkRate; /* 1Dh */ + U8 ChangeCount; /* 1Eh */ + U8 Flags; /* 1Fh */ + U32 PhyInfo; /* 20h */ } CONFIG_PAGE_SAS_PHY_0, MPI_POINTER PTR_CONFIG_PAGE_SAS_PHY_0, SasPhyPage0_t, MPI_POINTER pSasPhyPage0_t; diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt linux-2.6.22-591/drivers/message/fusion/lsi/mpi_history.txt --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_history.txt 2007-12-21 15:36:12.000000000 -0500 @@ -3,28 +3,28 @@ MPI Header File Change History ============================== - Copyright (c) 2000-2006 LSI Logic Corporation. + Copyright (c) 2000-2007 LSI Logic Corporation. 
--------------------------------------- - Header Set Release Version: 01.05.14 - Header Set Release Date: 10-11-06 + Header Set Release Version: 01.05.16 + Header Set Release Date: 05-24-07 --------------------------------------- Filename Current version Prior version ---------- --------------- ------------- - mpi.h 01.05.12 01.05.11 - mpi_ioc.h 01.05.12 01.05.11 - mpi_cnfg.h 01.05.13 01.05.12 - mpi_init.h 01.05.08 01.05.07 + mpi.h 01.05.13 01.05.12 + mpi_ioc.h 01.05.14 01.05.13 + mpi_cnfg.h 01.05.15 01.05.14 + mpi_init.h 01.05.09 01.05.09 mpi_targ.h 01.05.06 01.05.06 mpi_fc.h 01.05.01 01.05.01 mpi_lan.h 01.05.01 01.05.01 - mpi_raid.h 01.05.02 01.05.02 + mpi_raid.h 01.05.03 01.05.03 mpi_tool.h 01.05.03 01.05.03 mpi_inb.h 01.05.01 01.05.01 - mpi_sas.h 01.05.04 01.05.03 + mpi_sas.h 01.05.04 01.05.04 mpi_type.h 01.05.02 01.05.02 - mpi_history.txt 01.05.14 01.05.13 + mpi_history.txt 01.05.14 01.05.14 * Date Version Description @@ -95,6 +95,7 @@ * 08-30-05 01.05.10 Added 2 new IOCStatus codes for Target. * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT. * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT. + * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT. * -------------------------------------------------------------------------- mpi_ioc.h @@ -191,6 +192,13 @@ * data structure. * Added new ImageType values for FWDownload and FWUpload * requests. + * 02-28-07 01.05.13 Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS + * Broadcast Event Data (replacing _RESERVED2). + * For Discovery Error Event Data DiscoveryStatus field, + * replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and + * added _MULTI_PORT_DOMAIN. + * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request. + * Added Common Boot Block type to FWUpload Request. * -------------------------------------------------------------------------- mpi_cnfg.h @@ -473,6 +481,21 @@ * Added more AccessStatus values for SAS Device Page 0. 
* Added bit for SATA Asynchronous Notification Support in * Flags field of SAS Device Page 0. + * 02-28-07 01.05.14 Added ExtFlags field to Manufacturing Page 4. + * Added Disable SMART Polling for CapabilitiesFlags of + * IOC Page 6. + * Added Disable SMART Polling to DeviceSettings of BIOS + * Page 1. + * Added Multi-Port Domain bit for DiscoveryStatus field + * of SAS IO Unit Page. + * Added Multi-Port Domain Illegal flag for SAS IO Unit + * Page 1 AdditionalControlFlags field. + * 05-24-07 01.05.15 Added Hide Physical Disks with Non-Integrated RAID + * Metadata bit to Manufacturing Page 4 ExtFlags field. + * Added Internal Connector to End Device Present bit to + * Expander Page 0 Flags field. + * Fixed define for + * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED. * -------------------------------------------------------------------------- mpi_init.h @@ -517,6 +540,8 @@ * unique in the first 32 characters. * 03-27-06 01.05.07 Added Task Management type of Clear ACA. * 10-11-06 01.05.08 Shortened define for Task Management type of Clear ACA. + * 02-28-07 01.05.09 Defined two new MsgFlags bits for SCSI Task Management + * Request: Do Not Send Task IU and Soft Reset Option. * -------------------------------------------------------------------------- mpi_targ.h @@ -571,7 +596,7 @@ * 11-02-00 01.01.01 Original release for post 1.0 work * 12-04-00 01.01.02 Added messages for Common Transport Send and * Primitive Send. - * 01-09-01 01.01.03 Modified some of the new flags to have an MPI prefix + * 01-09-01 01.01.03 Modifed some of the new flags to have an MPI prefix * and modified the FcPrimitiveSend flags. * 01-25-01 01.01.04 Move InitiatorIndex in LinkServiceRsp reply to a larger * field. @@ -634,6 +659,8 @@ * 08-19-04 01.05.01 Original release for MPI v1.5. * 01-15-05 01.05.02 Added defines for the two new RAID Actions for * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE. + * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and + * associated defines. 
* -------------------------------------------------------------------------- mpi_tool.h @@ -682,7 +709,22 @@ mpi_history.txt Parts list history -Filename 01.05.13 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09 +Filename 01.05.15 01.05.15 +---------- -------- -------- +mpi.h 01.05.12 01.05.13 +mpi_ioc.h 01.05.13 01.05.14 +mpi_cnfg.h 01.05.14 01.05.15 +mpi_init.h 01.05.09 01.05.09 +mpi_targ.h 01.05.06 01.05.06 +mpi_fc.h 01.05.01 01.05.01 +mpi_lan.h 01.05.01 01.05.01 +mpi_raid.h 01.05.03 01.05.03 +mpi_tool.h 01.05.03 01.05.03 +mpi_inb.h 01.05.01 01.05.01 +mpi_sas.h 01.05.04 01.05.04 +mpi_type.h 01.05.02 01.05.02 + +Filename 01.05.14 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09 ---------- -------- -------- -------- -------- -------- -------- mpi.h 01.05.12 01.05.11 01.05.10 01.05.09 01.05.08 01.05.07 mpi_ioc.h 01.05.12 01.05.11 01.05.10 01.05.09 01.05.09 01.05.08 diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_inb.h --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_inb.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2003-2004 LSI Logic Corporation. - * - * - * Name: mpi_inb.h - * Title: MPI Inband structures and definitions - * Creation Date: September 30, 2003 - * - * mpi_inb.h Version: 01.05.01 - * - * Version History - * --------------- - * - * Date Version Description - * -------- -------- ------------------------------------------------------ - * 05-11-04 01.03.01 Original release. - * 08-19-04 01.05.01 Original release for MPI v1.5. 
- * -------------------------------------------------------------------------- - */ - -#ifndef MPI_INB_H -#define MPI_INB_H - -/****************************************************************************** -* -* I n b a n d M e s s a g e s -* -*******************************************************************************/ - - -/****************************************************************************/ -/* Inband Buffer Post Request */ -/****************************************************************************/ - -typedef struct _MSG_INBAND_BUFFER_POST_REQUEST -{ - U8 Reserved1; /* 00h */ - U8 BufferCount; /* 01h */ - U8 ChainOffset; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U32 Reserved4; /* 0Ch */ - SGE_TRANS_SIMPLE_UNION SGL; /* 10h */ -} MSG_INBAND_BUFFER_POST_REQUEST, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REQUEST, - MpiInbandBufferPostRequest_t , MPI_POINTER pMpiInbandBufferPostRequest_t; - - -typedef struct _WWN_FC_FORMAT -{ - U64 NodeName; /* 00h */ - U64 PortName; /* 08h */ -} WWN_FC_FORMAT, MPI_POINTER PTR_WWN_FC_FORMAT, - WwnFcFormat_t, MPI_POINTER pWwnFcFormat_t; - -typedef struct _WWN_SAS_FORMAT -{ - U64 WorldWideID; /* 00h */ - U32 Reserved1; /* 08h */ - U32 Reserved2; /* 0Ch */ -} WWN_SAS_FORMAT, MPI_POINTER PTR_WWN_SAS_FORMAT, - WwnSasFormat_t, MPI_POINTER pWwnSasFormat_t; - -typedef union _WWN_INBAND_FORMAT -{ - WWN_FC_FORMAT Fc; - WWN_SAS_FORMAT Sas; -} WWN_INBAND_FORMAT, MPI_POINTER PTR_WWN_INBAND_FORMAT, - WwnInbandFormat, MPI_POINTER pWwnInbandFormat; - - -/* Inband Buffer Post reply message */ - -typedef struct _MSG_INBAND_BUFFER_POST_REPLY -{ - U16 Reserved1; /* 00h */ - U8 MsgLength; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U16 Reserved4; /* 0Ch */ - U16 IOCStatus; /* 0Eh */ - U32 IOCLogInfo; /* 10h */ - U32 TransferLength; /* 14h */ - 
U32 TransactionContext; /* 18h */ - WWN_INBAND_FORMAT Wwn; /* 1Ch */ - U32 IOCIdentifier[4]; /* 2Ch */ -} MSG_INBAND_BUFFER_POST_REPLY, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REPLY, - MpiInbandBufferPostReply_t, MPI_POINTER pMpiInbandBufferPostReply_t; - - -/****************************************************************************/ -/* Inband Send Request */ -/****************************************************************************/ - -typedef struct _MSG_INBAND_SEND_REQUEST -{ - U16 Reserved1; /* 00h */ - U8 ChainOffset; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U32 Reserved4; /* 0Ch */ - WWN_INBAND_FORMAT Wwn; /* 10h */ - U32 Reserved5; /* 20h */ - SGE_IO_UNION SGL; /* 24h */ -} MSG_INBAND_SEND_REQUEST, MPI_POINTER PTR_MSG_INBAND_SEND_REQUEST, - MpiInbandSendRequest_t , MPI_POINTER pMpiInbandSendRequest_t; - - -/* Inband Send reply message */ - -typedef struct _MSG_INBAND_SEND_REPLY -{ - U16 Reserved1; /* 00h */ - U8 MsgLength; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U16 Reserved4; /* 0Ch */ - U16 IOCStatus; /* 0Eh */ - U32 IOCLogInfo; /* 10h */ - U32 ResponseLength; /* 14h */ -} MSG_INBAND_SEND_REPLY, MPI_POINTER PTR_MSG_INBAND_SEND_REPLY, - MpiInbandSendReply_t, MPI_POINTER pMpiInbandSendReply_t; - - -/****************************************************************************/ -/* Inband Response Request */ -/****************************************************************************/ - -typedef struct _MSG_INBAND_RSP_REQUEST -{ - U16 Reserved1; /* 00h */ - U8 ChainOffset; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U32 Reserved4; /* 0Ch */ - WWN_INBAND_FORMAT Wwn; /* 10h */ - U32 IOCIdentifier[4]; /* 20h */ - U32 ResponseLength; /* 30h */ - SGE_IO_UNION 
SGL; /* 34h */ -} MSG_INBAND_RSP_REQUEST, MPI_POINTER PTR_MSG_INBAND_RSP_REQUEST, - MpiInbandRspRequest_t , MPI_POINTER pMpiInbandRspRequest_t; - - -/* Inband Response reply message */ - -typedef struct _MSG_INBAND_RSP_REPLY -{ - U16 Reserved1; /* 00h */ - U8 MsgLength; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U16 Reserved4; /* 0Ch */ - U16 IOCStatus; /* 0Eh */ - U32 IOCLogInfo; /* 10h */ -} MSG_INBAND_RSP_REPLY, MPI_POINTER PTR_MSG_INBAND_RSP_REPLY, - MpiInbandRspReply_t, MPI_POINTER pMpiInbandRspReply_t; - - -/****************************************************************************/ -/* Inband Abort Request */ -/****************************************************************************/ - -typedef struct _MSG_INBAND_ABORT_REQUEST -{ - U8 Reserved1; /* 00h */ - U8 AbortType; /* 01h */ - U8 ChainOffset; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U32 Reserved4; /* 0Ch */ - U32 ContextToAbort; /* 10h */ -} MSG_INBAND_ABORT_REQUEST, MPI_POINTER PTR_MSG_INBAND_ABORT_REQUEST, - MpiInbandAbortRequest_t , MPI_POINTER pMpiInbandAbortRequest_t; - -#define MPI_INBAND_ABORT_TYPE_ALL_BUFFERS (0x00) -#define MPI_INBAND_ABORT_TYPE_EXACT_BUFFER (0x01) -#define MPI_INBAND_ABORT_TYPE_SEND_REQUEST (0x02) -#define MPI_INBAND_ABORT_TYPE_RESPONSE_REQUEST (0x03) - - -/* Inband Abort reply message */ - -typedef struct _MSG_INBAND_ABORT_REPLY -{ - U8 Reserved1; /* 00h */ - U8 AbortType; /* 01h */ - U8 MsgLength; /* 02h */ - U8 Function; /* 03h */ - U16 Reserved2; /* 04h */ - U8 Reserved3; /* 06h */ - U8 MsgFlags; /* 07h */ - U32 MsgContext; /* 08h */ - U16 Reserved4; /* 0Ch */ - U16 IOCStatus; /* 0Eh */ - U32 IOCLogInfo; /* 10h */ -} MSG_INBAND_ABORT_REPLY, MPI_POINTER PTR_MSG_INBAND_ABORT_REPLY, - MpiInbandAbortReply_t, MPI_POINTER pMpiInbandAbortReply_t; - - -#endif - diff -Nurb 
linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_init.h --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_init.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,12 +1,12 @@ /* - * Copyright (c) 2000-2006 LSI Logic Corporation. + * Copyright (c) 2000-2007 LSI Logic Corporation. * * * Name: mpi_init.h * Title: MPI initiator mode messages and structures * Creation Date: June 8, 2000 * - * mpi_init.h Version: 01.05.08 + * mpi_init.h Version: 01.05.09 * * Version History * --------------- @@ -54,6 +54,8 @@ * unique in the first 32 characters. * 03-27-06 01.05.07 Added Task Management type of Clear ACA. * 10-11-06 01.05.08 Shortened define for Task Management type of Clear ACA. + * 02-28-07 01.05.09 Defined two new MsgFlags bits for SCSI Task Management + * Request: Do Not Send Task IU and Soft Reset Option. * -------------------------------------------------------------------------- */ @@ -432,10 +434,14 @@ #define MPI_SCSITASKMGMT_TASKTYPE_CLR_ACA (0x08) /* MsgFlags bits */ +#define MPI_SCSITASKMGMT_MSGFLAGS_DO_NOT_SEND_TASK_IU (0x01) + #define MPI_SCSITASKMGMT_MSGFLAGS_TARGET_RESET_OPTION (0x00) #define MPI_SCSITASKMGMT_MSGFLAGS_LIP_RESET_OPTION (0x02) #define MPI_SCSITASKMGMT_MSGFLAGS_LIPRESET_RESET_OPTION (0x04) +#define MPI_SCSITASKMGMT_MSGFLAGS_SOFT_RESET_OPTION (0x08) + /* SCSI Task Management Reply */ typedef struct _MSG_SCSI_TASK_MGMT_REPLY { diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_ioc.h --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_ioc.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,12 +1,12 @@ /* - * Copyright (c) 2000-2006 LSI Logic Corporation. + * Copyright (c) 2000-2007 LSI Logic Corporation. 
* * * Name: mpi_ioc.h * Title: MPI IOC, Port, Event, FW Download, and FW Upload messages * Creation Date: August 11, 2000 * - * mpi_ioc.h Version: 01.05.12 + * mpi_ioc.h Version: 01.05.14 * * Version History * --------------- @@ -106,6 +106,13 @@ * data structure. * Added new ImageType values for FWDownload and FWUpload * requests. + * 02-28-07 01.05.13 Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS + * Broadcast Event Data (replacing _RESERVED2). + * For Discovery Error Event Data DiscoveryStatus field, + * replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and + * added _MULTI_PORT_DOMAIN. + * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request. + * Added Common Boot Block type to FWUpload Request. * -------------------------------------------------------------------------- */ @@ -792,7 +799,7 @@ #define MPI_EVENT_PRIMITIVE_CHANGE (0x01) #define MPI_EVENT_PRIMITIVE_EXPANDER (0x03) -#define MPI_EVENT_PRIMITIVE_RESERVED2 (0x04) +#define MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT (0x04) #define MPI_EVENT_PRIMITIVE_RESERVED3 (0x05) #define MPI_EVENT_PRIMITIVE_RESERVED4 (0x06) #define MPI_EVENT_PRIMITIVE_CHANGE0_RESERVED (0x07) @@ -857,8 +864,9 @@ #define MPI_EVENT_DSCVRY_ERR_DS_SMP_CRC_ERROR (0x00000100) #define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_SUBTRACTIVE (0x00000200) #define MPI_EVENT_DSCVRY_ERR_DS_TABLE_TO_TABLE (0x00000400) -#define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_PATHS (0x00000800) +#define MPI_EVENT_DSCVRY_ERR_DS_UNSUPPORTED_DEVICE (0x00000800) #define MPI_EVENT_DSCVRY_ERR_DS_MAX_SATA_TARGETS (0x00001000) +#define MPI_EVENT_DSCVRY_ERR_DS_MULTI_PORT_DOMAIN (0x00002000) /* SAS SMP Error Event data */ @@ -990,6 +998,7 @@ #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_1 (0x07) #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_2 (0x08) #define MPI_FW_DOWNLOAD_ITYPE_MEGARAID (0x09) +#define MPI_FW_DOWNLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B) typedef struct _FWDownloadTCSGE @@ -1049,6 +1058,7 @@ #define MPI_FW_UPLOAD_ITYPE_CONFIG_2 (0x08) #define MPI_FW_UPLOAD_ITYPE_MEGARAID (0x09) 
#define MPI_FW_UPLOAD_ITYPE_COMPLETE (0x0A) +#define MPI_FW_UPLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B) typedef struct _FWUploadTCSGE { diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_raid.h --- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_raid.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,12 +1,12 @@ /* - * Copyright (c) 2001-2005 LSI Logic Corporation. + * Copyright (c) 2001-2007 LSI Logic Corporation. * * * Name: mpi_raid.h * Title: MPI RAID message and structures * Creation Date: February 27, 2001 * - * mpi_raid.h Version: 01.05.02 + * mpi_raid.h Version: 01.05.03 * * Version History * --------------- @@ -32,6 +32,8 @@ * 08-19-04 01.05.01 Original release for MPI v1.5. * 01-15-05 01.05.02 Added defines for the two new RAID Actions for * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE. + * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and + * associated defines. 
* -------------------------------------------------------------------------- */ @@ -90,6 +92,7 @@ #define MPI_RAID_ACTION_INACTIVATE_VOLUME (0x12) #define MPI_RAID_ACTION_SET_RESYNC_RATE (0x13) #define MPI_RAID_ACTION_SET_DATA_SCRUB_RATE (0x14) +#define MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE (0x15) /* ActionDataWord defines for use with MPI_RAID_ACTION_CREATE_VOLUME action */ #define MPI_RAID_ACTION_ADATA_DO_NOT_SYNC (0x00000001) @@ -111,6 +114,10 @@ /* ActionDataWord defines for use with MPI_RAID_ACTION_SET_DATA_SCRUB_RATE action */ #define MPI_RAID_ACTION_ADATA_DATA_SCRUB_RATE_MASK (0x000000FF) +/* ActionDataWord defines for use with MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE action */ +#define MPI_RAID_ACTION_ADATA_ENABLE_FW_UPDATE (0x00000001) +#define MPI_RAID_ACTION_ADATA_MASK_FW_UPDATE_TIMEOUT (0x0000FF00) +#define MPI_RAID_ACTION_ADATA_SHIFT_FW_UPDATE_TIMEOUT (8) /* RAID Action reply message */ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.c linux-2.6.22-591/drivers/message/fusion/mptbase.c --- linux-2.6.22-570/drivers/message/fusion/mptbase.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptbase.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,7 +6,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
* * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -64,6 +64,7 @@ #endif #include "mptbase.h" +#include "lsi/mpi_log_fc.h" /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ #define my_NAME "Fusion MPT base driver" @@ -6349,14 +6350,37 @@ static void mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info) { - static char *subcl_str[8] = { - "FCP Initiator", "FCP Target", "LAN", "MPI Message Layer", - "FC Link", "Context Manager", "Invalid Field Offset", "State Change Info" - }; - u8 subcl = (log_info >> 24) & 0x7; + char *desc = "unknown"; + + switch (log_info & 0xFF000000) { + case MPI_IOCLOGINFO_FC_INIT_BASE: + desc = "FCP Initiator"; + break; + case MPI_IOCLOGINFO_FC_TARGET_BASE: + desc = "FCP Target"; + break; + case MPI_IOCLOGINFO_FC_LAN_BASE: + desc = "LAN"; + break; + case MPI_IOCLOGINFO_FC_MSG_BASE: + desc = "MPI Message Layer"; + break; + case MPI_IOCLOGINFO_FC_LINK_BASE: + desc = "FC Link"; + break; + case MPI_IOCLOGINFO_FC_CTX_BASE: + desc = "Context Manager"; + break; + case MPI_IOCLOGINFO_FC_INVALID_FIELD_BYTE_OFFSET: + desc = "Invalid Field Offset"; + break; + case MPI_IOCLOGINFO_FC_STATE_CHANGE: + desc = "State Change Info"; + break; + } - printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubCl={%s}\n", - ioc->name, log_info, subcl_str[subcl]); + printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubClass={%s}, Value=(0x%06x)\n", + ioc->name, log_info, desc, (log_info & 0xFFFFFF)); } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.h linux-2.6.22-591/drivers/message/fusion/mptbase.h --- linux-2.6.22-570/drivers/message/fusion/mptbase.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptbase.h 2007-12-21 15:36:12.000000000 -0500 @@ -6,7 +6,7 @@ * running LSI Logic 
Fusion MPT (Message Passing Technology) firmware. * * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -75,8 +75,8 @@ #define COPYRIGHT "Copyright (c) 1999-2007 " MODULEAUTHOR #endif -#define MPT_LINUX_VERSION_COMMON "3.04.04" -#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.04" +#define MPT_LINUX_VERSION_COMMON "3.04.05" +#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.05" #define WHAT_MAGIC_STRING "@" "(" "#" ")" #define show_mptmod_ver(s,ver) \ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.c linux-2.6.22-591/drivers/message/fusion/mptctl.c --- linux-2.6.22-570/drivers/message/fusion/mptctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptctl.c 2007-12-21 15:36:12.000000000 -0500 @@ -5,7 +5,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. * * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.h linux-2.6.22-591/drivers/message/fusion/mptctl.h --- linux-2.6.22-570/drivers/message/fusion/mptctl.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptctl.h 2007-12-21 15:36:12.000000000 -0500 @@ -6,7 +6,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
* * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptfc.c linux-2.6.22-591/drivers/message/fusion/mptfc.c --- linux-2.6.22-570/drivers/message/fusion/mptfc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptfc.c 2007-12-21 15:36:12.000000000 -0500 @@ -4,7 +4,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. * * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -43,7 +43,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -#include "linux_compat.h" /* linux-2.6 tweaks */ #include #include #include diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.c linux-2.6.22-591/drivers/message/fusion/mptlan.c --- linux-2.6.22-570/drivers/message/fusion/mptlan.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptlan.c 2007-12-21 15:36:12.000000000 -0500 @@ -5,7 +5,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
* * Copyright (c) 2000-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.h linux-2.6.22-591/drivers/message/fusion/mptlan.h --- linux-2.6.22-570/drivers/message/fusion/mptlan.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptlan.h 2007-12-21 15:36:12.000000000 -0500 @@ -5,7 +5,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. * * Copyright (c) 2000-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptsas.c linux-2.6.22-591/drivers/message/fusion/mptsas.c --- linux-2.6.22-570/drivers/message/fusion/mptsas.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptsas.c 2007-12-21 15:36:12.000000000 -0500 @@ -4,7 +4,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. * * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * Copyright (c) 2005-2007 Dell */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.c linux-2.6.22-591/drivers/message/fusion/mptscsih.c --- linux-2.6.22-570/drivers/message/fusion/mptscsih.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptscsih.c 2007-12-21 15:36:12.000000000 -0500 @@ -4,7 +4,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
* * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -44,7 +44,6 @@ */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -#include "linux_compat.h" /* linux-2.6 tweaks */ #include #include #include @@ -260,30 +259,13 @@ /* Map the data portion, if any. * sges_left = 0 if no data transfer. */ - if ( (sges_left = SCpnt->use_sg) ) { - sges_left = pci_map_sg(ioc->pcidev, - (struct scatterlist *) SCpnt->request_buffer, - SCpnt->use_sg, - SCpnt->sc_data_direction); - if (sges_left == 0) + sges_left = scsi_dma_map(SCpnt); + if (sges_left < 0) return FAILED; - } else if (SCpnt->request_bufflen) { - SCpnt->SCp.dma_handle = pci_map_single(ioc->pcidev, - SCpnt->request_buffer, - SCpnt->request_bufflen, - SCpnt->sc_data_direction); - dsgprintk((MYIOC_s_INFO_FMT "SG: non-SG for %p, len=%d\n", - ioc->name, SCpnt, SCpnt->request_bufflen)); - mptscsih_add_sge((char *) &pReq->SGL, - 0xD1000000|MPT_SGE_FLAGS_ADDRESSING|sgdir|SCpnt->request_bufflen, - SCpnt->SCp.dma_handle); - - return SUCCESS; - } /* Handle the SG case. 
*/ - sg = (struct scatterlist *) SCpnt->request_buffer; + sg = scsi_sglist(SCpnt); sg_done = 0; sgeOffset = sizeof(SCSIIORequest_t) - sizeof(SGE_IO_UNION); chainSge = NULL; @@ -465,7 +447,12 @@ MPT_FRAME_HDR *mf; SEPRequest_t *SEPMsg; - if (ioc->bus_type == FC) + if (ioc->bus_type != SAS) + return; + + /* Not supported for hidden raid components + */ + if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) return; if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) { @@ -662,7 +649,7 @@ scsi_state = pScsiReply->SCSIState; scsi_status = pScsiReply->SCSIStatus; xfer_cnt = le32_to_cpu(pScsiReply->TransferCount); - sc->resid = sc->request_bufflen - xfer_cnt; + scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt); log_info = le32_to_cpu(pScsiReply->IOCLogInfo); /* @@ -767,7 +754,7 @@ break; case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH: /* 0x0049 */ - sc->resid = sc->request_bufflen - xfer_cnt; + scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt); if((xfer_cnt==0)||(sc->underflow > xfer_cnt)) sc->result=DID_SOFT_ERROR << 16; else /* Sufficient data transfer occurred */ @@ -816,7 +803,7 @@ break; case MPI_IOCSTATUS_SCSI_DATA_OVERRUN: /* 0x0044 */ - sc->resid=0; + scsi_set_resid(sc, 0); case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR: /* 0x0040 */ case MPI_IOCSTATUS_SUCCESS: /* 0x0000 */ sc->result = (DID_OK << 16) | scsi_status; @@ -900,22 +887,17 @@ dreplyprintk(("%s: [%d:%d:%d:%d] resid=%d " "bufflen=%d xfer_cnt=%d\n", __FUNCTION__, - sc->device->host->host_no, sc->device->channel, sc->device->id, - sc->device->lun, sc->resid, sc->request_bufflen, - xfer_cnt)); + sc->device->host->host_no, + sc->device->channel, sc->device->id, + sc->device->lun, scsi_get_resid(sc), + scsi_bufflen(sc), xfer_cnt)); } #endif } /* end of address reply case */ /* Unmap the DMA buffers, if any. 
*/ - if (sc->use_sg) { - pci_unmap_sg(ioc->pcidev, (struct scatterlist *) sc->request_buffer, - sc->use_sg, sc->sc_data_direction); - } else if (sc->request_bufflen) { - pci_unmap_single(ioc->pcidev, sc->SCp.dma_handle, - sc->request_bufflen, sc->sc_data_direction); - } + scsi_dma_unmap(sc); sc->scsi_done(sc); /* Issue the command callback */ @@ -970,17 +952,8 @@ /* Set status, free OS resources (SG DMA buffers) * Do OS callback */ - if (SCpnt->use_sg) { - pci_unmap_sg(ioc->pcidev, - (struct scatterlist *) SCpnt->request_buffer, - SCpnt->use_sg, - SCpnt->sc_data_direction); - } else if (SCpnt->request_bufflen) { - pci_unmap_single(ioc->pcidev, - SCpnt->SCp.dma_handle, - SCpnt->request_bufflen, - SCpnt->sc_data_direction); - } + scsi_dma_unmap(SCpnt); + SCpnt->result = DID_RESET << 16; SCpnt->host_scribble = NULL; @@ -1023,14 +996,19 @@ mf = (SCSIIORequest_t *)MPT_INDEX_2_MFPTR(hd->ioc, ii); if (mf == NULL) continue; + /* If the device is a hidden raid component, then its + * expected that the mf->function will be RAID_SCSI_IO + */ + if (vdevice->vtarget->tflags & + MPT_TARGET_FLAGS_RAID_COMPONENT && mf->Function != + MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH) + continue; + int_to_scsilun(vdevice->lun, &lun); if ((mf->Bus != vdevice->vtarget->channel) || (mf->TargetID != vdevice->vtarget->id) || memcmp(lun.scsi_lun, mf->LUN, 8)) continue; - dsprintk(( "search_running: found (sc=%p, mf = %p) " - "channel %d id %d, lun %d \n", hd->ScsiLookup[ii], - mf, mf->Bus, mf->TargetID, vdevice->lun)); /* Cleanup */ @@ -1039,19 +1017,12 @@ mpt_free_msg_frame(hd->ioc, (MPT_FRAME_HDR *)mf); if ((unsigned char *)mf != sc->host_scribble) continue; - if (sc->use_sg) { - pci_unmap_sg(hd->ioc->pcidev, - (struct scatterlist *) sc->request_buffer, - sc->use_sg, - sc->sc_data_direction); - } else if (sc->request_bufflen) { - pci_unmap_single(hd->ioc->pcidev, - sc->SCp.dma_handle, - sc->request_bufflen, - sc->sc_data_direction); - } + scsi_dma_unmap(sc); sc->host_scribble = NULL; sc->result = 
DID_NO_CONNECT << 16; + dsprintk(( "search_running: found (sc=%p, mf = %p) " + "channel %d id %d, lun %d \n", sc, mf, + vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun)); sc->scsi_done(sc); } } @@ -1380,10 +1351,10 @@ * will be no data transfer! GRRRRR... */ if (SCpnt->sc_data_direction == DMA_FROM_DEVICE) { - datalen = SCpnt->request_bufflen; + datalen = scsi_bufflen(SCpnt); scsidir = MPI_SCSIIO_CONTROL_READ; /* DATA IN (host<--ioc<--dev) */ } else if (SCpnt->sc_data_direction == DMA_TO_DEVICE) { - datalen = SCpnt->request_bufflen; + datalen = scsi_bufflen(SCpnt); scsidir = MPI_SCSIIO_CONTROL_WRITE; /* DATA OUT (host-->ioc-->dev) */ } else { datalen = 0; @@ -1768,20 +1739,45 @@ u32 ctx2abort; int scpnt_idx; int retval; - VirtDevice *vdev; + VirtDevice *vdevice; ulong sn = SCpnt->serial_number; + MPT_ADAPTER *ioc; /* If we can't locate our host adapter structure, return FAILED status. */ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL) { SCpnt->result = DID_RESET << 16; SCpnt->scsi_done(SCpnt); - dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: " - "Can't locate host! (sc=%p)\n", - SCpnt)); + dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: Can't locate " + "host! (sc=%p)\n", SCpnt)); return FAILED; } + ioc = hd->ioc; + printk(MYIOC_s_INFO_FMT "attempting task abort! (sc=%p)\n", + ioc->name, SCpnt); + scsi_print_command(SCpnt); + + vdevice = SCpnt->device->hostdata; + if (!vdevice || !vdevice->vtarget) { + dtmprintk((MYIOC_s_DEBUG_FMT "task abort: device has been " + "deleted (sc=%p)\n", ioc->name, SCpnt)); + SCpnt->result = DID_NO_CONNECT << 16; + SCpnt->scsi_done(SCpnt); + retval = 0; + goto out; + } + + /* Task aborts are not supported for hidden raid components. 
+ */ + if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) { + dtmprintk((MYIOC_s_DEBUG_FMT "task abort: hidden raid " + "component (sc=%p)\n", ioc->name, SCpnt)); + SCpnt->result = DID_RESET << 16; + retval = FAILED; + goto out; + } + /* Find this command */ if ((scpnt_idx = SCPNT_TO_LOOKUP_IDX(SCpnt)) < 0) { @@ -1790,21 +1786,20 @@ */ SCpnt->result = DID_RESET << 16; dtmprintk((KERN_INFO MYNAM ": %s: mptscsih_abort: " - "Command not in the active list! (sc=%p)\n", - hd->ioc->name, SCpnt)); - return SUCCESS; + "Command not in the active list! (sc=%p)\n", ioc->name, + SCpnt)); + retval = 0; + goto out; } - if (hd->resetPending) - return FAILED; + if (hd->resetPending) { + retval = FAILED; + goto out; + } if (hd->timeouts < -1) hd->timeouts++; - printk(KERN_WARNING MYNAM ": %s: attempting task abort! (sc=%p)\n", - hd->ioc->name, SCpnt); - scsi_print_command(SCpnt); - /* Most important! Set TaskMsgContext to SCpnt's MsgContext! * (the IO to be ABORT'd) * @@ -1817,18 +1812,17 @@ hd->abortSCpnt = SCpnt; - vdev = SCpnt->device->hostdata; retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK, - vdev->vtarget->channel, vdev->vtarget->id, vdev->lun, - ctx2abort, mptscsih_get_tm_timeout(hd->ioc)); + vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun, + ctx2abort, mptscsih_get_tm_timeout(ioc)); if (SCPNT_TO_LOOKUP_IDX(SCpnt) == scpnt_idx && SCpnt->serial_number == sn) retval = FAILED; - printk (KERN_WARNING MYNAM ": %s: task abort: %s (sc=%p)\n", - hd->ioc->name, - ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + out: + printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n", + ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); if (retval == 0) return SUCCESS; @@ -1850,32 +1844,47 @@ { MPT_SCSI_HOST *hd; int retval; - VirtDevice *vdev; + VirtDevice *vdevice; + MPT_ADAPTER *ioc; /* If we can't locate our host adapter structure, return FAILED status. 
*/ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ - dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: " - "Can't locate host! (sc=%p)\n", - SCpnt)); + dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: Can't " + "locate host! (sc=%p)\n", SCpnt)); return FAILED; } - if (hd->resetPending) - return FAILED; - - printk(KERN_WARNING MYNAM ": %s: attempting target reset! (sc=%p)\n", - hd->ioc->name, SCpnt); + ioc = hd->ioc; + printk(MYIOC_s_INFO_FMT "attempting target reset! (sc=%p)\n", + ioc->name, SCpnt); scsi_print_command(SCpnt); - vdev = SCpnt->device->hostdata; + if (hd->resetPending) { + retval = FAILED; + goto out; + } + + vdevice = SCpnt->device->hostdata; + if (!vdevice || !vdevice->vtarget) { + retval = 0; + goto out; + } + + /* Target reset to hidden raid component is not supported + */ + if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) { + retval = FAILED; + goto out; + } + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, - vdev->vtarget->channel, vdev->vtarget->id, - 0, 0, mptscsih_get_tm_timeout(hd->ioc)); + vdevice->vtarget->channel, vdevice->vtarget->id, 0, 0, + mptscsih_get_tm_timeout(ioc)); - printk (KERN_WARNING MYNAM ": %s: target reset: %s (sc=%p)\n", - hd->ioc->name, - ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + out: + printk (MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n", + ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); if (retval == 0) return SUCCESS; @@ -1899,18 +1908,19 @@ MPT_SCSI_HOST *hd; int retval; VirtDevice *vdev; + MPT_ADAPTER *ioc; /* If we can't locate our host adapter structure, return FAILED status. */ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ - dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: " - "Can't locate host! (sc=%p)\n", - SCpnt ) ); + dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: Can't " + "locate host! (sc=%p)\n", SCpnt )); return FAILED; } - printk(KERN_WARNING MYNAM ": %s: attempting bus reset! 
(sc=%p)\n", - hd->ioc->name, SCpnt); + ioc = hd->ioc; + printk(MYIOC_s_INFO_FMT "attempting bus reset! (sc=%p)\n", + ioc->name, SCpnt); scsi_print_command(SCpnt); if (hd->timeouts < -1) @@ -1918,11 +1928,10 @@ vdev = SCpnt->device->hostdata; retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS, - vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(hd->ioc)); + vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(ioc)); - printk (KERN_WARNING MYNAM ": %s: bus reset: %s (sc=%p)\n", - hd->ioc->name, - ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + printk(MYIOC_s_INFO_FMT "bus reset: %s (sc=%p)\n", + ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); if (retval == 0) return SUCCESS; @@ -1943,37 +1952,38 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt) { MPT_SCSI_HOST * hd; - int status = SUCCESS; + int retval; + MPT_ADAPTER *ioc; /* If we can't locate the host to reset, then we failed. */ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ - dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: " - "Can't locate host! (sc=%p)\n", - SCpnt ) ); + dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: Can't " + "locate host! (sc=%p)\n", SCpnt)); return FAILED; } - printk(KERN_WARNING MYNAM ": %s: Attempting host reset! (sc=%p)\n", - hd->ioc->name, SCpnt); + ioc = hd->ioc; + printk(MYIOC_s_INFO_FMT "attempting host reset! (sc=%p)\n", + ioc->name, SCpnt); /* If our attempts to reset the host failed, then return a failed * status. The host will be taken off line by the SCSI mid-layer. */ - if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0){ - status = FAILED; + if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0) { + retval = FAILED; } else { /* Make sure TM pending is cleared and TM state is set to * NONE. */ + retval = 0; hd->tmPending = 0; hd->tmState = TM_STATE_NONE; } - dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: " - "Status = %s\n", - (status == SUCCESS) ? 
"SUCCESS" : "FAILED" ) ); + printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n", + ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); - return status; + return retval; } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -3150,6 +3160,16 @@ { INTERNAL_CMD iocmd; + /* Ignore hidden raid components, this is handled when the command + * is sent to the volume + */ + if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) + return; + + if (vdevice->vtarget->type != TYPE_DISK || vdevice->vtarget->deleted || + !vdevice->configured_lun) + return; + /* Following parameters will not change * in this routine. */ @@ -3164,8 +3184,6 @@ iocmd.id = vdevice->vtarget->id; iocmd.lun = vdevice->lun; - if ((vdevice->vtarget->type == TYPE_DISK) && - (vdevice->configured_lun)) mptscsih_do_cmd(hd, &iocmd); } diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.h linux-2.6.22-591/drivers/message/fusion/mptscsih.h --- linux-2.6.22-570/drivers/message/fusion/mptscsih.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptscsih.h 2007-12-21 15:36:12.000000000 -0500 @@ -6,7 +6,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. * * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptspi.c linux-2.6.22-591/drivers/message/fusion/mptspi.c --- linux-2.6.22-570/drivers/message/fusion/mptspi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/fusion/mptspi.c 2007-12-21 15:36:12.000000000 -0500 @@ -4,7 +4,7 @@ * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
* * Copyright (c) 1999-2007 LSI Logic Corporation - * (mailto:mpt_linux_developer@lsi.com) + * (mailto:DL-MPTFusionLinux@lsi.com) * */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -44,7 +44,6 @@ */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -#include "linux_compat.h" /* linux-2.6 tweaks */ #include #include #include diff -Nurb linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c linux-2.6.22-591/drivers/message/i2o/i2o_scsi.c --- linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/message/i2o/i2o_scsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -377,12 +377,8 @@ osm_err("SCSI error %08x\n", error); dev = &c->pdev->dev; - if (cmd->use_sg) - dma_unmap_sg(dev, cmd->request_buffer, cmd->use_sg, - cmd->sc_data_direction); - else if (cmd->SCp.dma_handle) - dma_unmap_single(dev, cmd->SCp.dma_handle, cmd->request_bufflen, - cmd->sc_data_direction); + + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); @@ -664,21 +660,15 @@ if (sgl_offset != SGL_OFFSET_0) { /* write size of data addressed by SGL */ - *mptr++ = cpu_to_le32(SCpnt->request_bufflen); + *mptr++ = cpu_to_le32(scsi_bufflen(SCpnt)); /* Now fill in the SGList and command */ - if (SCpnt->use_sg) { - if (!i2o_dma_map_sg(c, SCpnt->request_buffer, - SCpnt->use_sg, + + if (scsi_sg_count(SCpnt)) { + if (!i2o_dma_map_sg(c, scsi_sglist(SCpnt), + scsi_sg_count(SCpnt), SCpnt->sc_data_direction, &mptr)) goto nomem; - } else { - SCpnt->SCp.dma_handle = - i2o_dma_map_single(c, SCpnt->request_buffer, - SCpnt->request_bufflen, - SCpnt->sc_data_direction, &mptr); - if (dma_mapping_error(SCpnt->SCp.dma_handle)) - goto nomem; } } diff -Nurb linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c linux-2.6.22-591/drivers/mfd/ucb1x00-ts.c --- linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mfd/ucb1x00-ts.c 2007-12-21 15:36:12.000000000 -0500 @@ -209,6 +209,7 @@ 
DECLARE_WAITQUEUE(wait, tsk); int valid = 0; + set_freezable(); add_wait_queue(&ts->irq_wait, &wait); while (!kthread_should_stop()) { unsigned int x, y, p; diff -Nurb linux-2.6.22-570/drivers/misc/asus-laptop.c linux-2.6.22-591/drivers/misc/asus-laptop.c --- linux-2.6.22-570/drivers/misc/asus-laptop.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/misc/asus-laptop.c 2007-12-21 15:36:12.000000000 -0500 @@ -737,8 +737,7 @@ struct device_attribute dev_attr_##_name = { \ .attr = { \ .name = __stringify(_name), \ - .mode = 0, \ - .owner = THIS_MODULE }, \ + .mode = 0 }, \ .show = NULL, \ .store = NULL, \ } diff -Nurb linux-2.6.22-570/drivers/mmc/card/Kconfig linux-2.6.22-591/drivers/mmc/card/Kconfig --- linux-2.6.22-570/drivers/mmc/card/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/card/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -14,3 +14,21 @@ mount the filesystem. Almost everyone wishing MMC support should say Y or M here. +config MMC_BLOCK_BOUNCE + bool "Use bounce buffer for simple hosts" + depends on MMC_BLOCK + default y + help + SD/MMC is a high latency protocol where it is crucial to + send large requests in order to get high performance. Many + controllers, however, are restricted to continuous memory + (i.e. they can't do scatter-gather), something the kernel + rarely can provide. + + Say Y here to help these restricted hosts by bouncing + requests back and forth from a large buffer. You will get + a big performance gain at the cost of up to 64 KiB of + physical memory. + + If unsure, say Y here. 
+ diff -Nurb linux-2.6.22-570/drivers/mmc/card/block.c linux-2.6.22-591/drivers/mmc/card/block.c --- linux-2.6.22-570/drivers/mmc/card/block.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/card/block.c 2007-12-21 15:36:12.000000000 -0500 @@ -262,7 +262,9 @@ } brq.data.sg = mq->sg; - brq.data.sg_len = blk_rq_map_sg(req->q, req, brq.data.sg); + brq.data.sg_len = mmc_queue_map_sg(mq); + + mmc_queue_bounce_pre(mq); if (brq.data.blocks != (req->nr_sectors >> (md->block_bits - 9))) { @@ -279,6 +281,9 @@ } mmc_wait_for_req(card->host, &brq.mrq); + + mmc_queue_bounce_post(mq); + if (brq.cmd.error) { printk(KERN_ERR "%s: error %d sending read/write command\n", req->rq_disk->disk_name, brq.cmd.error); diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.c linux-2.6.22-591/drivers/mmc/card/queue.c --- linux-2.6.22-570/drivers/mmc/card/queue.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/card/queue.c 2007-12-21 15:36:12.000000000 -0500 @@ -11,12 +11,15 @@ */ #include #include +#include #include #include #include #include "queue.h" +#define MMC_QUEUE_BOUNCESZ 65536 + #define MMC_QUEUE_SUSPENDED (1 << 0) /* @@ -42,11 +45,7 @@ struct mmc_queue *mq = d; struct request_queue *q = mq->queue; - /* - * Set iothread to ensure that we aren't put to sleep by - * the process freezing. We handle suspension ourselves. 
- */ - current->flags |= PF_MEMALLOC|PF_NOFREEZE; + current->flags |= PF_MEMALLOC; down(&mq->thread_sem); do { @@ -118,6 +117,7 @@ struct mmc_host *host = card->host; u64 limit = BLK_BOUNCE_HIGH; int ret; + unsigned int bouncesz; if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = *mmc_dev(host)->dma_mask; @@ -127,36 +127,83 @@ if (!mq->queue) return -ENOMEM; + mq->queue->queuedata = mq; + mq->req = NULL; + blk_queue_prep_rq(mq->queue, mmc_prep_request); + +#ifdef CONFIG_MMC_BLOCK_BOUNCE + if (host->max_hw_segs == 1) { + bouncesz = MMC_QUEUE_BOUNCESZ; + + if (bouncesz > host->max_req_size) + bouncesz = host->max_req_size; + if (bouncesz > host->max_seg_size) + bouncesz = host->max_seg_size; + + mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); + if (!mq->bounce_buf) { + printk(KERN_WARNING "%s: unable to allocate " + "bounce buffer\n", mmc_card_name(card)); + } else { + blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); + blk_queue_max_sectors(mq->queue, bouncesz / 512); + blk_queue_max_phys_segments(mq->queue, bouncesz / 512); + blk_queue_max_hw_segments(mq->queue, bouncesz / 512); + blk_queue_max_segment_size(mq->queue, bouncesz); + + mq->sg = kmalloc(sizeof(struct scatterlist), + GFP_KERNEL); + if (!mq->sg) { + ret = -ENOMEM; + goto free_bounce_buf; + } + + mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * + bouncesz / 512, GFP_KERNEL); + if (!mq->bounce_sg) { + ret = -ENOMEM; + goto free_sg; + } + } + } +#endif + + if (!mq->bounce_buf) { blk_queue_bounce_limit(mq->queue, limit); blk_queue_max_sectors(mq->queue, host->max_req_size / 512); blk_queue_max_phys_segments(mq->queue, host->max_phys_segs); blk_queue_max_hw_segments(mq->queue, host->max_hw_segs); blk_queue_max_segment_size(mq->queue, host->max_seg_size); - mq->queue->queuedata = mq; - mq->req = NULL; - - mq->sg = kmalloc(sizeof(struct scatterlist) * host->max_phys_segs, - GFP_KERNEL); + mq->sg = kmalloc(sizeof(struct scatterlist) * + host->max_phys_segs, GFP_KERNEL); if (!mq->sg) { ret = 
-ENOMEM; goto cleanup_queue; } + } init_MUTEX(&mq->thread_sem); mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); if (IS_ERR(mq->thread)) { ret = PTR_ERR(mq->thread); - goto free_sg; + goto free_bounce_sg; } return 0; - + free_bounce_sg: + if (mq->bounce_sg) + kfree(mq->bounce_sg); + mq->bounce_sg = NULL; free_sg: kfree(mq->sg); mq->sg = NULL; + free_bounce_buf: + if (mq->bounce_buf) + kfree(mq->bounce_buf); + mq->bounce_buf = NULL; cleanup_queue: blk_cleanup_queue(mq->queue); return ret; @@ -178,9 +225,17 @@ /* Then terminate our worker thread */ kthread_stop(mq->thread); + if (mq->bounce_sg) + kfree(mq->bounce_sg); + mq->bounce_sg = NULL; + kfree(mq->sg); mq->sg = NULL; + if (mq->bounce_buf) + kfree(mq->bounce_buf); + mq->bounce_buf = NULL; + blk_cleanup_queue(mq->queue); mq->card = NULL; @@ -231,3 +286,108 @@ } } +static void copy_sg(struct scatterlist *dst, unsigned int dst_len, + struct scatterlist *src, unsigned int src_len) +{ + unsigned int chunk; + char *dst_buf, *src_buf; + unsigned int dst_size, src_size; + + dst_buf = NULL; + src_buf = NULL; + dst_size = 0; + src_size = 0; + + while (src_len) { + BUG_ON(dst_len == 0); + + if (dst_size == 0) { + dst_buf = page_address(dst->page) + dst->offset; + dst_size = dst->length; + } + + if (src_size == 0) { + src_buf = page_address(src->page) + src->offset; + src_size = src->length; + } + + chunk = min(dst_size, src_size); + + memcpy(dst_buf, src_buf, chunk); + + dst_buf += chunk; + src_buf += chunk; + dst_size -= chunk; + src_size -= chunk; + + if (dst_size == 0) { + dst++; + dst_len--; + } + + if (src_size == 0) { + src++; + src_len--; + } + } +} + +unsigned int mmc_queue_map_sg(struct mmc_queue *mq) +{ + unsigned int sg_len; + + if (!mq->bounce_buf) + return blk_rq_map_sg(mq->queue, mq->req, mq->sg); + + BUG_ON(!mq->bounce_sg); + + sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); + + mq->bounce_sg_len = sg_len; + + /* + * Shortcut in the event we only get a single entry. 
+ */ + if (sg_len == 1) { + memcpy(mq->sg, mq->bounce_sg, sizeof(struct scatterlist)); + return 1; + } + + mq->sg[0].page = virt_to_page(mq->bounce_buf); + mq->sg[0].offset = offset_in_page(mq->bounce_buf); + mq->sg[0].length = 0; + + while (sg_len) { + mq->sg[0].length += mq->bounce_sg[sg_len - 1].length; + sg_len--; + } + + return 1; +} + +void mmc_queue_bounce_pre(struct mmc_queue *mq) +{ + if (!mq->bounce_buf) + return; + + if (mq->bounce_sg_len == 1) + return; + if (rq_data_dir(mq->req) != WRITE) + return; + + copy_sg(mq->sg, 1, mq->bounce_sg, mq->bounce_sg_len); +} + +void mmc_queue_bounce_post(struct mmc_queue *mq) +{ + if (!mq->bounce_buf) + return; + + if (mq->bounce_sg_len == 1) + return; + if (rq_data_dir(mq->req) != READ) + return; + + copy_sg(mq->bounce_sg, mq->bounce_sg_len, mq->sg, 1); +} + diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.h linux-2.6.22-591/drivers/mmc/card/queue.h --- linux-2.6.22-570/drivers/mmc/card/queue.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/card/queue.h 2007-12-21 15:36:12.000000000 -0500 @@ -14,6 +14,9 @@ void *data; struct request_queue *queue; struct scatterlist *sg; + char *bounce_buf; + struct scatterlist *bounce_sg; + unsigned int bounce_sg_len; }; extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *); @@ -21,4 +24,8 @@ extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); +extern unsigned int mmc_queue_map_sg(struct mmc_queue *); +extern void mmc_queue_bounce_pre(struct mmc_queue *); +extern void mmc_queue_bounce_post(struct mmc_queue *); + #endif diff -Nurb linux-2.6.22-570/drivers/mmc/core/Kconfig linux-2.6.22-591/drivers/mmc/core/Kconfig --- linux-2.6.22-570/drivers/mmc/core/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -14,3 +14,16 @@ This option is usually just for embedded systems which use a MMC/SD card for rootfs. 
Most people should say N here. +config MMC_PASSWORDS + boolean "MMC card lock/unlock passwords (EXPERIMENTAL)" + depends on EXPERIMENTAL + select KEYS + help + Say Y here to enable the use of passwords to lock and unlock + MMC cards. This uses the access key retention support, using + request_key to look up the key associated with each card. + + For example, if you have an MMC card that was locked using + Symbian OS on your cell phone, you won't be able to read it + on Linux without this support. + diff -Nurb linux-2.6.22-570/drivers/mmc/core/Makefile linux-2.6.22-591/drivers/mmc/core/Makefile --- linux-2.6.22-570/drivers/mmc/core/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -7,5 +7,6 @@ endif obj-$(CONFIG_MMC) += mmc_core.o -mmc_core-y := core.o sysfs.o mmc.o mmc_ops.o sd.o sd_ops.o +mmc_core-y := core.o sysfs.o bus.o host.o mmc.o mmc_ops.o sd.o sd_ops.o +mmc_core-$(CONFIG_MMC_PASSWORDS) += lock.o diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.c linux-2.6.22-591/drivers/mmc/core/bus.c --- linux-2.6.22-570/drivers/mmc/core/bus.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/mmc/core/bus.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,245 @@ +/* + * linux/drivers/mmc/core/bus.c + * + * Copyright (C) 2003 Russell King, All Rights Reserved. + * Copyright (C) 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * MMC card bus driver model + */ + +#include +#include + +#include +#include + +#include "sysfs.h" +#include "bus.h" + +#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) +#define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) + +static ssize_t mmc_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_card *card = dev_to_mmc_card(dev); + + switch (card->type) { + case MMC_TYPE_MMC: + return sprintf(buf, "MMC\n"); + case MMC_TYPE_SD: + return sprintf(buf, "SD\n"); + default: + return -EFAULT; + } +} + +static struct device_attribute mmc_dev_attrs[] = { + MMC_ATTR_RO(type), + __ATTR_NULL, +}; + +/* + * This currently matches any MMC driver to any MMC card - drivers + * themselves make the decision whether to drive this card in their + * probe method. + * + * We also fail for all locked cards; drivers expect to be able to do block + * I/O still on probe(), which is not possible while the card is locked. + * Device probing must be triggered sometime later to make the card available + * to the block driver. 
+ */ +static int mmc_bus_match(struct device *dev, struct device_driver *drv) +{ + struct mmc_card *card = dev_to_mmc_card(dev); + + if (mmc_card_locked(card)) { + dev_dbg(&card->dev, "card is locked; binding is deferred\n"); + return 0; + } + + return 1; +} + +static int +mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf, + int buf_size) +{ + struct mmc_card *card = dev_to_mmc_card(dev); + int retval = 0, i = 0, length = 0; + +#define add_env(fmt,val) do { \ + retval = add_uevent_var(envp, num_envp, &i, \ + buf, buf_size, &length, \ + fmt, val); \ + if (retval) \ + return retval; \ +} while (0); + + switch (card->type) { + case MMC_TYPE_MMC: + add_env("MMC_TYPE=%s", "MMC"); + break; + case MMC_TYPE_SD: + add_env("MMC_TYPE=%s", "SD"); + break; + } + + add_env("MMC_NAME=%s", mmc_card_name(card)); + +#undef add_env + + envp[i] = NULL; + + return 0; +} + +static int mmc_bus_probe(struct device *dev) +{ + struct mmc_driver *drv = to_mmc_driver(dev->driver); + struct mmc_card *card = dev_to_mmc_card(dev); + + return drv->probe(card); +} + +static int mmc_bus_remove(struct device *dev) +{ + struct mmc_driver *drv = to_mmc_driver(dev->driver); + struct mmc_card *card = dev_to_mmc_card(dev); + + drv->remove(card); + + return 0; +} + +static int mmc_bus_suspend(struct device *dev, pm_message_t state) +{ + struct mmc_driver *drv = to_mmc_driver(dev->driver); + struct mmc_card *card = dev_to_mmc_card(dev); + int ret = 0; + + if (dev->driver && drv->suspend) + ret = drv->suspend(card, state); + return ret; +} + +static int mmc_bus_resume(struct device *dev) +{ + struct mmc_driver *drv = to_mmc_driver(dev->driver); + struct mmc_card *card = dev_to_mmc_card(dev); + int ret = 0; + + if (dev->driver && drv->resume) + ret = drv->resume(card); + return ret; +} + +static struct bus_type mmc_bus_type = { + .name = "mmc", + .dev_attrs = mmc_dev_attrs, + .match = mmc_bus_match, + .uevent = mmc_bus_uevent, + .probe = mmc_bus_probe, + .remove = mmc_bus_remove, + 
.suspend = mmc_bus_suspend, + .resume = mmc_bus_resume, +}; + +int mmc_register_bus(void) +{ + return bus_register(&mmc_bus_type); +} + +void mmc_unregister_bus(void) +{ + bus_unregister(&mmc_bus_type); +} + +/** + * mmc_register_driver - register a media driver + * @drv: MMC media driver + */ +int mmc_register_driver(struct mmc_driver *drv) +{ + drv->drv.bus = &mmc_bus_type; + return driver_register(&drv->drv); +} + +EXPORT_SYMBOL(mmc_register_driver); + +/** + * mmc_unregister_driver - unregister a media driver + * @drv: MMC media driver + */ +void mmc_unregister_driver(struct mmc_driver *drv) +{ + drv->drv.bus = &mmc_bus_type; + driver_unregister(&drv->drv); +} + +EXPORT_SYMBOL(mmc_unregister_driver); + +static void mmc_release_card(struct device *dev) +{ + struct mmc_card *card = dev_to_mmc_card(dev); + + kfree(card); +} + +/* + * Allocate and initialise a new MMC card structure. + */ +struct mmc_card *mmc_alloc_card(struct mmc_host *host) +{ + struct mmc_card *card; + + card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL); + if (!card) + return ERR_PTR(-ENOMEM); + + memset(card, 0, sizeof(struct mmc_card)); + + card->host = host; + + device_initialize(&card->dev); + + card->dev.parent = mmc_classdev(host); + card->dev.bus = &mmc_bus_type; + card->dev.release = mmc_release_card; + + return card; +} + +/* + * Register a new MMC card with the driver model. + */ +int mmc_add_card(struct mmc_card *card) +{ + int ret; + + snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), + "%s:%04x", mmc_hostname(card->host), card->rca); + + ret = device_add(&card->dev); + if (ret == 0) + mmc_card_set_present(card); + + return ret; +} + +/* + * Unregister a new MMC card with the driver model, and + * (eventually) free it. 
+ */ +void mmc_remove_card(struct mmc_card *card) +{ + if (mmc_card_present(card)) + device_del(&card->dev); + + put_device(&card->dev); +} + diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.h linux-2.6.22-591/drivers/mmc/core/bus.h --- linux-2.6.22-570/drivers/mmc/core/bus.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/mmc/core/bus.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,22 @@ +/* + * linux/drivers/mmc/core/bus.h + * + * Copyright (C) 2003 Russell King, All Rights Reserved. + * Copyright 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _MMC_CORE_BUS_H +#define _MMC_CORE_BUS_H + +struct mmc_card *mmc_alloc_card(struct mmc_host *host); +int mmc_add_card(struct mmc_card *card); +void mmc_remove_card(struct mmc_card *card); + +int mmc_register_bus(void); +void mmc_unregister_bus(void); + +#endif + diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.c linux-2.6.22-591/drivers/mmc/core/core.c --- linux-2.6.22-570/drivers/mmc/core/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -27,7 +27,9 @@ #include #include "core.h" -#include "sysfs.h" +#include "bus.h" +#include "host.h" +#include "lock.h" #include "mmc_ops.h" #include "sd_ops.h" @@ -35,6 +37,25 @@ extern int mmc_attach_mmc(struct mmc_host *host, u32 ocr); extern int mmc_attach_sd(struct mmc_host *host, u32 ocr); +static struct workqueue_struct *workqueue; + +/* + * Internal function. Schedule delayed work in the MMC work queue. + */ +static int mmc_schedule_delayed_work(struct delayed_work *work, + unsigned long delay) +{ + return queue_delayed_work(workqueue, work, delay); +} + +/* + * Internal function. Flush all scheduled work from the MMC work queue. 
+ */ +static void mmc_flush_scheduled_work(void) +{ + flush_workqueue(workqueue); +} + /** * mmc_request_done - finish processing an MMC request * @host: MMC host which completed request @@ -369,22 +390,6 @@ } /* - * Allocate a new MMC card - */ -struct mmc_card *mmc_alloc_card(struct mmc_host *host) -{ - struct mmc_card *card; - - card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL); - if (!card) - return ERR_PTR(-ENOMEM); - - mmc_init_card(card, host); - - return card; -} - -/* * Apply power to the MMC stack. This is a two-stage process. * First, we enable power to the card without the clock running. * We then wait a bit for the power to stabilise. Finally, @@ -512,7 +517,7 @@ EXPORT_SYMBOL(mmc_detect_change); -static void mmc_rescan(struct work_struct *work) +void mmc_rescan(struct work_struct *work) { struct mmc_host *host = container_of(work, struct mmc_host, detect.work); @@ -561,69 +566,13 @@ } } - -/** - * mmc_alloc_host - initialise the per-host structure. - * @extra: sizeof private data structure - * @dev: pointer to host device model structure - * - * Initialise the per-host structure. - */ -struct mmc_host *mmc_alloc_host(int extra, struct device *dev) -{ - struct mmc_host *host; - - host = mmc_alloc_host_sysfs(extra, dev); - if (host) { - spin_lock_init(&host->lock); - init_waitqueue_head(&host->wq); - INIT_DELAYED_WORK(&host->detect, mmc_rescan); - - /* - * By default, hosts do not support SGIO or large requests. - * They have to set these according to their abilities. 
- */ - host->max_hw_segs = 1; - host->max_phys_segs = 1; - host->max_seg_size = PAGE_CACHE_SIZE; - - host->max_req_size = PAGE_CACHE_SIZE; - host->max_blk_size = 512; - host->max_blk_count = PAGE_CACHE_SIZE / 512; - } - - return host; -} - -EXPORT_SYMBOL(mmc_alloc_host); - -/** - * mmc_add_host - initialise host hardware - * @host: mmc host - */ -int mmc_add_host(struct mmc_host *host) +void mmc_start_host(struct mmc_host *host) { - int ret; - - ret = mmc_add_host_sysfs(host); - if (ret == 0) { mmc_power_off(host); mmc_detect_change(host, 0); - } - - return ret; } -EXPORT_SYMBOL(mmc_add_host); - -/** - * mmc_remove_host - remove host hardware - * @host: mmc host - * - * Unregister and remove all cards associated with this host, - * and power down the MMC bus. - */ -void mmc_remove_host(struct mmc_host *host) +void mmc_stop_host(struct mmc_host *host) { #ifdef CONFIG_MMC_DEBUG unsigned long flags; @@ -648,24 +597,8 @@ BUG_ON(host->card); mmc_power_off(host); - mmc_remove_host_sysfs(host); } -EXPORT_SYMBOL(mmc_remove_host); - -/** - * mmc_free_host - free the host structure - * @host: mmc host - * - * Free the host once all references to it have been dropped. 
- */ -void mmc_free_host(struct mmc_host *host) -{ - mmc_free_host_sysfs(host); -} - -EXPORT_SYMBOL(mmc_free_host); - #ifdef CONFIG_PM /** @@ -726,4 +659,47 @@ #endif +static int __init mmc_init(void) +{ + int ret; + + workqueue = create_singlethread_workqueue("kmmcd"); + if (!workqueue) + return -ENOMEM; + + ret = mmc_register_bus(); + if (ret) + goto destroy_workqueue; + + ret = mmc_register_host_class(); + if (ret) + goto unregister_bus; + + ret = mmc_register_key_type(); + if (ret) + goto unregister_host_class; + + return 0; + +unregister_host_class: + mmc_unregister_host_class(); +unregister_bus: + mmc_unregister_bus(); +destroy_workqueue: + destroy_workqueue(workqueue); + + return ret; +} + +static void __exit mmc_exit(void) +{ + mmc_unregister_key_type(); + mmc_unregister_host_class(); + mmc_unregister_bus(); + destroy_workqueue(workqueue); +} + +module_init(mmc_init); +module_exit(mmc_exit); + MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.h linux-2.6.22-591/drivers/mmc/core/core.h --- linux-2.6.22-570/drivers/mmc/core/core.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/core.h 2007-12-21 15:36:12.000000000 -0500 @@ -54,8 +54,6 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); void mmc_set_timing(struct mmc_host *host, unsigned int timing); -struct mmc_card *mmc_alloc_card(struct mmc_host *host); - static inline void mmc_delay(unsigned int ms) { if (ms < 1000 / HZ) { @@ -66,5 +64,9 @@ } } +void mmc_rescan(struct work_struct *work); +void mmc_start_host(struct mmc_host *host); +void mmc_stop_host(struct mmc_host *host); + #endif diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.c linux-2.6.22-591/drivers/mmc/core/host.c --- linux-2.6.22-570/drivers/mmc/core/host.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/mmc/core/host.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,156 @@ +/* + * linux/drivers/mmc/core/host.c + * + * Copyright (C) 2003 Russell King, All Rights Reserved. 
+ * Copyright (C) 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMC host class device management + */ + +#include +#include +#include +#include + +#include + +#include "core.h" +#include "host.h" + +#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) + +static void mmc_host_classdev_release(struct device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + kfree(host); +} + +static struct class mmc_host_class = { + .name = "mmc_host", + .dev_release = mmc_host_classdev_release, +}; + +int mmc_register_host_class(void) +{ + return class_register(&mmc_host_class); +} + +void mmc_unregister_host_class(void) +{ + class_unregister(&mmc_host_class); +} + +static DEFINE_IDR(mmc_host_idr); +static DEFINE_SPINLOCK(mmc_host_lock); + +/** + * mmc_alloc_host - initialise the per-host structure. + * @extra: sizeof private data structure + * @dev: pointer to host device model structure + * + * Initialise the per-host structure. + */ +struct mmc_host *mmc_alloc_host(int extra, struct device *dev) +{ + struct mmc_host *host; + + host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); + if (!host) + return NULL; + + memset(host, 0, sizeof(struct mmc_host) + extra); + + host->parent = dev; + host->class_dev.parent = dev; + host->class_dev.class = &mmc_host_class; + device_initialize(&host->class_dev); + + spin_lock_init(&host->lock); + init_waitqueue_head(&host->wq); + INIT_DELAYED_WORK(&host->detect, mmc_rescan); + + /* + * By default, hosts do not support SGIO or large requests. + * They have to set these according to their abilities. 
+ */ + host->max_hw_segs = 1; + host->max_phys_segs = 1; + host->max_seg_size = PAGE_CACHE_SIZE; + + host->max_req_size = PAGE_CACHE_SIZE; + host->max_blk_size = 512; + host->max_blk_count = PAGE_CACHE_SIZE / 512; + + return host; +} + +EXPORT_SYMBOL(mmc_alloc_host); + +/** + * mmc_add_host - initialise host hardware + * @host: mmc host + */ +int mmc_add_host(struct mmc_host *host) +{ + int err; + + if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmc_host_lock); + err = idr_get_new(&mmc_host_idr, host, &host->index); + spin_unlock(&mmc_host_lock); + if (err) + return err; + + snprintf(host->class_dev.bus_id, BUS_ID_SIZE, + "mmc%d", host->index); + + err = device_add(&host->class_dev); + if (err) + return err; + + mmc_start_host(host); + + return 0; +} + +EXPORT_SYMBOL(mmc_add_host); + +/** + * mmc_remove_host - remove host hardware + * @host: mmc host + * + * Unregister and remove all cards associated with this host, + * and power down the MMC bus. + */ +void mmc_remove_host(struct mmc_host *host) +{ + mmc_stop_host(host); + + device_del(&host->class_dev); + + spin_lock(&mmc_host_lock); + idr_remove(&mmc_host_idr, host->index); + spin_unlock(&mmc_host_lock); +} + +EXPORT_SYMBOL(mmc_remove_host); + +/** + * mmc_free_host - free the host structure + * @host: mmc host + * + * Free the host once all references to it have been dropped. + */ +void mmc_free_host(struct mmc_host *host) +{ + put_device(&host->class_dev); +} + +EXPORT_SYMBOL(mmc_free_host); + diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.h linux-2.6.22-591/drivers/mmc/core/host.h --- linux-2.6.22-570/drivers/mmc/core/host.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/mmc/core/host.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,18 @@ +/* + * linux/drivers/mmc/core/host.h + * + * Copyright (C) 2003 Russell King, All Rights Reserved. 
+ * Copyright 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _MMC_CORE_HOST_H +#define _MMC_CORE_HOST_H + +int mmc_register_host_class(void); +void mmc_unregister_host_class(void); + +#endif + diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.c linux-2.6.22-591/drivers/mmc/core/lock.c --- linux-2.6.22-570/drivers/mmc/core/lock.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/mmc/core/lock.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,199 @@ +/* + * linux/drivers/mmc/core/lock.c + * + * Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved. + * Copyright 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMC password key handling. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#include "sysfs.h" +#include "mmc_ops.h" +#include "lock.h" + +#define MMC_KEYLEN_MAXBYTES 32 + +#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) + +static int mmc_key_instantiate(struct key *key, const void *data, size_t datalen) +{ + struct mmc_key_payload *mpayload; + int ret; + + ret = -EINVAL; + if (datalen <= 0 || datalen > MMC_KEYLEN_MAXBYTES || !data) { + pr_debug("Invalid data\n"); + goto error; + } + + ret = key_payload_reserve(key, datalen); + if (ret < 0) { + pr_debug("ret = %d\n", ret); + goto error; + } + + ret = -ENOMEM; + mpayload = kmalloc(sizeof(*mpayload) + datalen, GFP_KERNEL); + if (!mpayload) { + pr_debug("Unable to allocate mpayload structure\n"); + goto error; + } + mpayload->datalen = datalen; + memcpy(mpayload->data, data, datalen); + + rcu_assign_pointer(key->payload.data, mpayload); + + /* ret = 0 if there is no error */ + ret = 0; + +error: + return ret; +} + +static int mmc_key_match(const struct key *key, const void *description) +{ + return strcmp(key->description, description) == 0; +} + +/* + * dispose of the data dangling from the corpse of a mmc key + */ +static void mmc_key_destroy(struct key *key) +{ + struct mmc_key_payload *mpayload = key->payload.data; + + kfree(mpayload); +} + +static struct key_type mmc_key_type = { + .name = "mmc", + .def_datalen = MMC_KEYLEN_MAXBYTES, + .instantiate = mmc_key_instantiate, + .match = mmc_key_match, + .destroy = mmc_key_destroy, +}; + +int mmc_register_key_type(void) +{ + return register_key_type(&mmc_key_type); +} + +void mmc_unregister_key_type(void) +{ + unregister_key_type(&mmc_key_type); +} + +static ssize_t +mmc_lockable_show(struct device *dev, struct device_attribute *att, char *buf) +{ + struct mmc_card *card = dev_to_mmc_card(dev); + + return sprintf(buf, "%slocked\n", mmc_card_locked(card) ? 
"" : "un"); +} + +/* + * implement MMC password functions: force erase, remove password, change + * password, unlock card and assign password; keys are key_put() after use. + */ +static ssize_t +mmc_lockable_store(struct device *dev, struct device_attribute *att, + const char *data, size_t len) +{ + struct mmc_card *card = dev_to_mmc_card(dev); + int ret; + struct key *mmc_key; + + if(!mmc_card_lockable(card)) + return -EINVAL; + + mmc_claim_host(card->host); + + ret = -EINVAL; + if (mmc_card_locked(card) && !strncmp(data, "erase", 5)) { + /* forced erase only works while card is locked */ + mmc_lock_unlock(card, NULL, MMC_LOCK_MODE_ERASE); + ret = len; + } else if (!mmc_card_locked(card) && !strncmp(data, "remove", 6)) { + /* remove password only works while card is unlocked */ + mmc_key = request_key(&mmc_key_type, "mmc:key", "remove"); + if (!IS_ERR(mmc_key)) { + ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_CLR_PWD); + key_put(mmc_key); /* drop the reference request_key() took */ + if (!ret) + ret = len; + } else + dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key)); + } else if (!mmc_card_locked(card) && ((!strncmp(data, "assign", 6)) || + (!strncmp(data, "change", 6)))) { + /* assign or change */ + if(!(strncmp(data, "assign", 6))) + mmc_key = request_key(&mmc_key_type, "mmc:key", "assign"); + else + mmc_key = request_key(&mmc_key_type, "mmc:key", "change"); + if (!IS_ERR(mmc_key)) { + ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_SET_PWD); + key_put(mmc_key); /* drop the reference request_key() took */ + if (!ret) + ret = len; + } else + dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key)); + } else if (mmc_card_locked(card) && !strncmp(data, "unlock", 6)) { + /* unlock */ + mmc_key = request_key(&mmc_key_type, "mmc:key", "unlock"); + if (!IS_ERR(mmc_key)) { + ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_UNLOCK); + key_put(mmc_key); /* drop the reference request_key() took */ + if (ret) { + dev_dbg(&card->dev, "Wrong password\n"); + ret = -EINVAL; + } else { + mmc_release_host(card->host); + device_release_driver(dev); + ret = device_attach(dev); + if(!ret) + return -EINVAL; + else + return len; + } + } else + 
dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key)); + } + + mmc_release_host(card->host); + return ret; +} + +static struct device_attribute mmc_dev_attr_lockable[] = { + __ATTR(lockable, S_IWUSR | S_IRUGO, + mmc_lockable_show, mmc_lockable_store), + __ATTR_NULL, +}; + +int mmc_lock_add_sysfs(struct mmc_card *card) +{ + if (!mmc_card_lockable(card)) + return 0; + + return mmc_add_attrs(card, mmc_dev_attr_lockable); +} + +void mmc_lock_remove_sysfs(struct mmc_card *card) +{ + if (!mmc_card_lockable(card)) + return; + + mmc_remove_attrs(card, mmc_dev_attr_lockable); +} + diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.h linux-2.6.22-591/drivers/mmc/core/lock.h --- linux-2.6.22-570/drivers/mmc/core/lock.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/mmc/core/lock.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,51 @@ +/* + * linux/drivers/mmc/core/lock.h + * + * Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved. + * Copyright 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef _MMC_CORE_LOCK_H +#define _MMC_CORE_LOCK_H + +#ifdef CONFIG_MMC_PASSWORDS + +/* core-internal data */ +struct mmc_key_payload { + struct rcu_head rcu; /* RCU destructor */ + unsigned short datalen; /* length of this data */ + char data[0]; /* actual data */ +}; + +int mmc_register_key_type(void); +void mmc_unregister_key_type(void); + +int mmc_lock_add_sysfs(struct mmc_card *card); +void mmc_lock_remove_sysfs(struct mmc_card *card); + +#else + +static inline int mmc_register_key_type(void) +{ + return 0; +} + +static inline void mmc_unregister_key_type(void) +{ +} + +static inline int mmc_lock_add_sysfs(struct mmc_card *card) +{ + return 0; +} + +static inline void mmc_lock_remove_sysfs(struct mmc_card *card) +{ +} + +#endif + +#endif diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc.c linux-2.6.22-591/drivers/mmc/core/mmc.c --- linux-2.6.22-570/drivers/mmc/core/mmc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/mmc.c 2007-12-21 15:36:12.000000000 -0500 @@ -18,6 +18,8 @@ #include "core.h" #include "sysfs.h" +#include "bus.h" +#include "lock.h" #include "mmc_ops.h" static const unsigned int tran_exp[] = { @@ -230,19 +232,74 @@ return err; } +MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], + card->raw_cid[2], card->raw_cid[3]); +MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], + card->raw_csd[2], card->raw_csd[3]); +MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year); +MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev); +MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev); +MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid); +MMC_ATTR_FN(name, "%s\n", card->cid.prod_name); +MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid); +MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial); + +static struct device_attribute mmc_dev_attrs[] = { + MMC_ATTR_RO(cid), + MMC_ATTR_RO(csd), + MMC_ATTR_RO(date), + MMC_ATTR_RO(fwrev), + MMC_ATTR_RO(hwrev), + MMC_ATTR_RO(manfid), + MMC_ATTR_RO(name), 
+ MMC_ATTR_RO(oemid), + MMC_ATTR_RO(serial), + __ATTR_NULL, +}; + +/* + * Adds sysfs entries as relevant. + */ +static int mmc_sysfs_add(struct mmc_card *card) +{ + int ret; + + ret = mmc_add_attrs(card, mmc_dev_attrs); + if (ret < 0) + return ret; + + ret = mmc_lock_add_sysfs(card); + if (ret < 0) { + mmc_remove_attrs(card, mmc_dev_attrs); + return ret; + } + + return 0; +} + +/* + * Removes the sysfs entries added by mmc_sysfs_add(). + */ +static void mmc_sysfs_remove(struct mmc_card *card) +{ + mmc_lock_remove_sysfs(card); + mmc_remove_attrs(card, mmc_dev_attrs); +} + /* * Handle the detection and initialisation of a card. * * In the case of a resume, "curcard" will contain the card * we're trying to reinitialise. */ -static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, +static int mmc_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *oldcard) { struct mmc_card *card; int err; u32 cid[4]; unsigned int max_dtr; + u32 status; BUG_ON(!host); BUG_ON(!host->claimed); @@ -294,6 +351,15 @@ mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL); + /* + * Check if card is locked. + */ + err = mmc_send_status(card, &status); + if (err != MMC_ERR_NONE) + goto free_card; + if (status & R1_CARD_IS_LOCKED) + mmc_card_set_locked(card); + if (!oldcard) { /* * Fetch CSD from card. 
@@ -389,6 +455,8 @@ BUG_ON(!host); BUG_ON(!host->card); + mmc_sysfs_remove(host->card); + mmc_remove_card(host->card); host->card = NULL; } @@ -413,8 +481,7 @@ mmc_release_host(host); if (err != MMC_ERR_NONE) { - mmc_remove_card(host->card); - host->card = NULL; + mmc_remove(host); mmc_claim_host(host); mmc_detach_bus(host); @@ -434,7 +501,7 @@ mmc_claim_host(host); mmc_deselect_cards(host); - host->card->state &= ~MMC_STATE_HIGHSPEED; + host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED); mmc_release_host(host); } @@ -453,11 +520,9 @@ mmc_claim_host(host); - err = mmc_sd_init_card(host, host->ocr, host->card); + err = mmc_init_card(host, host->ocr, host->card); if (err != MMC_ERR_NONE) { - mmc_remove_card(host->card); - host->card = NULL; - + mmc_remove(host); mmc_detach_bus(host); } @@ -512,13 +577,17 @@ /* * Detect and init the card. */ - err = mmc_sd_init_card(host, host->ocr, NULL); + err = mmc_init_card(host, host->ocr, NULL); if (err != MMC_ERR_NONE) goto err; mmc_release_host(host); - err = mmc_register_card(host->card); + err = mmc_add_card(host->card); + if (err) + goto reclaim_host; + + err = mmc_sysfs_add(host->card); if (err) goto reclaim_host; diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.c linux-2.6.22-591/drivers/mmc/core/mmc_ops.c --- linux-2.6.22-570/drivers/mmc/core/mmc_ops.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/mmc_ops.c 2007-12-21 15:36:12.000000000 -0500 @@ -2,6 +2,8 @@ * linux/drivers/mmc/mmc_ops.h * * Copyright 2006-2007 Pierre Ossman + * MMC password protection (C) 2006 Instituto Nokia de Tecnologia (INdT), + * All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -12,12 +14,14 @@ #include #include #include +#include #include #include #include #include "core.h" +#include "lock.h" #include "mmc_ops.h" static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card) @@ -274,3 +278,114 @@ return MMC_ERR_NONE; } +#ifdef CONFIG_MMC_PASSWORDS + +int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode) +{ + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_data data; + struct scatterlist sg; + struct mmc_key_payload *mpayload; + unsigned long erase_timeout; + int err, data_size; + u8 *data_buf; + + mpayload = NULL; + data_size = 1; + if (!(mode & MMC_LOCK_MODE_ERASE)) { + mpayload = rcu_dereference(key->payload.data); + data_size = 2 + mpayload->datalen; + } + + data_buf = kmalloc(data_size, GFP_KERNEL); + if (!data_buf) + return -ENOMEM; + memset(data_buf, 0, data_size); + + data_buf[0] |= mode; + if (mode & MMC_LOCK_MODE_UNLOCK) + data_buf[0] &= ~MMC_LOCK_MODE_UNLOCK; + + if (!(mode & MMC_LOCK_MODE_ERASE)) { + data_buf[1] = mpayload->datalen; + memcpy(data_buf + 2, mpayload->data, mpayload->datalen); + } + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = MMC_SET_BLOCKLEN; + cmd.arg = data_size; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES); + if (err != MMC_ERR_NONE) + goto out; + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = MMC_LOCK_UNLOCK; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1B | MMC_CMD_ADTC; + + memset(&data, 0, sizeof(struct mmc_data)); + + mmc_set_data_timeout(&data, card, 1); + + data.blksz = data_size; + data.blocks = 1; + data.flags = MMC_DATA_WRITE; + data.sg = &sg; + data.sg_len = 1; + + memset(&mrq, 0, sizeof(struct mmc_request)); + + mrq.cmd = &cmd; + mrq.data = &data; + + sg_init_one(&sg, data_buf, data_size); + err = mmc_wait_for_req(card->host, 
&mrq); + if (err != MMC_ERR_NONE) + goto out; + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = MMC_SEND_STATUS; + cmd.arg = card->rca << 16; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + + /* set timeout for forced erase operation to 3 min. (see MMC spec) */ + erase_timeout = jiffies + 180 * HZ; + do { + /* we cannot use "retries" here because the + * R1_LOCK_UNLOCK_FAILED bit is cleared by subsequent reads to + * the status register, hiding the error condition */ + err = mmc_wait_for_cmd(card->host, &cmd, 0); + if (err != MMC_ERR_NONE) + break; + /* the other modes don't need timeout checking */ + if (!(mode & MMC_LOCK_MODE_ERASE)) + continue; + if (time_after(jiffies, erase_timeout)) { + dev_dbg(&card->dev, "forced erase timed out\n"); + err = MMC_ERR_TIMEOUT; + break; + } + } while (!(cmd.resp[0] & R1_READY_FOR_DATA)); + if (cmd.resp[0] & R1_LOCK_UNLOCK_FAILED) { + dev_dbg(&card->dev, "LOCK_UNLOCK operation failed\n"); + err = MMC_ERR_FAILED; + } + + if (cmd.resp[0] & R1_CARD_IS_LOCKED) + mmc_card_set_locked(card); + else + card->state &= ~MMC_STATE_LOCKED; + +out: + kfree(data_buf); + + return err; +} + +#endif /* CONFIG_MMC_PASSWORDS */ + diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.h linux-2.6.22-591/drivers/mmc/core/mmc_ops.h --- linux-2.6.22-570/drivers/mmc/core/mmc_ops.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/mmc_ops.h 2007-12-21 15:36:12.000000000 -0500 @@ -12,6 +12,8 @@ #ifndef _MMC_MMC_OPS_H #define _MMC_MMC_OPS_H +struct key; + int mmc_select_card(struct mmc_card *card); int mmc_deselect_cards(struct mmc_host *host); int mmc_go_idle(struct mmc_host *host); @@ -22,6 +24,7 @@ int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value); int mmc_send_status(struct mmc_card *card, u32 *status); +int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode); #endif diff -Nurb linux-2.6.22-570/drivers/mmc/core/sd.c 
linux-2.6.22-591/drivers/mmc/core/sd.c --- linux-2.6.22-570/drivers/mmc/core/sd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/sd.c 2007-12-21 15:36:12.000000000 -0500 @@ -19,11 +19,11 @@ #include "core.h" #include "sysfs.h" +#include "bus.h" +#include "lock.h" #include "mmc_ops.h" #include "sd_ops.h" -#include "core.h" - static const unsigned int tran_exp[] = { 10000, 100000, 1000000, 10000000, 0, 0, 0, 0 @@ -280,6 +280,62 @@ return err; } +MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], + card->raw_cid[2], card->raw_cid[3]); +MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], + card->raw_csd[2], card->raw_csd[3]); +MMC_ATTR_FN(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]); +MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year); +MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev); +MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev); +MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid); +MMC_ATTR_FN(name, "%s\n", card->cid.prod_name); +MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid); +MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial); + +static struct device_attribute mmc_sd_dev_attrs[] = { + MMC_ATTR_RO(cid), + MMC_ATTR_RO(csd), + MMC_ATTR_RO(scr), + MMC_ATTR_RO(date), + MMC_ATTR_RO(fwrev), + MMC_ATTR_RO(hwrev), + MMC_ATTR_RO(manfid), + MMC_ATTR_RO(name), + MMC_ATTR_RO(oemid), + MMC_ATTR_RO(serial), + __ATTR_NULL, +}; + +/* + * Adds sysfs entries as relevant. + */ +static int mmc_sd_sysfs_add(struct mmc_card *card) +{ + int ret; + + ret = mmc_add_attrs(card, mmc_sd_dev_attrs); + if (ret < 0) + return ret; + + ret = mmc_lock_add_sysfs(card); + if (ret < 0) { + mmc_remove_attrs(card, mmc_sd_dev_attrs); + return ret; + } + + return 0; +} + +/* + * Removes the sysfs entries added by mmc_sysfs_add(). 
+ */ +static void mmc_sd_sysfs_remove(struct mmc_card *card) +{ + mmc_lock_remove_sysfs(card); + mmc_remove_attrs(card, mmc_sd_dev_attrs); +} + /* * Handle the detection and initialisation of a card. * @@ -293,6 +349,7 @@ int err; u32 cid[4]; unsigned int max_dtr; + u32 status; BUG_ON(!host); BUG_ON(!host->claimed); @@ -352,6 +409,15 @@ mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL); + /* + * Check if card is locked. + */ + err = mmc_send_status(card, &status); + if (err != MMC_ERR_NONE) + goto free_card; + if (status & R1_CARD_IS_LOCKED) + mmc_card_set_locked(card); + if (!oldcard) { /* * Fetch CSD from card. @@ -463,6 +529,8 @@ BUG_ON(!host); BUG_ON(!host->card); + mmc_sd_sysfs_remove(host->card); + mmc_remove_card(host->card); host->card = NULL; } @@ -487,8 +555,7 @@ mmc_release_host(host); if (err != MMC_ERR_NONE) { - mmc_remove_card(host->card); - host->card = NULL; + mmc_sd_remove(host); mmc_claim_host(host); mmc_detach_bus(host); @@ -508,7 +575,7 @@ mmc_claim_host(host); mmc_deselect_cards(host); - host->card->state &= ~MMC_STATE_HIGHSPEED; + host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED); mmc_release_host(host); } @@ -529,9 +596,7 @@ err = mmc_sd_init_card(host, host->ocr, host->card); if (err != MMC_ERR_NONE) { - mmc_remove_card(host->card); - host->card = NULL; - + mmc_sd_remove(host); mmc_detach_bus(host); } @@ -599,7 +664,11 @@ mmc_release_host(host); - err = mmc_register_card(host->card); + err = mmc_add_card(host->card); + if (err) + goto reclaim_host; + + err = mmc_sd_sysfs_add(host->card); if (err) goto reclaim_host; diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.c linux-2.6.22-591/drivers/mmc/core/sysfs.c --- linux-2.6.22-570/drivers/mmc/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/sysfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -2,6 +2,7 @@ * linux/drivers/mmc/core/sysfs.c * * Copyright (C) 2003 Russell King, All Rights Reserved. 
+ * Copyright 2007 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,352 +10,34 @@ * * MMC sysfs/driver model support. */ -#include -#include #include -#include -#include #include -#include #include "sysfs.h" -#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) -#define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) -#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) - -#define MMC_ATTR(name, fmt, args...) \ -static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ -{ \ - struct mmc_card *card = dev_to_mmc_card(dev); \ - return sprintf(buf, fmt, args); \ -} - -MMC_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], - card->raw_cid[2], card->raw_cid[3]); -MMC_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], - card->raw_csd[2], card->raw_csd[3]); -MMC_ATTR(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]); -MMC_ATTR(date, "%02d/%04d\n", card->cid.month, card->cid.year); -MMC_ATTR(fwrev, "0x%x\n", card->cid.fwrev); -MMC_ATTR(hwrev, "0x%x\n", card->cid.hwrev); -MMC_ATTR(manfid, "0x%06x\n", card->cid.manfid); -MMC_ATTR(name, "%s\n", card->cid.prod_name); -MMC_ATTR(oemid, "0x%04x\n", card->cid.oemid); -MMC_ATTR(serial, "0x%08x\n", card->cid.serial); - -#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL) - -static struct device_attribute mmc_dev_attrs[] = { - MMC_ATTR_RO(cid), - MMC_ATTR_RO(csd), - MMC_ATTR_RO(date), - MMC_ATTR_RO(fwrev), - MMC_ATTR_RO(hwrev), - MMC_ATTR_RO(manfid), - MMC_ATTR_RO(name), - MMC_ATTR_RO(oemid), - MMC_ATTR_RO(serial), - __ATTR_NULL -}; - -static struct device_attribute mmc_dev_attr_scr = MMC_ATTR_RO(scr); - - -static void mmc_release_card(struct device *dev) -{ - struct mmc_card *card = dev_to_mmc_card(dev); - - kfree(card); -} - -/* - * This currently matches any MMC driver to any MMC 
card - drivers - * themselves make the decision whether to drive this card in their - * probe method. - */ -static int mmc_bus_match(struct device *dev, struct device_driver *drv) -{ - return 1; -} - -static int -mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf, - int buf_size) -{ - struct mmc_card *card = dev_to_mmc_card(dev); - char ccc[13]; - int retval = 0, i = 0, length = 0; - -#define add_env(fmt,val) do { \ - retval = add_uevent_var(envp, num_envp, &i, \ - buf, buf_size, &length, \ - fmt, val); \ - if (retval) \ - return retval; \ -} while (0); - - for (i = 0; i < 12; i++) - ccc[i] = card->csd.cmdclass & (1 << i) ? '1' : '0'; - ccc[12] = '\0'; - - add_env("MMC_CCC=%s", ccc); - add_env("MMC_MANFID=%06x", card->cid.manfid); - add_env("MMC_NAME=%s", mmc_card_name(card)); - add_env("MMC_OEMID=%04x", card->cid.oemid); -#undef add_env - envp[i] = NULL; - - return 0; -} - -static int mmc_bus_suspend(struct device *dev, pm_message_t state) +int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs) { - struct mmc_driver *drv = to_mmc_driver(dev->driver); - struct mmc_card *card = dev_to_mmc_card(dev); - int ret = 0; - - if (dev->driver && drv->suspend) - ret = drv->suspend(card, state); - return ret; -} + int error = 0; + int i; -static int mmc_bus_resume(struct device *dev) -{ - struct mmc_driver *drv = to_mmc_driver(dev->driver); - struct mmc_card *card = dev_to_mmc_card(dev); - int ret = 0; - - if (dev->driver && drv->resume) - ret = drv->resume(card); - return ret; -} - -static int mmc_bus_probe(struct device *dev) -{ - struct mmc_driver *drv = to_mmc_driver(dev->driver); - struct mmc_card *card = dev_to_mmc_card(dev); - - return drv->probe(card); -} - -static int mmc_bus_remove(struct device *dev) -{ - struct mmc_driver *drv = to_mmc_driver(dev->driver); - struct mmc_card *card = dev_to_mmc_card(dev); - - drv->remove(card); - - return 0; -} - -static struct bus_type mmc_bus_type = { - .name = "mmc", - .dev_attrs = 
mmc_dev_attrs, - .match = mmc_bus_match, - .uevent = mmc_bus_uevent, - .probe = mmc_bus_probe, - .remove = mmc_bus_remove, - .suspend = mmc_bus_suspend, - .resume = mmc_bus_resume, -}; - -/** - * mmc_register_driver - register a media driver - * @drv: MMC media driver - */ -int mmc_register_driver(struct mmc_driver *drv) -{ - drv->drv.bus = &mmc_bus_type; - return driver_register(&drv->drv); -} - -EXPORT_SYMBOL(mmc_register_driver); - -/** - * mmc_unregister_driver - unregister a media driver - * @drv: MMC media driver - */ -void mmc_unregister_driver(struct mmc_driver *drv) -{ - drv->drv.bus = &mmc_bus_type; - driver_unregister(&drv->drv); -} - -EXPORT_SYMBOL(mmc_unregister_driver); - - -/* - * Internal function. Initialise a MMC card structure. - */ -void mmc_init_card(struct mmc_card *card, struct mmc_host *host) -{ - memset(card, 0, sizeof(struct mmc_card)); - card->host = host; - device_initialize(&card->dev); - card->dev.parent = mmc_classdev(host); - card->dev.bus = &mmc_bus_type; - card->dev.release = mmc_release_card; -} - -/* - * Internal function. Register a new MMC card with the driver model. - */ -int mmc_register_card(struct mmc_card *card) -{ - int ret; - - snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), - "%s:%04x", mmc_hostname(card->host), card->rca); - - ret = device_add(&card->dev); - if (ret == 0) { - if (mmc_card_sd(card)) { - ret = device_create_file(&card->dev, &mmc_dev_attr_scr); - if (ret) - device_del(&card->dev); - } + for (i = 0; attr_name(attrs[i]); i++) { + error = device_create_file(&card->dev, &attrs[i]); + if (error) { + while (--i >= 0) + device_remove_file(&card->dev, &attrs[i]); + break; } - if (ret == 0) - mmc_card_set_present(card); - return ret; -} - -/* - * Internal function. Unregister a new MMC card with the - * driver model, and (eventually) free it. 
- */ -void mmc_remove_card(struct mmc_card *card) -{ - if (mmc_card_present(card)) { - if (mmc_card_sd(card)) - device_remove_file(&card->dev, &mmc_dev_attr_scr); - - device_del(&card->dev); } - put_device(&card->dev); -} - - -static void mmc_host_classdev_release(struct device *dev) -{ - struct mmc_host *host = cls_dev_to_mmc_host(dev); - kfree(host); -} - -static struct class mmc_host_class = { - .name = "mmc_host", - .dev_release = mmc_host_classdev_release, -}; - -static DEFINE_IDR(mmc_host_idr); -static DEFINE_SPINLOCK(mmc_host_lock); - -/* - * Internal function. Allocate a new MMC host. - */ -struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev) -{ - struct mmc_host *host; - - host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); - if (host) { - memset(host, 0, sizeof(struct mmc_host) + extra); - - host->parent = dev; - host->class_dev.parent = dev; - host->class_dev.class = &mmc_host_class; - device_initialize(&host->class_dev); - } - - return host; -} - -/* - * Internal function. Register a new MMC host with the MMC class. - */ -int mmc_add_host_sysfs(struct mmc_host *host) -{ - int err; - - if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&mmc_host_lock); - err = idr_get_new(&mmc_host_idr, host, &host->index); - spin_unlock(&mmc_host_lock); - if (err) - return err; - - snprintf(host->class_dev.bus_id, BUS_ID_SIZE, - "mmc%d", host->index); - - return device_add(&host->class_dev); -} - -/* - * Internal function. Unregister a MMC host with the MMC class. - */ -void mmc_remove_host_sysfs(struct mmc_host *host) -{ - device_del(&host->class_dev); - - spin_lock(&mmc_host_lock); - idr_remove(&mmc_host_idr, host->index); - spin_unlock(&mmc_host_lock); + return error; } -/* - * Internal function. Free a MMC host. 
- */ -void mmc_free_host_sysfs(struct mmc_host *host) +void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs) { - put_device(&host->class_dev); -} + int i; -static struct workqueue_struct *workqueue; - -/* - * Internal function. Schedule delayed work in the MMC work queue. - */ -int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay) -{ - return queue_delayed_work(workqueue, work, delay); -} - -/* - * Internal function. Flush all scheduled work from the MMC work queue. - */ -void mmc_flush_scheduled_work(void) -{ - flush_workqueue(workqueue); -} - -static int __init mmc_init(void) -{ - int ret; - - workqueue = create_singlethread_workqueue("kmmcd"); - if (!workqueue) - return -ENOMEM; - - ret = bus_register(&mmc_bus_type); - if (ret == 0) { - ret = class_register(&mmc_host_class); - if (ret) - bus_unregister(&mmc_bus_type); - } - return ret; -} - -static void __exit mmc_exit(void) -{ - class_unregister(&mmc_host_class); - bus_unregister(&mmc_bus_type); - destroy_workqueue(workqueue); + for (i = 0; attr_name(attrs[i]); i++) + device_remove_file(&card->dev, &attrs[i]); } -module_init(mmc_init); -module_exit(mmc_exit); diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.h linux-2.6.22-591/drivers/mmc/core/sysfs.h --- linux-2.6.22-570/drivers/mmc/core/sysfs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/core/sysfs.h 2007-12-21 15:36:12.000000000 -0500 @@ -11,17 +11,16 @@ #ifndef _MMC_CORE_SYSFS_H #define _MMC_CORE_SYSFS_H -void mmc_init_card(struct mmc_card *card, struct mmc_host *host); -int mmc_register_card(struct mmc_card *card); -void mmc_remove_card(struct mmc_card *card); +#define MMC_ATTR_FN(name, fmt, args...) 
\ +static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ +{ \ + struct mmc_card *card = container_of(dev, struct mmc_card, dev);\ + return sprintf(buf, fmt, args); \ +} -struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev); -int mmc_add_host_sysfs(struct mmc_host *host); -void mmc_remove_host_sysfs(struct mmc_host *host); -void mmc_free_host_sysfs(struct mmc_host *host); +#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL) -int mmc_schedule_work(struct work_struct *work); -int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay); -void mmc_flush_scheduled_work(void); +int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs); +void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs); #endif diff -Nurb linux-2.6.22-570/drivers/mmc/host/sdhci.c linux-2.6.22-591/drivers/mmc/host/sdhci.c --- linux-2.6.22-570/drivers/mmc/host/sdhci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mmc/host/sdhci.c 2007-12-21 15:36:12.000000000 -0500 @@ -70,6 +70,14 @@ .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE, }, + { + .vendor = PCI_VENDOR_ID_ENE, + .device = PCI_DEVICE_ID_ENE_CB712_SD_2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE, + }, + { /* Generic SD host controller */ PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) }, diff -Nurb linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c linux-2.6.22-591/drivers/mtd/mtd_blkdevs.c --- linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mtd/mtd_blkdevs.c 2007-12-21 15:36:12.000000000 -0500 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,7 @@ struct request_queue *rq = tr->blkcore_priv->rq; /* we might get involved when memory gets low, so use PF_MEMALLOC */ - current->flags |= PF_MEMALLOC | PF_NOFREEZE; + current->flags |= 
PF_MEMALLOC; spin_lock_irq(rq->queue_lock); while (!kthread_should_stop()) { diff -Nurb linux-2.6.22-570/drivers/mtd/ubi/wl.c linux-2.6.22-591/drivers/mtd/ubi/wl.c --- linux-2.6.22-570/drivers/mtd/ubi/wl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/mtd/ubi/wl.c 2007-12-21 15:36:12.000000000 -0500 @@ -1346,6 +1346,7 @@ ubi_msg("background thread \"%s\" started, PID %d", ubi->bgt_name, current->pid); + set_freezable(); for (;;) { int err; diff -Nurb linux-2.6.22-570/drivers/net/3c523.c linux-2.6.22-591/drivers/net/3c523.c --- linux-2.6.22-570/drivers/net/3c523.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/3c523.c 2007-12-21 15:36:12.000000000 -0500 @@ -990,7 +990,7 @@ if (skb != NULL) { skb_reserve(skb, 2); /* 16 byte alignment */ skb_put(skb,totlen); - eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0); + skb_copy_to_linear_data(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/7990.c linux-2.6.22-591/drivers/net/7990.c --- linux-2.6.22-570/drivers/net/7990.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/7990.c 2007-12-21 15:36:12.000000000 -0500 @@ -333,9 +333,9 @@ skb_reserve (skb, 2); /* 16 byte align */ skb_put (skb, len); /* make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)&(ib->rx_buf [lp->rx_new][0]), - len, 0); + len); skb->protocol = eth_type_trans (skb, dev); netif_rx (skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/8139too.c linux-2.6.22-591/drivers/net/8139too.c --- linux-2.6.22-570/drivers/net/8139too.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/8139too.c 2007-12-21 15:36:12.000000000 -0500 @@ -2017,7 +2017,7 @@ #if RX_BUF_IDX == 3 wrap_copy(skb, rx_ring, ring_offset+4, pkt_size); #else - eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 
0); + skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size); #endif skb_put (skb, pkt_size); diff -Nurb linux-2.6.22-570/drivers/net/Kconfig linux-2.6.22-591/drivers/net/Kconfig --- linux-2.6.22-570/drivers/net/Kconfig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/net/Kconfig 2007-12-21 15:36:14.000000000 -0500 @@ -119,6 +119,20 @@ If you don't know what to use this for, you don't need it. +config ETUN + tristate "Ethernet tunnel device driver support" + depends on SYSFS + ---help--- + ETUN provides a pair of network devices that can be used for + configuring interesting topologies. What one device transmits + the other receives and vice versa. The link level framing + is ethernet for wide compatibility with network stacks. + + To compile this driver as a module, choose M here: the module + will be called etun. + + If you don't know what to use this for, you don't need it. + config NET_SB1000 tristate "General Instruments Surfboard 1000" depends on PNP @@ -2555,6 +2569,18 @@ source "drivers/s390/net/Kconfig" +config XEN_NETDEV_FRONTEND + tristate "Xen network device frontend driver" + depends on XEN + default y + help + The network device frontend driver allows the kernel to + access network devices exported by a virtual + machine containing a physical network device driver. The + frontend driver is intended for unprivileged guest domains; + if you are compiling a kernel for a Xen guest, you almost + certainly want to enable this. 
+ config ISERIES_VETH tristate "iSeries Virtual Ethernet driver support" depends on PPC_ISERIES diff -Nurb linux-2.6.22-570/drivers/net/Makefile linux-2.6.22-591/drivers/net/Makefile --- linux-2.6.22-570/drivers/net/Makefile 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/net/Makefile 2007-12-21 15:36:14.000000000 -0500 @@ -186,6 +186,7 @@ obj-$(CONFIG_MACMACE) += macmace.o obj-$(CONFIG_MAC89x0) += mac89x0.o obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_ETUN) += etun.o obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_DL2K) += dl2k.o obj-$(CONFIG_R8169) += r8169.o @@ -224,7 +225,10 @@ obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/ obj-$(CONFIG_NETCONSOLE) += netconsole.o +obj-$(CONFIG_KGDBOE) += kgdboe.o obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_NETXEN_NIC) += netxen/ +obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o + diff -Nurb linux-2.6.22-570/drivers/net/a2065.c linux-2.6.22-591/drivers/net/a2065.c --- linux-2.6.22-570/drivers/net/a2065.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/a2065.c 2007-12-21 15:36:12.000000000 -0500 @@ -322,9 +322,9 @@ skb_reserve (skb, 2); /* 16 byte align */ skb_put (skb, len); /* make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)&(ib->rx_buf [lp->rx_new][0]), - len, 0); + len); skb->protocol = eth_type_trans (skb, dev); netif_rx (skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/ariadne.c linux-2.6.22-591/drivers/net/ariadne.c --- linux-2.6.22-570/drivers/net/ariadne.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/ariadne.c 2007-12-21 15:36:12.000000000 -0500 @@ -746,7 +746,7 @@ skb_reserve(skb,2); /* 16 byte align */ skb_put(skb,pkt_len); /* Make room */ - eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0); + skb_copy_to_linear_data(skb, (char *)priv->rx_buff[entry], pkt_len); skb->protocol=eth_type_trans(skb,dev); #if 0 printk(KERN_DEBUG "RX pkt type 0x%04x from ", diff -Nurb 
linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c linux-2.6.22-591/drivers/net/arm/ep93xx_eth.c --- linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/arm/ep93xx_eth.c 2007-12-21 15:36:12.000000000 -0500 @@ -258,7 +258,7 @@ skb_reserve(skb, 2); dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr, length, DMA_FROM_DEVICE); - eth_copy_and_sum(skb, ep->rx_buf[entry], length, 0); + skb_copy_to_linear_data(skb, ep->rx_buf[entry], length); skb_put(skb, length); skb->protocol = eth_type_trans(skb, dev); diff -Nurb linux-2.6.22-570/drivers/net/au1000_eth.c linux-2.6.22-591/drivers/net/au1000_eth.c --- linux-2.6.22-570/drivers/net/au1000_eth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/au1000_eth.c 2007-12-21 15:36:12.000000000 -0500 @@ -1205,8 +1205,8 @@ continue; } skb_reserve(skb, 2); /* 16 byte IP header align */ - eth_copy_and_sum(skb, - (unsigned char *)pDB->vaddr, frmlen, 0); + skb_copy_to_linear_data(skb, + (unsigned char *)pDB->vaddr, frmlen); skb_put(skb, frmlen); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); /* pass the packet to upper layers */ diff -Nurb linux-2.6.22-570/drivers/net/bnx2.c linux-2.6.22-591/drivers/net/bnx2.c --- linux-2.6.22-570/drivers/net/bnx2.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/bnx2.c 2007-12-21 15:36:12.000000000 -0500 @@ -6490,10 +6490,10 @@ memcpy(dev->perm_addr, bp->mac_addr, 6); bp->name = board_info[ent->driver_data].name; - if (CHIP_NUM(bp) == CHIP_NUM_5709) - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; - else dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; + if (CHIP_NUM(bp) == CHIP_NUM_5709) + dev->features |= NETIF_F_IPV6_CSUM; + #ifdef BCM_VLAN dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; #endif diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_3ad.c linux-2.6.22-591/drivers/net/bonding/bond_3ad.c --- linux-2.6.22-570/drivers/net/bonding/bond_3ad.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/drivers/net/bonding/bond_3ad.c 2007-12-21 15:36:14.000000000 -0500 @@ -29,6 +29,7 @@ #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" @@ -2448,6 +2449,9 @@ struct slave *slave = NULL; int ret = NET_RX_DROP; + if (dev->nd_net != &init_net) + goto out; + if (!(dev->flags & IFF_MASTER)) goto out; diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_alb.c linux-2.6.22-591/drivers/net/bonding/bond_alb.c --- linux-2.6.22-570/drivers/net/bonding/bond_alb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/bonding/bond_alb.c 2007-12-21 15:36:14.000000000 -0500 @@ -345,6 +345,9 @@ struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; + if (bond_dev->nd_net != &init_net) + goto out; + if (!(bond_dev->flags & IFF_MASTER)) goto out; diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_main.c linux-2.6.22-591/drivers/net/bonding/bond_main.c --- linux-2.6.22-570/drivers/net/bonding/bond_main.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/net/bonding/bond_main.c 2007-12-21 15:36:14.000000000 -0500 @@ -75,6 +75,7 @@ #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -2376,6 +2377,7 @@ * can tag the ARP with the proper VLAN tag. 
*/ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fl4_dst = targets[i]; fl.fl4_tos = RTO_ONLINK; @@ -2485,6 +2487,9 @@ unsigned char *arp_ptr; u32 sip, tip; + if (dev->nd_net != &init_net) + goto out; + if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) goto out; @@ -3172,7 +3177,7 @@ { int len = strlen(DRV_NAME); - for (bond_proc_dir = proc_net->subdir; bond_proc_dir; + for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir; bond_proc_dir = bond_proc_dir->next) { if ((bond_proc_dir->namelen == len) && !memcmp(bond_proc_dir->name, DRV_NAME, len)) { @@ -3181,7 +3186,7 @@ } if (!bond_proc_dir) { - bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); + bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); if (bond_proc_dir) { bond_proc_dir->owner = THIS_MODULE; } else { @@ -3216,7 +3221,7 @@ bond_proc_dir->owner = NULL; } } else { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); bond_proc_dir = NULL; } } @@ -3323,6 +3328,9 @@ { struct net_device *event_dev = (struct net_device *)ptr; + if (event_dev->nd_net != &init_net) + return NOTIFY_DONE; + dprintk("event_dev: %s, event: %lx\n", (event_dev ? 
event_dev->name : "None"), event); @@ -3740,7 +3748,7 @@ } down_write(&(bonding_rwsem)); - slave_dev = dev_get_by_name(ifr->ifr_slave); + slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); dprintk("slave_dev=%p: \n", slave_dev); diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_sysfs.c linux-2.6.22-591/drivers/net/bonding/bond_sysfs.c --- linux-2.6.22-570/drivers/net/bonding/bond_sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/bonding/bond_sysfs.c 2007-12-21 15:36:14.000000000 -0500 @@ -35,6 +35,7 @@ #include #include #include +#include /* #define BONDING_DEBUG 1 */ #include "bonding.h" @@ -299,7 +300,7 @@ read_unlock_bh(&bond->lock); printk(KERN_INFO DRV_NAME ": %s: Adding slave %s.\n", bond->dev->name, ifname); - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(&init_net, ifname); if (!dev) { printk(KERN_INFO DRV_NAME ": %s: Interface %s does not exist!\n", diff -Nurb linux-2.6.22-570/drivers/net/dl2k.c linux-2.6.22-591/drivers/net/dl2k.c --- linux-2.6.22-570/drivers/net/dl2k.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/dl2k.c 2007-12-21 15:36:12.000000000 -0500 @@ -866,9 +866,9 @@ PCI_DMA_FROMDEVICE); /* 16 byte align the IP header */ skb_reserve (skb, 2); - eth_copy_and_sum (skb, + skb_copy_to_linear_data (skb, np->rx_skbuff[entry]->data, - pkt_len, 0); + pkt_len); skb_put (skb, pkt_len); pci_dma_sync_single_for_device(np->pdev, desc->fraginfo & diff -Nurb linux-2.6.22-570/drivers/net/dummy.c linux-2.6.22-591/drivers/net/dummy.c --- linux-2.6.22-570/drivers/net/dummy.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/dummy.c 2007-12-21 15:36:12.000000000 -0500 @@ -34,11 +34,17 @@ #include #include #include +#include +#include + +struct dummy_priv { + struct net_device *dev; + struct list_head list; +}; static int numdummies = 1; static int dummy_xmit(struct sk_buff *skb, struct net_device *dev); -static struct net_device_stats *dummy_get_stats(struct net_device *dev); 
static int dummy_set_address(struct net_device *dev, void *p) { @@ -56,13 +62,13 @@ { } -static void __init dummy_setup(struct net_device *dev) +static void dummy_setup(struct net_device *dev) { /* Initialize the device structure. */ - dev->get_stats = dummy_get_stats; dev->hard_start_xmit = dummy_xmit; dev->set_multicast_list = set_multicast_list; dev->set_mac_address = dummy_set_address; + dev->destructor = free_netdev; /* Fill in device structure with ethernet-generic values. */ ether_setup(dev); @@ -76,77 +82,114 @@ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = netdev_priv(dev); - - stats->tx_packets++; - stats->tx_bytes+=skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; dev_kfree_skb(skb); return 0; } -static struct net_device_stats *dummy_get_stats(struct net_device *dev) +static LIST_HEAD(dummies); + +static int dummy_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - return netdev_priv(dev); + struct dummy_priv *priv = netdev_priv(dev); + int err; + + err = register_netdevice(dev); + if (err < 0) + return err; + + priv->dev = dev; + list_add_tail(&priv->list, &dummies); + return 0; +} + +static void dummy_dellink(struct net_device *dev) +{ + struct dummy_priv *priv = netdev_priv(dev); + + list_del(&priv->list); + unregister_netdevice(dev); } -static struct net_device **dummies; +static struct rtnl_link_ops dummy_link_ops __read_mostly = { + .kind = "dummy", + .priv_size = sizeof(struct dummy_priv), + .setup = dummy_setup, + .newlink = dummy_newlink, + .dellink = dummy_dellink, +}; /* Number of dummy devices to be set up by this module. 
*/ module_param(numdummies, int, 0); MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); -static int __init dummy_init_one(int index) +static int __init dummy_init_one(void) { struct net_device *dev_dummy; + struct dummy_priv *priv; int err; - dev_dummy = alloc_netdev(sizeof(struct net_device_stats), - "dummy%d", dummy_setup); + dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d", + dummy_setup); if (!dev_dummy) return -ENOMEM; - if ((err = register_netdev(dev_dummy))) { - free_netdev(dev_dummy); - dev_dummy = NULL; - } else { - dummies[index] = dev_dummy; - } + err = dev_alloc_name(dev_dummy, dev_dummy->name); + if (err < 0) + goto err; + + dev_dummy->rtnl_link_ops = &dummy_link_ops; + err = register_netdevice(dev_dummy); + if (err < 0) + goto err; + + priv = netdev_priv(dev_dummy); + priv->dev = dev_dummy; + list_add_tail(&priv->list, &dummies); + return 0; +err: + free_netdev(dev_dummy); return err; } -static void dummy_free_one(int index) -{ - unregister_netdev(dummies[index]); - free_netdev(dummies[index]); -} - static int __init dummy_init_module(void) { + struct dummy_priv *priv, *next; int i, err = 0; - dummies = kmalloc(numdummies * sizeof(void *), GFP_KERNEL); - if (!dummies) - return -ENOMEM; + + rtnl_lock(); + err = __rtnl_link_register(&dummy_link_ops); + for (i = 0; i < numdummies && !err; i++) - err = dummy_init_one(i); - if (err) { - i--; - while (--i >= 0) - dummy_free_one(i); + err = dummy_init_one(); + if (err < 0) { + list_for_each_entry_safe(priv, next, &dummies, list) + dummy_dellink(priv->dev); + __rtnl_link_unregister(&dummy_link_ops); } + rtnl_unlock(); + return err; } static void __exit dummy_cleanup_module(void) { - int i; - for (i = 0; i < numdummies; i++) - dummy_free_one(i); - kfree(dummies); + struct dummy_priv *priv, *next; + + rtnl_lock(); + list_for_each_entry_safe(priv, next, &dummies, list) + dummy_dellink(priv->dev); + + __rtnl_link_unregister(&dummy_link_ops); + rtnl_unlock(); } 
module_init(dummy_init_module); module_exit(dummy_cleanup_module); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("dummy"); diff -Nurb linux-2.6.22-570/drivers/net/eepro100.c linux-2.6.22-591/drivers/net/eepro100.c --- linux-2.6.22-570/drivers/net/eepro100.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/eepro100.c 2007-12-21 15:36:12.000000000 -0500 @@ -1801,7 +1801,7 @@ #if 1 || USE_IP_CSUM /* Packet is in one chunk -- we can copy + cksum. */ - eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0); + skb_copy_to_linear_data(skb, sp->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); #else skb_copy_from_linear_data(sp->rx_skbuff[entry], diff -Nurb linux-2.6.22-570/drivers/net/epic100.c linux-2.6.22-591/drivers/net/epic100.c --- linux-2.6.22-570/drivers/net/epic100.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/epic100.c 2007-12-21 15:36:12.000000000 -0500 @@ -1201,7 +1201,7 @@ ep->rx_ring[entry].bufaddr, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(skb, ep->rx_skbuff[entry]->data, pkt_len, 0); + skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); pci_dma_sync_single_for_device(ep->pci_dev, ep->rx_ring[entry].bufaddr, diff -Nurb linux-2.6.22-570/drivers/net/eql.c linux-2.6.22-591/drivers/net/eql.c --- linux-2.6.22-570/drivers/net/eql.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/eql.c 2007-12-21 15:36:14.000000000 -0500 @@ -116,6 +116,7 @@ #include #include #include +#include #include #include @@ -412,7 +413,7 @@ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; - slave_dev = dev_get_by_name(srq.slave_name); + slave_dev = dev_get_by_name(&init_net, srq.slave_name); if (slave_dev) { if ((master_dev->flags & IFF_UP) == IFF_UP) { /* slave is not a master & not already a slave: */ @@ -460,7 +461,7 @@ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; - slave_dev = 
dev_get_by_name(srq.slave_name); + slave_dev = dev_get_by_name(&init_net, srq.slave_name); ret = -EINVAL; if (slave_dev) { spin_lock_bh(&eql->queue.lock); @@ -493,7 +494,7 @@ if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; - slave_dev = dev_get_by_name(sc.slave_name); + slave_dev = dev_get_by_name(&init_net, sc.slave_name); if (!slave_dev) return -ENODEV; @@ -528,7 +529,7 @@ if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; - slave_dev = dev_get_by_name(sc.slave_name); + slave_dev = dev_get_by_name(&init_net, sc.slave_name); if (!slave_dev) return -ENODEV; diff -Nurb linux-2.6.22-570/drivers/net/etun.c linux-2.6.22-591/drivers/net/etun.c --- linux-2.6.22-570/drivers/net/etun.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/net/etun.c 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,489 @@ +/* + * ETUN - Universal ETUN device driver. + * Copyright (C) 2006 Linux Networx + * + */ + +#define DRV_NAME "etun" +#define DRV_VERSION "1.0" +#define DRV_DESCRIPTION "Ethernet pseudo tunnel device driver" +#define DRV_COPYRIGHT "(C) 2007 Linux Networx" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Device checksum strategy. + * + * etun is designed to be a pair of virtual devices + * connecting two network stack instances. + * + * Typically it will either be used with ethernet bridging or + * it will be used to route packets between the two stacks. + * + * The only checksum offloading I can do is to completely + * skip the checksumming step all together. + * + * When used for ethernet bridging I don't believe any + * checksum off loading is safe. + * - If my source is an external interface the checksum may be + * invalid so I don't want to report I have already checked it. + * - If my destination is an external interface I don't want to put + * a packet on the wire with someone computing the checksum. 
+ * + * When used for routing between two stacks checksums should + * be as unnecessary as they are on the loopback device. + * + * So by default I am safe and disable checksumming and + * other advanced features like SG and TSO. + * + * However because I think these features could be useful + * I provide the ethtool functions to enable/disable + * them at runtime. + * + * If you think you can correctly enable these go ahead. + * For checksums both the transmitter and the receiver must + * agree before they are actually disabled. + */ + +#define ETUN_NUM_STATS 1 +static struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[ETUN_NUM_STATS] = { + { "partner_ifindex" }, +}; + +struct etun_info { + struct net_device *rx_dev; + unsigned ip_summed; + struct net_device_stats stats; + struct list_head list; + struct net_device *dev; +}; + +/* + * I have to hold the rtnl_lock during device delete. + * So I use the rtnl_lock to protect my list manipulations + * as well. Crude but simple. + */ +static LIST_HEAD(etun_list); + +/* + * The higher levels take care of making this non-reentrant (it's + * called with bh's disabled). 
+ */ +static int etun_xmit(struct sk_buff *skb, struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + struct etun_info *rx_info = rx_dev->priv; + + tx_info->stats.tx_packets++; + tx_info->stats.tx_bytes += skb->len; + + /* Drop the skb state that was needed to get here */ + skb_orphan(skb); + if (skb->dst) + skb->dst = dst_pop(skb->dst); /* Allow for smart routing */ + + /* Switch to the receiving device */ + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, rx_dev); + skb->dev = rx_dev; + skb->ip_summed = CHECKSUM_NONE; + + /* If both halves agree no checksum is needed */ + if (tx_dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = rx_info->ip_summed; + + rx_dev->last_rx = jiffies; + rx_info->stats.rx_packets++; + rx_info->stats.rx_bytes += skb->len; + netif_rx(skb); + + return 0; +} + +static struct net_device_stats *etun_get_stats(struct net_device *dev) +{ + struct etun_info *info = dev->priv; + return &info->stats; +} + +/* ethtool interface */ +static int etun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = 0; + cmd->advertising = 0; + cmd->speed = SPEED_10000; /* Memory is fast! 
*/ + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_TP; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + return 0; +} + +static void etun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->fw_version, "N/A"); +} + +static void etun_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + switch(stringset) { + case ETH_SS_STATS: + memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys)); + break; + case ETH_SS_TEST: + default: + break; + } +} + +static int etun_get_stats_count(struct net_device *dev) +{ + return ETUN_NUM_STATS; +} + +static void etun_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct etun_info *info = dev->priv; + + data[0] = info->rx_dev->ifindex; +} + +static u32 etun_get_rx_csum(struct net_device *dev) +{ + struct etun_info *info = dev->priv; + return info->ip_summed == CHECKSUM_UNNECESSARY; +} + +static int etun_set_rx_csum(struct net_device *dev, u32 data) +{ + struct etun_info *info = dev->priv; + + info->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; + + return 0; +} + +static u32 etun_get_tx_csum(struct net_device *dev) +{ + return (dev->features & NETIF_F_NO_CSUM) != 0; +} + +static int etun_set_tx_csum(struct net_device *dev, u32 data) +{ + dev->features &= ~NETIF_F_NO_CSUM; + if (data) + dev->features |= NETIF_F_NO_CSUM; + + return 0; +} + +static struct ethtool_ops etun_ethtool_ops = { + .get_settings = etun_get_settings, + .get_drvinfo = etun_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_rx_csum = etun_get_rx_csum, + .set_rx_csum = etun_set_rx_csum, + .get_tx_csum = etun_get_tx_csum, + .set_tx_csum = etun_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#if 0 /* Does just setting the bit successfully emulate tso?
*/ + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +#endif + .get_strings = etun_get_strings, + .get_stats_count = etun_get_stats_count, + .get_ethtool_stats = etun_get_ethtool_stats, + .get_perm_addr = ethtool_op_get_perm_addr, +}; + +static int etun_open(struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + /* If we attempt to bring up etun in the small window before + * it is connected to it's partner error. + */ + if (!rx_dev) + return -ENOTCONN; + if (rx_dev->flags & IFF_UP) { + netif_carrier_on(tx_dev); + netif_carrier_on(rx_dev); + } + netif_start_queue(tx_dev); + return 0; +} + +static int etun_stop(struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + netif_stop_queue(tx_dev); + if (netif_carrier_ok(tx_dev)) { + netif_carrier_off(tx_dev); + netif_carrier_off(rx_dev); + } + return 0; +} + +static int etun_change_mtu(struct net_device *dev, int new_mtu) +{ + /* Don't allow ridiculously small mtus */ + if (new_mtu < (ETH_ZLEN - ETH_HLEN)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static void etun_set_multicast_list(struct net_device *dev) +{ + /* Nothing sane I can do here */ + return; +} + +static int etun_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + return -EOPNOTSUPP; +} + +/* Only allow letters and numbers in an etun device name */ +static int is_valid_name(const char *name) +{ + const char *ptr; + for (ptr = name; *ptr; ptr++) { + if (!isalnum(*ptr)) + return 0; + } + return 1; +} + +static struct net_device *etun_alloc(struct net *net, const char *name) +{ + struct net_device *dev; + struct etun_info *info; + int err; + + if (!name || !is_valid_name(name)) + return ERR_PTR(-EINVAL); + + dev = alloc_netdev(sizeof(struct etun_info), name, ether_setup); + if (!dev) + return ERR_PTR(-ENOMEM); + + info = dev->priv; + info->dev = dev; + dev->nd_net = net; + + 
random_ether_addr(dev->dev_addr); + dev->tx_queue_len = 0; /* A queue is silly for a loopback device */ + dev->hard_start_xmit = etun_xmit; + dev->get_stats = etun_get_stats; + dev->open = etun_open; + dev->stop = etun_stop; + dev->set_multicast_list = etun_set_multicast_list; + dev->do_ioctl = etun_ioctl; + dev->features = NETIF_F_FRAGLIST + | NETIF_F_HIGHDMA + | NETIF_F_LLTX; + dev->flags = IFF_BROADCAST | IFF_MULTICAST |IFF_PROMISC; + dev->ethtool_ops = &etun_ethtool_ops; + dev->destructor = free_netdev; + dev->change_mtu = etun_change_mtu; + err = register_netdev(dev); + if (err) { + free_netdev(dev); + dev = ERR_PTR(err); + goto out; + } + netif_carrier_off(dev); +out: + return dev; +} + +static int etun_alloc_pair(struct net *net, const char *name0, const char *name1) +{ + struct net_device *dev0, *dev1; + struct etun_info *info0, *info1; + + dev0 = etun_alloc(net, name0); + if (IS_ERR(dev0)) { + return PTR_ERR(dev0); + } + info0 = dev0->priv; + + dev1 = etun_alloc(net, name1); + if (IS_ERR(dev1)) { + unregister_netdev(dev0); + return PTR_ERR(dev1); + } + info1 = dev1->priv; + + dev_hold(dev0); + dev_hold(dev1); + info0->rx_dev = dev1; + info1->rx_dev = dev0; + + /* Only place one member of the pair on the list + * so I don't confuse list_for_each_entry_safe, + * by deleting two list entries at once. 
+ */ + rtnl_lock(); + list_add(&info0->list, &etun_list); + INIT_LIST_HEAD(&info1->list); + rtnl_unlock(); + + return 0; +} + +static int etun_unregister_pair(struct net_device *dev0) +{ + struct etun_info *info0, *info1; + struct net_device *dev1; + + ASSERT_RTNL(); + + if (!dev0) + return -ENODEV; + + /* Ensure my network devices are not passing packets */ + dev_close(dev0); + info0 = dev0->priv; + dev1 = info0->rx_dev; + info1 = dev1->priv; + dev_close(dev1); + + /* Drop the cross device references */ + dev_put(dev0); + dev_put(dev1); + + /* Remove from the etun list */ + if (!list_empty(&info0->list)) + list_del_init(&info0->list); + if (!list_empty(&info1->list)) + list_del_init(&info1->list); + + unregister_netdevice(dev0); + unregister_netdevice(dev1); + return 0; +} + +static int etun_noget(char *buffer, struct kernel_param *kp) +{ + return 0; +} + +static int etun_newif(const char *val, struct kernel_param *kp) +{ + char name0[IFNAMSIZ], name1[IFNAMSIZ]; + const char *mid; + int len, len0, len1; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Avoid frustration by removing trailing whitespace */ + len = strlen(val); + while (len > 0 && isspace(val[len - 1])) /* len > 0: never index val[-1] on empty/all-blank input */ + len--; + + /* Split the string into 2 names */ + mid = memchr(val, ',', len); + if (!mid) + return -EINVAL; + + /* Get the first device name */ + len0 = mid - val; + if (len0 > sizeof(name0) - 1) + return -EINVAL; /* first name does not fit in name0[]; reject instead of overflowing the stack */ + strncpy(name0, val, len0); + name0[len0] = '\0'; + + /* And the second device name */ + len1 = len - (len0 + 1); + if (len1 > sizeof(name1) - 1) + len1 = sizeof(name1) - 1; + strncpy(name1, mid + 1, len1); + name1[len1] = '\0'; + + return etun_alloc_pair(current->nsproxy->net_ns, name0, name1); +} + +static int etun_delif(const char *val, struct kernel_param *kp) +{ + char name[IFNAMSIZ]; + int len; + struct net_device *dev; + int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Avoid frustration by removing trailing whitespace */ + len = strlen(val); + while 
(len > 0 && isspace(val[len - 1])) /* len > 0: never index val[-1] on empty/all-blank input */ + len--; + + /* Get the device name */ + if (len > sizeof(name) - 1) + return -EINVAL; + strncpy(name, val, len); + name[len] = '\0'; + + /* Double check I don't have strange characters in my device name */ + if (!is_valid_name(name)) + return -EINVAL; + + rtnl_lock(); + err = -ENODEV; /* result when no device of that name exists */ + dev = __dev_get_by_name(current->nsproxy->net_ns, name); + if (dev) err = (dev->hard_start_xmit == etun_xmit) ? etun_unregister_pair(dev) : -EINVAL; /* only tear down etun halves; a foreign dev->priv is not a struct etun_info */ + rtnl_unlock(); + return err; +} + +static int __init etun_init(void) +{ + printk(KERN_INFO "etun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); + printk(KERN_INFO "etun: %s\n", DRV_COPYRIGHT); + + return 0; +} + +static void etun_cleanup(void) +{ + struct etun_info *info, *tmp; + rtnl_lock(); + list_for_each_entry_safe(info, tmp, &etun_list, list) { + etun_unregister_pair(info->dev); + } + rtnl_unlock(); +} + +module_param_call(newif, etun_newif, etun_noget, NULL, S_IWUSR); +module_param_call(delif, etun_delif, etun_noget, NULL, S_IWUSR); +module_init(etun_init); +module_exit(etun_cleanup); +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("Eric Biederman "); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/drivers/net/fealnx.c linux-2.6.22-591/drivers/net/fealnx.c --- linux-2.6.22-570/drivers/net/fealnx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/fealnx.c 2007-12-21 15:36:12.000000000 -0500 @@ -1727,8 +1727,8 @@ /* Call copy + cksum if available. */ #if ! 
defined(__alpha__) - eth_copy_and_sum(skb, - np->cur_rx->skbuff->data, pkt_len, 0); + skb_copy_to_linear_data(skb, + np->cur_rx->skbuff->data, pkt_len); skb_put(skb, pkt_len); #else memcpy(skb_put(skb, pkt_len), diff -Nurb linux-2.6.22-570/drivers/net/fec.c linux-2.6.22-591/drivers/net/fec.c --- linux-2.6.22-570/drivers/net/fec.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/fec.c 2007-12-21 15:36:12.000000000 -0500 @@ -648,7 +648,7 @@ fep->stats.rx_dropped++; } else { skb_put(skb,pkt_len-4); /* Make room */ - eth_copy_and_sum(skb, data, pkt_len-4, 0); + skb_copy_to_linear_data(skb, data, pkt_len-4); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); } diff -Nurb linux-2.6.22-570/drivers/net/hamachi.c linux-2.6.22-591/drivers/net/hamachi.c --- linux-2.6.22-570/drivers/net/hamachi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/hamachi.c 2007-12-21 15:36:12.000000000 -0500 @@ -1575,8 +1575,8 @@ PCI_DMA_FROMDEVICE); /* Call copy + cksum if available. 
*/ #if 1 || USE_IP_COPYSUM - eth_copy_and_sum(skb, - hmp->rx_skbuff[entry]->data, pkt_len, 0); + skb_copy_to_linear_data(skb, + hmp->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); #else memcpy(skb_put(skb, pkt_len), hmp->rx_ring_dma diff -Nurb linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c linux-2.6.22-591/drivers/net/hamradio/baycom_epp.c --- linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/hamradio/baycom_epp.c 2007-12-21 15:36:12.000000000 -0500 @@ -320,7 +320,7 @@ sprintf(portarg, "%ld", bc->pdev->port->base); printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); - return call_usermodehelper(eppconfig_path, argv, envp, 1); + return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC); } /* ---------------------------------------------------------------------- */ diff -Nurb linux-2.6.22-570/drivers/net/hamradio/bpqether.c linux-2.6.22-591/drivers/net/hamradio/bpqether.c --- linux-2.6.22-570/drivers/net/hamradio/bpqether.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/hamradio/bpqether.c 2007-12-21 15:36:14.000000000 -0500 @@ -83,6 +83,7 @@ #include #include +#include #include @@ -172,6 +173,9 @@ struct ethhdr *eth; struct bpqdev *bpq; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -559,6 +563,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!dev_is_ethdev(dev)) return NOTIFY_DONE; @@ -594,7 +601,7 @@ static int __init bpq_init_driver(void) { #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create("bpqether", S_IRUGO, &bpq_info_fops)) { + if (!proc_net_fops_create(&init_net, "bpqether", S_IRUGO, &bpq_info_fops)) { printk(KERN_ERR "bpq: cannot create /proc/net/bpqether entry.\n"); return -ENOENT; @@ -618,7 +625,7 @@ unregister_netdevice_notifier(&bpq_dev_notifier); - 
proc_net_remove("bpqether"); + proc_net_remove(&init_net, "bpqether"); rtnl_lock(); while (!list_empty(&bpq_devices)) { diff -Nurb linux-2.6.22-570/drivers/net/hamradio/scc.c linux-2.6.22-591/drivers/net/hamradio/scc.c --- linux-2.6.22-570/drivers/net/hamradio/scc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/hamradio/scc.c 2007-12-21 15:36:14.000000000 -0500 @@ -174,6 +174,7 @@ #include #include +#include #include #include @@ -2114,7 +2115,7 @@ } rtnl_unlock(); - proc_net_fops_create("z8530drv", 0, &scc_net_seq_fops); + proc_net_fops_create(&init_net, "z8530drv", 0, &scc_net_seq_fops); return 0; } @@ -2169,7 +2170,7 @@ if (Vector_Latch) release_region(Vector_Latch, 1); - proc_net_remove("z8530drv"); + proc_net_remove(&init_net, "z8530drv"); } MODULE_AUTHOR("Joerg Reuter "); diff -Nurb linux-2.6.22-570/drivers/net/hamradio/yam.c linux-2.6.22-591/drivers/net/hamradio/yam.c --- linux-2.6.22-570/drivers/net/hamradio/yam.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/hamradio/yam.c 2007-12-21 15:36:14.000000000 -0500 @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -1142,7 +1143,7 @@ yam_timer.expires = jiffies + HZ / 100; add_timer(&yam_timer); - proc_net_fops_create("yam", S_IRUGO, &yam_info_fops); + proc_net_fops_create(&init_net, "yam", S_IRUGO, &yam_info_fops); return 0; error: while (--i >= 0) { @@ -1174,7 +1175,7 @@ kfree(p); } - proc_net_remove("yam"); + proc_net_remove(&init_net, "yam"); } /* --------------------------------------------------------------------- */ diff -Nurb linux-2.6.22-570/drivers/net/ibmveth.c linux-2.6.22-591/drivers/net/ibmveth.c --- linux-2.6.22-570/drivers/net/ibmveth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/ibmveth.c 2007-12-21 15:36:14.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -97,7 +98,7 @@ static struct kobj_type ktype_veth_pool; #ifdef CONFIG_PROC_FS -#define 
IBMVETH_PROC_DIR "net/ibmveth" +#define IBMVETH_PROC_DIR "ibmveth" static struct proc_dir_entry *ibmveth_proc_dir; #endif @@ -1093,7 +1094,7 @@ #ifdef CONFIG_PROC_FS static void ibmveth_proc_register_driver(void) { - ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL); + ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, init_net.proc_net); if (ibmveth_proc_dir) { SET_MODULE_OWNER(ibmveth_proc_dir); } @@ -1101,7 +1102,7 @@ static void ibmveth_proc_unregister_driver(void) { - remove_proc_entry(IBMVETH_PROC_DIR, NULL); + remove_proc_entry(IBMVETH_PROC_DIR, init_net.proc_net); } static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos) @@ -1337,7 +1338,7 @@ #define ATTR(_name, _mode) \ struct attribute veth_##_name##_attr = { \ - .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \ + .name = __stringify(_name), .mode = _mode, \ }; static ATTR(active, 0644); diff -Nurb linux-2.6.22-570/drivers/net/ifb.c linux-2.6.22-591/drivers/net/ifb.c --- linux-2.6.22-570/drivers/net/ifb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/ifb.c 2007-12-21 15:36:12.000000000 -0500 @@ -33,12 +33,15 @@ #include #include #include +#include #include #define TX_TIMEOUT (2*HZ) #define TX_Q_LIMIT 32 struct ifb_private { + struct list_head list; + struct net_device *dev; struct net_device_stats stats; struct tasklet_struct ifb_tasklet; int tasklet_pending; @@ -136,13 +139,14 @@ } -static void __init ifb_setup(struct net_device *dev) +static void ifb_setup(struct net_device *dev) { /* Initialize the device structure. */ dev->get_stats = ifb_get_stats; dev->hard_start_xmit = ifb_xmit; dev->open = &ifb_open; dev->stop = &ifb_close; + dev->destructor = free_netdev; /* Fill in device structure with ethernet-generic values. */ ether_setup(dev); @@ -197,7 +201,7 @@ return stats; } -static struct net_device **ifbs; +static LIST_HEAD(ifbs); /* Number of ifb devices to be set up by this module. 
*/ module_param(numifbs, int, 0); @@ -226,9 +230,41 @@ return 0; } +static int ifb_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ifb_private *priv = netdev_priv(dev); + int err; + + err = register_netdevice(dev); + if (err < 0) + return err; + + priv->dev = dev; + list_add_tail(&priv->list, &ifbs); + return 0; +} + +static void ifb_dellink(struct net_device *dev) +{ + struct ifb_private *priv = netdev_priv(dev); + + list_del(&priv->list); + unregister_netdevice(dev); +} + +static struct rtnl_link_ops ifb_link_ops __read_mostly = { + .kind = "ifb", + .priv_size = sizeof(struct ifb_private), + .setup = ifb_setup, + .newlink = ifb_newlink, + .dellink = ifb_dellink, +}; + static int __init ifb_init_one(int index) { struct net_device *dev_ifb; + struct ifb_private *priv; int err; dev_ifb = alloc_netdev(sizeof(struct ifb_private), @@ -237,49 +273,59 @@ if (!dev_ifb) return -ENOMEM; - if ((err = register_netdev(dev_ifb))) { - free_netdev(dev_ifb); - dev_ifb = NULL; - } else { - ifbs[index] = dev_ifb; - } + err = dev_alloc_name(dev_ifb, dev_ifb->name); + if (err < 0) + goto err; + + dev_ifb->rtnl_link_ops = &ifb_link_ops; + err = register_netdevice(dev_ifb); + if (err < 0) + goto err; + + priv = netdev_priv(dev_ifb); + priv->dev = dev_ifb; + list_add_tail(&priv->list, &ifbs); + return 0; +err: + free_netdev(dev_ifb); return err; } -static void ifb_free_one(int index) -{ - unregister_netdev(ifbs[index]); - free_netdev(ifbs[index]); -} - static int __init ifb_init_module(void) { - int i, err = 0; - ifbs = kmalloc(numifbs * sizeof(void *), GFP_KERNEL); - if (!ifbs) - return -ENOMEM; + struct ifb_private *priv, *next; + int i, err; + + rtnl_lock(); + err = __rtnl_link_register(&ifb_link_ops); + for (i = 0; i < numifbs && !err; i++) err = ifb_init_one(i); if (err) { - i--; - while (--i >= 0) - ifb_free_one(i); + list_for_each_entry_safe(priv, next, &ifbs, list) + ifb_dellink(priv->dev); + __rtnl_link_unregister(&ifb_link_ops); } + 
rtnl_unlock(); return err; } static void __exit ifb_cleanup_module(void) { - int i; + struct ifb_private *priv, *next; + + rtnl_lock(); + list_for_each_entry_safe(priv, next, &ifbs, list) + ifb_dellink(priv->dev); - for (i = 0; i < numifbs; i++) - ifb_free_one(i); - kfree(ifbs); + __rtnl_link_unregister(&ifb_link_ops); + rtnl_unlock(); } module_init(ifb_init_module); module_exit(ifb_cleanup_module); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jamal Hadi Salim"); +MODULE_ALIAS_RTNL_LINK("ifb"); diff -Nurb linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c linux-2.6.22-591/drivers/net/ixp2000/ixpdev.c --- linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/ixp2000/ixpdev.c 2007-12-21 15:36:12.000000000 -0500 @@ -111,7 +111,7 @@ skb = dev_alloc_skb(desc->pkt_length + 2); if (likely(skb != NULL)) { skb_reserve(skb, 2); - eth_copy_and_sum(skb, buf, desc->pkt_length, 0); + skb_copy_to_linear_data(skb, buf, desc->pkt_length); skb_put(skb, desc->pkt_length); skb->protocol = eth_type_trans(skb, nds[desc->channel]); diff -Nurb linux-2.6.22-570/drivers/net/kgdboe.c linux-2.6.22-591/drivers/net/kgdboe.c --- linux-2.6.22-570/drivers/net/kgdboe.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/net/kgdboe.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,294 @@ +/* + * drivers/net/kgdboe.c + * + * A network interface for GDB. + * Based upon 'gdbserial' by David Grothe + * and Scott Foehner + * + * Maintainers: Amit S. Kale and + * Tom Rini + * + * 2004 (c) Amit S. Kale + * 2004-2005 (c) MontaVista Software, Inc. + * 2005 (c) Wind River Systems, Inc. + * + * Contributors at various stages not listed above: + * San Mehat , Robert Walsh , + * wangdi , Matt Mackall , + * Pavel Machek , Jason Wessel + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +#define IN_BUF_SIZE 512 /* power of 2, please */ +#define NOT_CONFIGURED_STRING "not_configured" +#define OUT_BUF_SIZE 30 /* We don't want to send too big of a packet. */ +#define MAX_KGDBOE_CONFIG_STR 256 + +static char in_buf[IN_BUF_SIZE], out_buf[OUT_BUF_SIZE]; +static int in_head, in_tail, out_count; +static atomic_t in_count; +/* 0 = unconfigured, 1 = netpoll options parsed, 2 = fully configured. */ +static int configured; +static struct kgdb_io local_kgdb_io_ops; +static int use_dynamic_mac; + +MODULE_DESCRIPTION("KGDB driver for network interfaces"); +MODULE_LICENSE("GPL"); +static char config[MAX_KGDBOE_CONFIG_STR] = NOT_CONFIGURED_STRING; +static struct kparam_string kps = { + .string = config, + .maxlen = MAX_KGDBOE_CONFIG_STR, +}; + +static void rx_hook(struct netpoll *np, int port, char *msg, int len, + struct sk_buff *skb) +{ + int i; + + np->remote_port = port; + + /* Copy the MAC address if we need to. */ + if (use_dynamic_mac) { + memcpy(np->remote_mac, eth_hdr(skb)->h_source, + sizeof(np->remote_mac)); + use_dynamic_mac = 0; + } + + /* + * This could be GDB trying to attach. But it could also be GDB + * finishing up a session, with kgdb_connected=0 but GDB sending + * an ACK for the final packet. To make sure we don't try and + * make a breakpoint when GDB is leaving, make sure that if + * !kgdb_connected the only len == 1 packet we allow is ^C. 
+ */ + if (!kgdb_connected && (len != 1 || msg[0] == 3) && + !atomic_read(&kgdb_setting_breakpoint)) { + tasklet_schedule(&kgdb_tasklet_breakpoint); + } + + for (i = 0; i < len; i++) { + if (msg[i] == 3) + tasklet_schedule(&kgdb_tasklet_breakpoint); + + if (atomic_read(&in_count) >= IN_BUF_SIZE) { + /* buffer overflow, clear it */ + in_head = in_tail = 0; + atomic_set(&in_count, 0); + break; + } + in_buf[in_head++] = msg[i]; + in_head &= (IN_BUF_SIZE - 1); + atomic_inc(&in_count); + } +} + +static struct netpoll np = { + .dev_name = "eth0", + .name = "kgdboe", + .rx_hook = rx_hook, + .local_port = 6443, + .remote_port = 6442, + .remote_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, +}; + +static void eth_pre_exception_handler(void) +{ + /* Increment the module count when the debugger is active */ + if (!kgdb_connected) + try_module_get(THIS_MODULE); + netpoll_set_trap(1); +} + +static void eth_post_exception_handler(void) +{ + /* decrement the module count when the debugger detaches */ + if (!kgdb_connected) + module_put(THIS_MODULE); + netpoll_set_trap(0); +} + +static int eth_get_char(void) +{ + int chr; + + while (atomic_read(&in_count) == 0) + netpoll_poll(&np); + + chr = in_buf[in_tail++]; + in_tail &= (IN_BUF_SIZE - 1); + atomic_dec(&in_count); + return chr; +} + +static void eth_flush_buf(void) +{ + if (out_count) { + if (np.dev) netpoll_send_udp(&np, out_buf, out_count); /* no device: drop the data, but still reset so eth_put_char cannot run past out_buf */ + memset(out_buf, 0, sizeof(out_buf)); + out_count = 0; + } +} + +static void eth_put_char(u8 chr) +{ + out_buf[out_count++] = chr; + if (out_count == OUT_BUF_SIZE) + eth_flush_buf(); +} + +static int option_setup(char *opt) +{ + char opt_scratch[MAX_KGDBOE_CONFIG_STR]; + + /* If we're being given a new configuration, copy it in. */ + if (opt != config) + strlcpy(config, opt, sizeof(config)); /* bounded: opt may come straight from the boot command line */ + /* But work on a copy as netpoll_parse_options will eat it. 
*/ + strlcpy(opt_scratch, opt, sizeof(opt_scratch)); /* bounded copy into the on-stack scratch buffer */ + configured = !netpoll_parse_options(&np, opt_scratch); + + use_dynamic_mac = 1; + + return 0; +} +__setup("kgdboe=", option_setup); + +/* With our config string set by some means, configure kgdboe. */ +static int configure_kgdboe(void) +{ + /* Try out the string. */ + option_setup(config); + + if (!configured) { + printk(KERN_ERR "kgdboe: configuration incorrect - kgdboe not " + "loaded.\n"); + printk(KERN_ERR " Usage: kgdboe=[src-port]@[src-ip]/[dev]," + "[tgt-port]@/\n"); + return -EINVAL; + } + + /* Bring it up. */ + if (netpoll_setup(&np)) { + printk(KERN_ERR "kgdboe: netpoll_setup failed kgdboe failed\n"); + return -EINVAL; + } + + if (kgdb_register_io_module(&local_kgdb_io_ops)) { + netpoll_cleanup(&np); + return -EINVAL; + } + + configured = 2; + + return 0; +} + +static int init_kgdboe(void) +{ + int ret; + + /* Already done? */ + if (configured == 2) + return 0; + + /* OK, go ahead and do it. */ + ret = configure_kgdboe(); + + if (configured == 2) + printk(KERN_INFO "kgdboe: debugging over ethernet enabled\n"); + + return ret; +} + +static void cleanup_kgdboe(void) +{ + netpoll_cleanup(&np); + configured = 0; + kgdb_unregister_io_module(&local_kgdb_io_ops); +} + +static int param_set_kgdboe_var(const char *kmessage, struct kernel_param *kp) +{ + char kmessage_save[MAX_KGDBOE_CONFIG_STR]; + int msg_len = strlen(kmessage); + + if (msg_len + 1 > MAX_KGDBOE_CONFIG_STR) { + printk(KERN_ERR "%s: string doesn't fit in %u chars.\n", + kp->name, MAX_KGDBOE_CONFIG_STR - 1); + return -ENOSPC; + } + + if (kgdb_connected) { + printk(KERN_ERR "kgdboe: Cannot reconfigure while KGDB is " + "connected.\n"); + return 0; + } + + /* Start the reconfiguration process by saving the old string */ + strncpy(kmessage_save, config, sizeof(kmessage_save)); + + + /* Copy in the new param and strip out invalid characters so we + * can optionally specify the MAC. 
+ */ + strncpy(config, kmessage, sizeof(config)); + msg_len--; + while (msg_len > 0 && + (config[msg_len] < ',' || config[msg_len] > 'f')) { + config[msg_len] = '\0'; + msg_len--; + } + + /* Check to see if we are unconfiguring the io module and that it + * was in a fully configured state, as this is the only time that + * netpoll_cleanup should get called + */ + if (configured == 2 && strcmp(config, NOT_CONFIGURED_STRING) == 0) { + printk(KERN_INFO "kgdboe: reverting to unconfigured state\n"); + cleanup_kgdboe(); + return 0; + } else + /* Go and configure with the new params. */ + configure_kgdboe(); + + if (configured == 2) + return 0; + + /* If the new string was invalid, revert to the previous state, which + * is at a minimum not_configured. */ + strncpy(config, kmessage_save, sizeof(config)); + if (strcmp(kmessage_save, NOT_CONFIGURED_STRING) != 0) { + printk(KERN_INFO "kgdboe: reverting to prior configuration\n"); + /* revert back to the original config */ + strncpy(config, kmessage_save, sizeof(config)); + configure_kgdboe(); + } + return 0; +} + +static struct kgdb_io local_kgdb_io_ops = { + .read_char = eth_get_char, + .write_char = eth_put_char, + .init = init_kgdboe, + .flush = eth_flush_buf, + .pre_exception = eth_pre_exception_handler, + .post_exception = eth_post_exception_handler +}; + +module_init(init_kgdboe); +module_exit(cleanup_kgdboe); +module_param_call(kgdboe, param_set_kgdboe_var, param_get_string, &kps, 0644); +MODULE_PARM_DESC(kgdboe, " kgdboe=[src-port]@[src-ip]/[dev]," + "[tgt-port]@/\n"); diff -Nurb linux-2.6.22-570/drivers/net/lance.c linux-2.6.22-591/drivers/net/lance.c --- linux-2.6.22-570/drivers/net/lance.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/lance.c 2007-12-21 15:36:12.000000000 -0500 @@ -1186,9 +1186,9 @@ } skb_reserve(skb,2); /* 16 byte align */ skb_put(skb,pkt_len); /* Make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)isa_bus_to_virt((lp->rx_ring[entry].base & 
0x00ffffff)), - pkt_len,0); + pkt_len); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/loopback.c linux-2.6.22-591/drivers/net/loopback.c --- linux-2.6.22-570/drivers/net/loopback.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/loopback.c 2007-12-21 15:36:14.000000000 -0500 @@ -57,6 +57,7 @@ #include #include #include +#include struct pcpu_lstats { unsigned long packets; @@ -199,39 +200,52 @@ .get_rx_csum = always_on, }; +static int loopback_net_init(struct net *net) +{ + struct net_device *lo = &net->loopback_dev; /* * The loopback device is special. There is only one instance and * it is statically allocated. Don't do this for other devices. */ -struct net_device loopback_dev = { - .name = "lo", - .get_stats = &get_stats, - .mtu = (16 * 1024) + 20 + 20 + 12, - .hard_start_xmit = loopback_xmit, - .hard_header = eth_header, - .hard_header_cache = eth_header_cache, - .header_cache_update = eth_header_cache_update, - .hard_header_len = ETH_HLEN, /* 14 */ - .addr_len = ETH_ALEN, /* 6 */ - .tx_queue_len = 0, - .type = ARPHRD_LOOPBACK, /* 0x0001*/ - .rebuild_header = eth_rebuild_header, - .flags = IFF_LOOPBACK, - .features = NETIF_F_SG | NETIF_F_FRAGLIST + strcpy(lo->name, "lo"); + lo->get_stats = &get_stats, + lo->mtu = (16 * 1024) + 20 + 20 + 12, + lo->hard_start_xmit = loopback_xmit, + lo->hard_header = eth_header, + lo->hard_header_cache = eth_header_cache, + lo->header_cache_update = eth_header_cache_update, + lo->hard_header_len = ETH_HLEN, /* 14 */ + lo->addr_len = ETH_ALEN, /* 6 */ + lo->tx_queue_len = 0, + lo->type = ARPHRD_LOOPBACK, /* 0x0001*/ + lo->rebuild_header = eth_rebuild_header, + lo->flags = IFF_LOOPBACK, + lo->features = NETIF_F_SG | NETIF_F_FRAGLIST #ifdef LOOPBACK_TSO | NETIF_F_TSO #endif | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA - | NETIF_F_LLTX, - .ethtool_ops = &loopback_ethtool_ops, + | NETIF_F_LLTX + | NETIF_F_NETNS_LOCAL, + lo->ethtool_ops = 
&loopback_ethtool_ops, + lo->nd_net = net; + return register_netdev(lo); +} + +static void loopback_net_exit(struct net *net) +{ + unregister_netdev(&net->loopback_dev); +} + +static struct pernet_operations loopback_net_ops = { + .init = loopback_net_init, + .exit = loopback_net_exit, }; /* Setup and register the loopback device. */ static int __init loopback_init(void) { - return register_netdev(&loopback_dev); + return register_pernet_device(&loopback_net_ops); }; module_init(loopback_init); - -EXPORT_SYMBOL(loopback_dev); diff -Nurb linux-2.6.22-570/drivers/net/natsemi.c linux-2.6.22-591/drivers/net/natsemi.c --- linux-2.6.22-570/drivers/net/natsemi.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/net/natsemi.c 2007-12-21 15:36:12.000000000 -0500 @@ -2357,8 +2357,8 @@ np->rx_dma[entry], buflen, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(skb, - np->rx_skbuff[entry]->data, pkt_len, 0); + skb_copy_to_linear_data(skb, + np->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); pci_dma_sync_single_for_device(np->pci_dev, np->rx_dma[entry], diff -Nurb linux-2.6.22-570/drivers/net/ni52.c linux-2.6.22-591/drivers/net/ni52.c --- linux-2.6.22-570/drivers/net/ni52.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/ni52.c 2007-12-21 15:36:12.000000000 -0500 @@ -936,7 +936,7 @@ { skb_reserve(skb,2); skb_put(skb,totlen); - eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0); + skb_copy_to_linear_data(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/ni65.c linux-2.6.22-591/drivers/net/ni65.c --- linux-2.6.22-570/drivers/net/ni65.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/ni65.c 2007-12-21 15:36:12.000000000 -0500 @@ -1096,7 +1096,7 @@ #ifdef RCV_VIA_SKB if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) { skb_put(skb,len); - eth_copy_and_sum(skb, (unsigned 
char *)(p->recv_skb[p->rmdnum]->data),len,0); + skb_copy_to_linear_data(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len); } else { struct sk_buff *skb1 = p->recv_skb[p->rmdnum]; @@ -1108,7 +1108,7 @@ } #else skb_put(skb,len); - eth_copy_and_sum(skb, (unsigned char *) p->recvbounce[p->rmdnum],len,0); + skb_copy_to_linear_data(skb, (unsigned char *) p->recvbounce[p->rmdnum],len); #endif p->stats.rx_packets++; p->stats.rx_bytes += len; diff -Nurb linux-2.6.22-570/drivers/net/pci-skeleton.c linux-2.6.22-591/drivers/net/pci-skeleton.c --- linux-2.6.22-570/drivers/net/pci-skeleton.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/pci-skeleton.c 2007-12-21 15:36:12.000000000 -0500 @@ -1567,7 +1567,7 @@ if (skb) { skb_reserve (skb, 2); /* 16 byte align the IP fields. */ - eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); + skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size); skb_put (skb, pkt_size); skb->protocol = eth_type_trans (skb, dev); diff -Nurb linux-2.6.22-570/drivers/net/pcnet32.c linux-2.6.22-591/drivers/net/pcnet32.c --- linux-2.6.22-570/drivers/net/pcnet32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/pcnet32.c 2007-12-21 15:36:12.000000000 -0500 @@ -1235,9 +1235,9 @@ lp->rx_dma_addr[entry], pkt_len, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)(lp->rx_skbuff[entry]->data), - pkt_len, 0); + pkt_len); pci_dma_sync_single_for_device(lp->pci_dev, lp->rx_dma_addr[entry], pkt_len, diff -Nurb linux-2.6.22-570/drivers/net/pppoe.c linux-2.6.22-591/drivers/net/pppoe.c --- linux-2.6.22-570/drivers/net/pppoe.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/pppoe.c 2007-12-21 15:36:14.000000000 -0500 @@ -78,6 +78,7 @@ #include #include +#include #include #include @@ -210,7 +211,7 @@ struct net_device *dev; int ifindex; - dev = dev_get_by_name(sp->sa_addr.pppoe.dev); + dev = dev_get_by_name(&init_net, 
sp->sa_addr.pppoe.dev); if(!dev) return NULL; ifindex = dev->ifindex; @@ -295,6 +296,9 @@ { struct net_device *dev = (struct net_device *) ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEMTU: @@ -380,6 +384,9 @@ struct pppoe_hdr *ph; struct pppox_sock *po; + if (dev->nd_net != &init_net) + goto drop; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; @@ -412,6 +419,9 @@ struct pppoe_hdr *ph; struct pppox_sock *po; + if (dev->nd_net != &init_net) + goto abort; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; @@ -471,12 +481,12 @@ * Initialize a new struct sock. * **********************************************************************/ -static int pppoe_create(struct socket *sock) +static int pppoe_create(struct net *net, struct socket *sock) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); if (!sk) goto out; @@ -588,7 +598,7 @@ /* Don't re-bind if sid==0 */ if (sp->sa_addr.pppoe.sid != 0) { - dev = dev_get_by_name(sp->sa_addr.pppoe.dev); + dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev); error = -ENODEV; if (!dev) @@ -1064,7 +1074,7 @@ { struct proc_dir_entry *p; - p = create_proc_entry("net/pppoe", S_IRUGO, NULL); + p = create_proc_entry("pppoe", S_IRUGO, init_net.proc_net); if (!p) return -ENOMEM; @@ -1135,7 +1145,7 @@ dev_remove_pack(&pppoes_ptype); dev_remove_pack(&pppoed_ptype); unregister_netdevice_notifier(&pppoe_notifier); - remove_proc_entry("net/pppoe", NULL); + remove_proc_entry("pppoe", init_net.proc_net); proto_unregister(&pppoe_sk_proto); } diff -Nurb linux-2.6.22-570/drivers/net/pppox.c linux-2.6.22-591/drivers/net/pppox.c --- linux-2.6.22-570/drivers/net/pppox.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/pppox.c 2007-12-21 15:36:14.000000000 -0500 @@ -107,10 +107,13 
@@ EXPORT_SYMBOL(pppox_ioctl); -static int pppox_create(struct socket *sock, int protocol) +static int pppox_create(struct net *net, struct socket *sock, int protocol) { int rc = -EPROTOTYPE; + if (net != &init_net) + return -EAFNOSUPPORT; + if (protocol < 0 || protocol > PX_MAX_PROTO) goto out; @@ -126,7 +129,7 @@ !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(sock); + rc = pppox_protos[protocol]->create(net, sock); module_put(pppox_protos[protocol]->owner); out: diff -Nurb linux-2.6.22-570/drivers/net/r8169.c linux-2.6.22-591/drivers/net/r8169.c --- linux-2.6.22-570/drivers/net/r8169.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/net/r8169.c 2007-12-21 15:36:12.000000000 -0500 @@ -2492,7 +2492,7 @@ skb = dev_alloc_skb(pkt_size + align); if (skb) { skb_reserve(skb, (align - 1) & (unsigned long)skb->data); - eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0); + skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); *sk_buff = skb; rtl8169_mark_to_asic(desc, rx_buf_sz); ret = 0; diff -Nurb linux-2.6.22-570/drivers/net/saa9730.c linux-2.6.22-591/drivers/net/saa9730.c --- linux-2.6.22-570/drivers/net/saa9730.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/saa9730.c 2007-12-21 15:36:12.000000000 -0500 @@ -690,9 +690,9 @@ lp->stats.rx_packets++; skb_reserve(skb, 2); /* 16 byte align */ skb_put(skb, len); /* make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *) pData, - len, 0); + len); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/sgiseeq.c linux-2.6.22-591/drivers/net/sgiseeq.c --- linux-2.6.22-570/drivers/net/sgiseeq.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sgiseeq.c 2007-12-21 15:36:12.000000000 -0500 @@ -320,7 +320,7 @@ skb_put(skb, len); /* Copy out of kseg1 to avoid silly cache flush. 
*/ - eth_copy_and_sum(skb, pkt_pointer + 2, len, 0); + skb_copy_to_linear_data(skb, pkt_pointer + 2, len); skb->protocol = eth_type_trans(skb, dev); /* We don't want to receive our own packets */ diff -Nurb linux-2.6.22-570/drivers/net/shaper.c linux-2.6.22-591/drivers/net/shaper.c --- linux-2.6.22-570/drivers/net/shaper.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/shaper.c 2007-12-21 15:36:14.000000000 -0500 @@ -86,6 +86,7 @@ #include #include +#include struct shaper_cb { unsigned long shapeclock; /* Time it should go out */ @@ -488,7 +489,7 @@ { case SHAPER_SET_DEV: { - struct net_device *them=__dev_get_by_name(ss->ss_name); + struct net_device *them=__dev_get_by_name(&init_net, ss->ss_name); if(them==NULL) return -ENODEV; if(sh->dev) diff -Nurb linux-2.6.22-570/drivers/net/sis190.c linux-2.6.22-591/drivers/net/sis190.c --- linux-2.6.22-570/drivers/net/sis190.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sis190.c 2007-12-21 15:36:12.000000000 -0500 @@ -548,7 +548,7 @@ skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN); if (skb) { skb_reserve(skb, NET_IP_ALIGN); - eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0); + skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); *sk_buff = skb; sis190_give_to_asic(desc, rx_buf_sz); ret = 0; diff -Nurb linux-2.6.22-570/drivers/net/starfire.c linux-2.6.22-591/drivers/net/starfire.c --- linux-2.6.22-570/drivers/net/starfire.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/starfire.c 2007-12-21 15:36:12.000000000 -0500 @@ -1456,7 +1456,7 @@ pci_dma_sync_single_for_cpu(np->pci_dev, np->rx_info[entry].mapping, pkt_len, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(skb, np->rx_info[entry].skb->data, pkt_len, 0); + skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len); pci_dma_sync_single_for_device(np->pci_dev, np->rx_info[entry].mapping, pkt_len, PCI_DMA_FROMDEVICE); diff -Nurb linux-2.6.22-570/drivers/net/sun3_82586.c 
linux-2.6.22-591/drivers/net/sun3_82586.c --- linux-2.6.22-570/drivers/net/sun3_82586.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sun3_82586.c 2007-12-21 15:36:12.000000000 -0500 @@ -777,7 +777,7 @@ { skb_reserve(skb,2); skb_put(skb,totlen); - eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0); + skb_copy_to_linear_data(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); p->stats.rx_packets++; diff -Nurb linux-2.6.22-570/drivers/net/sun3lance.c linux-2.6.22-591/drivers/net/sun3lance.c --- linux-2.6.22-570/drivers/net/sun3lance.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sun3lance.c 2007-12-21 15:36:12.000000000 -0500 @@ -853,10 +853,9 @@ skb_reserve( skb, 2 ); /* 16 byte align */ skb_put( skb, pkt_len ); /* Make room */ -// skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len); - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), - pkt_len, 0); + pkt_len); skb->protocol = eth_type_trans( skb, dev ); netif_rx( skb ); diff -Nurb linux-2.6.22-570/drivers/net/sunbmac.c linux-2.6.22-591/drivers/net/sunbmac.c --- linux-2.6.22-570/drivers/net/sunbmac.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sunbmac.c 2007-12-21 15:36:12.000000000 -0500 @@ -860,7 +860,7 @@ sbus_dma_sync_single_for_cpu(bp->bigmac_sdev, this->rx_addr, len, SBUS_DMA_FROMDEVICE); - eth_copy_and_sum(copy_skb, (unsigned char *)skb->data, len, 0); + skb_copy_to_linear_data(copy_skb, (unsigned char *)skb->data, len); sbus_dma_sync_single_for_device(bp->bigmac_sdev, this->rx_addr, len, SBUS_DMA_FROMDEVICE); diff -Nurb linux-2.6.22-570/drivers/net/sundance.c linux-2.6.22-591/drivers/net/sundance.c --- linux-2.6.22-570/drivers/net/sundance.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sundance.c 2007-12-21 15:36:12.000000000 -0500 @@ -1313,7 +1313,7 @@ np->rx_buf_sz, PCI_DMA_FROMDEVICE); 
- eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0); + skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); pci_dma_sync_single_for_device(np->pci_dev, desc->frag[0].addr, np->rx_buf_sz, diff -Nurb linux-2.6.22-570/drivers/net/sunlance.c linux-2.6.22-591/drivers/net/sunlance.c --- linux-2.6.22-570/drivers/net/sunlance.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sunlance.c 2007-12-21 15:36:12.000000000 -0500 @@ -549,9 +549,9 @@ skb_reserve(skb, 2); /* 16 byte align */ skb_put(skb, len); /* make room */ - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, (unsigned char *)&(ib->rx_buf [entry][0]), - len, 0); + len); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/sunqe.c linux-2.6.22-591/drivers/net/sunqe.c --- linux-2.6.22-570/drivers/net/sunqe.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/sunqe.c 2007-12-21 15:36:12.000000000 -0500 @@ -439,8 +439,8 @@ } else { skb_reserve(skb, 2); skb_put(skb, len); - eth_copy_and_sum(skb, (unsigned char *) this_qbuf, - len, 0); + skb_copy_to_linear_data(skb, (unsigned char *) this_qbuf, + len); skb->protocol = eth_type_trans(skb, qep->dev); netif_rx(skb); qep->dev->last_rx = jiffies; diff -Nurb linux-2.6.22-570/drivers/net/tg3.c linux-2.6.22-591/drivers/net/tg3.c --- linux-2.6.22-570/drivers/net/tg3.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tg3.c 2007-12-21 15:36:12.000000000 -0500 @@ -11944,12 +11944,11 @@ * checksumming. 
*/ if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) { + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) - dev->features |= NETIF_F_HW_CSUM; - else - dev->features |= NETIF_F_IP_CSUM; - dev->features |= NETIF_F_SG; + dev->features |= NETIF_F_IPV6_CSUM; + tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS; } else tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS; diff -Nurb linux-2.6.22-570/drivers/net/tokenring/lanstreamer.c linux-2.6.22-591/drivers/net/tokenring/lanstreamer.c --- linux-2.6.22-570/drivers/net/tokenring/lanstreamer.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tokenring/lanstreamer.c 2007-12-21 15:36:14.000000000 -0500 @@ -250,7 +250,7 @@ #if STREAMER_NETWORK_MONITOR #ifdef CONFIG_PROC_FS if (!dev_streamer) - create_proc_read_entry("net/streamer_tr", 0, 0, + create_proc_read_entry("streamer_tr", 0, init_net.proc_net, streamer_proc_info, NULL); streamer_priv->next = dev_streamer; dev_streamer = streamer_priv; @@ -423,7 +423,7 @@ } } if (!dev_streamer) - remove_proc_entry("net/streamer_tr", NULL); + remove_proc_entry("streamer_tr", init_net.proc_net); } #endif #endif diff -Nurb linux-2.6.22-570/drivers/net/tokenring/olympic.c linux-2.6.22-591/drivers/net/tokenring/olympic.c --- linux-2.6.22-570/drivers/net/tokenring/olympic.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tokenring/olympic.c 2007-12-21 15:36:14.000000000 -0500 @@ -101,6 +101,7 @@ #include #include +#include #include #include @@ -268,9 +269,9 @@ printk("Olympic: %s registered as: %s\n",olympic_priv->olympic_card_name,dev->name); if (olympic_priv->olympic_network_monitor) { /* Must go after register_netdev as we need the device name */ char proc_name[20] ; - strcpy(proc_name,"net/olympic_") ; + strcpy(proc_name,"olympic_") ; strcat(proc_name,dev->name) ; - create_proc_read_entry(proc_name,0,NULL,olympic_proc_info,(void *)dev) ; + 
create_proc_read_entry(proc_name,0,init_net.proc_net,olympic_proc_info,(void *)dev) ; printk("Olympic: Network Monitor information: /proc/%s\n",proc_name); } return 0 ; @@ -1752,9 +1753,9 @@ if (olympic_priv->olympic_network_monitor) { char proc_name[20] ; - strcpy(proc_name,"net/olympic_") ; + strcpy(proc_name,"olympic_") ; strcat(proc_name,dev->name) ; - remove_proc_entry(proc_name,NULL); + remove_proc_entry(proc_name,init_net.proc_net); } unregister_netdev(dev) ; iounmap(olympic_priv->olympic_mmio) ; diff -Nurb linux-2.6.22-570/drivers/net/tulip/interrupt.c linux-2.6.22-591/drivers/net/tulip/interrupt.c --- linux-2.6.22-570/drivers/net/tulip/interrupt.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tulip/interrupt.c 2007-12-21 15:36:12.000000000 -0500 @@ -197,8 +197,8 @@ tp->rx_buffers[entry].mapping, pkt_len, PCI_DMA_FROMDEVICE); #if ! defined(__alpha__) - eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data, - pkt_len, 0); + skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data, + pkt_len); skb_put(skb, pkt_len); #else memcpy(skb_put(skb, pkt_len), @@ -420,8 +420,8 @@ tp->rx_buffers[entry].mapping, pkt_len, PCI_DMA_FROMDEVICE); #if ! 
defined(__alpha__) - eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data, - pkt_len, 0); + skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data, + pkt_len); skb_put(skb, pkt_len); #else memcpy(skb_put(skb, pkt_len), diff -Nurb linux-2.6.22-570/drivers/net/tulip/winbond-840.c linux-2.6.22-591/drivers/net/tulip/winbond-840.c --- linux-2.6.22-570/drivers/net/tulip/winbond-840.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tulip/winbond-840.c 2007-12-21 15:36:12.000000000 -0500 @@ -1232,7 +1232,7 @@ pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry], np->rx_skbuff[entry]->len, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0); + skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry], np->rx_skbuff[entry]->len, diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_cb.c linux-2.6.22-591/drivers/net/tulip/xircom_cb.c --- linux-2.6.22-570/drivers/net/tulip/xircom_cb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tulip/xircom_cb.c 2007-12-21 15:36:12.000000000 -0500 @@ -1208,7 +1208,7 @@ goto out; } skb_reserve(skb, 2); - eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0); + skb_copy_to_linear_data(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c linux-2.6.22-591/drivers/net/tulip/xircom_tulip_cb.c --- linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tulip/xircom_tulip_cb.c 2007-12-21 15:36:12.000000000 -0500 @@ -1242,8 +1242,8 @@ && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ #if ! 
defined(__alpha__) - eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1), - pkt_len, 0); + skb_copy_to_linear_data(skb, bus_to_virt(tp->rx_ring[entry].buffer1), + pkt_len); skb_put(skb, pkt_len); #else memcpy(skb_put(skb, pkt_len), diff -Nurb linux-2.6.22-570/drivers/net/tun.c linux-2.6.22-591/drivers/net/tun.c --- linux-2.6.22-570/drivers/net/tun.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/tun.c 2007-12-21 15:36:14.000000000 -0500 @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -432,6 +433,7 @@ init_waitqueue_head(&tun->read_wait); tun->owner = -1; + tun->group = -1; SET_MODULE_OWNER(dev); dev->open = tun_net_open; @@ -467,11 +469,14 @@ return -EBUSY; /* Check permissions */ - if (tun->owner != -1 && - current->euid != tun->owner && !capable(CAP_NET_ADMIN)) + if (((tun->owner != -1 && + current->euid != tun->owner) || + (tun->group != -1 && + current->egid != tun->group)) && + !capable(CAP_NET_ADMIN)) return -EPERM; } - else if (__dev_get_by_name(ifr->ifr_name)) + else if (__dev_get_by_name(&init_net, ifr->ifr_name)) return -EINVAL; else { char *name; @@ -610,6 +615,13 @@ DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner); break; + case TUNSETGROUP: + /* Set group of the device */ + tun->group= (gid_t) arg; + + DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group); + break; + case TUNSETLINK: /* Only allow setting the type when the interface is down */ if (tun->dev->flags & IFF_UP) { diff -Nurb linux-2.6.22-570/drivers/net/typhoon.c linux-2.6.22-591/drivers/net/typhoon.c --- linux-2.6.22-570/drivers/net/typhoon.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/typhoon.c 2007-12-21 15:36:12.000000000 -0500 @@ -1703,7 +1703,7 @@ pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(new_skb, skb->data, pkt_len, 0); + skb_copy_to_linear_data(new_skb, skb->data, pkt_len); 
pci_dma_sync_single_for_device(tp->pdev, dma_addr, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); diff -Nurb linux-2.6.22-570/drivers/net/usb/catc.c linux-2.6.22-591/drivers/net/usb/catc.c --- linux-2.6.22-570/drivers/net/usb/catc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/usb/catc.c 2007-12-21 15:36:12.000000000 -0500 @@ -255,7 +255,7 @@ if (!(skb = dev_alloc_skb(pkt_len))) return; - eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0); + skb_copy_to_linear_data(skb, pkt_start + pkt_offset, pkt_len); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, catc->netdev); diff -Nurb linux-2.6.22-570/drivers/net/usb/kaweth.c linux-2.6.22-591/drivers/net/usb/kaweth.c --- linux-2.6.22-570/drivers/net/usb/kaweth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/usb/kaweth.c 2007-12-21 15:36:12.000000000 -0500 @@ -635,7 +635,7 @@ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0); + skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len); skb_put(skb, pkt_len); diff -Nurb linux-2.6.22-570/drivers/net/via-rhine.c linux-2.6.22-591/drivers/net/via-rhine.c --- linux-2.6.22-570/drivers/net/via-rhine.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/via-rhine.c 2007-12-21 15:36:12.000000000 -0500 @@ -1492,9 +1492,9 @@ rp->rx_buf_sz, PCI_DMA_FROMDEVICE); - eth_copy_and_sum(skb, + skb_copy_to_linear_data(skb, rp->rx_skbuff[entry]->data, - pkt_len, 0); + pkt_len); skb_put(skb, pkt_len); pci_dma_sync_single_for_device(rp->pdev, rp->rx_skbuff_dma[entry], diff -Nurb linux-2.6.22-570/drivers/net/wan/dlci.c linux-2.6.22-591/drivers/net/wan/dlci.c --- linux-2.6.22-570/drivers/net/wan/dlci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wan/dlci.c 2007-12-21 15:36:14.000000000 -0500 @@ -361,7 +361,7 @@ /* validate slave device */ - slave = dev_get_by_name(dlci->devname); + slave = dev_get_by_name(&init_net, dlci->devname); if (!slave) 
return -ENODEV; @@ -427,7 +427,7 @@ int err; /* validate slave device */ - master = __dev_get_by_name(dlci->devname); + master = __dev_get_by_name(&init_net, dlci->devname); if (!master) return(-ENODEV); @@ -513,6 +513,9 @@ { struct net_device *dev = (struct net_device *) ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { struct dlci_local *dlp; diff -Nurb linux-2.6.22-570/drivers/net/wan/hdlc.c linux-2.6.22-591/drivers/net/wan/hdlc.c --- linux-2.6.22-570/drivers/net/wan/hdlc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wan/hdlc.c 2007-12-21 15:36:14.000000000 -0500 @@ -36,6 +36,7 @@ #include #include #include +#include static const char* version = "HDLC support module revision 1.21"; @@ -66,6 +67,12 @@ struct packet_type *p, struct net_device *orig_dev) { struct hdlc_device_desc *desc = dev_to_desc(dev); + + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (desc->netif_rx) return desc->netif_rx(skb); @@ -102,6 +109,9 @@ unsigned long flags; int on; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->get_stats != hdlc_get_stats) return NOTIFY_DONE; /* not an HDLC device */ diff -Nurb linux-2.6.22-570/drivers/net/wan/lapbether.c linux-2.6.22-591/drivers/net/wan/lapbether.c --- linux-2.6.22-570/drivers/net/wan/lapbether.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wan/lapbether.c 2007-12-21 15:36:14.000000000 -0500 @@ -91,6 +91,9 @@ int len, err; struct lapbethdev *lapbeth; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -391,6 +394,9 @@ struct lapbethdev *lapbeth; struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!dev_is_ethdev(dev)) return NOTIFY_DONE; diff -Nurb linux-2.6.22-570/drivers/net/wan/sbni.c linux-2.6.22-591/drivers/net/wan/sbni.c --- linux-2.6.22-570/drivers/net/wan/sbni.c 2007-07-08 19:32:17.000000000 -0400 
+++ linux-2.6.22-591/drivers/net/wan/sbni.c 2007-12-21 15:36:14.000000000 -0500 @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -1362,7 +1363,7 @@ if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name )) return -EFAULT; - slave_dev = dev_get_by_name( slave_name ); + slave_dev = dev_get_by_name(&init_net, slave_name ); if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { printk( KERN_ERR "%s: trying to enslave non-active " "device %s\n", dev->name, slave_name ); diff -Nurb linux-2.6.22-570/drivers/net/wan/syncppp.c linux-2.6.22-591/drivers/net/wan/syncppp.c --- linux-2.6.22-570/drivers/net/wan/syncppp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wan/syncppp.c 2007-12-21 15:36:14.000000000 -0500 @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -1445,6 +1446,11 @@ static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; sppp_input(dev,skb); diff -Nurb linux-2.6.22-570/drivers/net/wireless/airo.c linux-2.6.22-591/drivers/net/wireless/airo.c --- linux-2.6.22-570/drivers/net/wireless/airo.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wireless/airo.c 2007-12-21 15:36:12.000000000 -0500 @@ -3079,6 +3079,7 @@ struct airo_info *ai = dev->priv; int locked; + set_freezable(); while(1) { /* make swsusp happy with our thread */ try_to_freeze(); diff -Nurb linux-2.6.22-570/drivers/net/wireless/hostap/hostap_main.c linux-2.6.22-591/drivers/net/wireless/hostap/hostap_main.c --- linux-2.6.22-570/drivers/net/wireless/hostap/hostap_main.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wireless/hostap/hostap_main.c 2007-12-21 15:36:14.000000000 -0500 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1094,8 +1095,8 @@ static int 
__init hostap_init(void) { - if (proc_net != NULL) { - hostap_proc = proc_mkdir("hostap", proc_net); + if (init_net.proc_net != NULL) { + hostap_proc = proc_mkdir("hostap", init_net.proc_net); if (!hostap_proc) printk(KERN_WARNING "Failed to mkdir " "/proc/net/hostap\n"); @@ -1110,7 +1111,7 @@ { if (hostap_proc != NULL) { hostap_proc = NULL; - remove_proc_entry("hostap", proc_net); + remove_proc_entry("hostap", init_net.proc_net); } } diff -Nurb linux-2.6.22-570/drivers/net/wireless/libertas/main.c linux-2.6.22-591/drivers/net/wireless/libertas/main.c --- linux-2.6.22-570/drivers/net/wireless/libertas/main.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wireless/libertas/main.c 2007-12-21 15:36:12.000000000 -0500 @@ -613,6 +613,7 @@ init_waitqueue_entry(&wait, current); + set_freezable(); for (;;) { lbs_deb_thread( "main-thread 111: intcounter=%d " "currenttxskb=%p dnld_sent=%d\n", diff -Nurb linux-2.6.22-570/drivers/net/wireless/strip.c linux-2.6.22-591/drivers/net/wireless/strip.c --- linux-2.6.22-570/drivers/net/wireless/strip.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wireless/strip.c 2007-12-21 15:36:14.000000000 -0500 @@ -107,6 +107,7 @@ #include #include #include +#include #include #include @@ -1971,7 +1972,7 @@ sizeof(zero_address))) { struct net_device *dev; read_lock_bh(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->type == strip_info->dev->type && !memcmp(dev->dev_addr, &strip_info->true_dev_addr, @@ -2787,7 +2788,7 @@ /* * Register the status file with /proc */ - proc_net_fops_create("strip", S_IFREG | S_IRUGO, &strip_seq_fops); + proc_net_fops_create(&init_net, "strip", S_IFREG | S_IRUGO, &strip_seq_fops); return status; } @@ -2809,7 +2810,7 @@ } /* Unregister with the /proc/net file here. 
*/ - proc_net_remove("strip"); + proc_net_remove(&init_net, "strip"); if ((i = tty_unregister_ldisc(N_STRIP))) printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i); diff -Nurb linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c linux-2.6.22-591/drivers/net/wireless/wl3501_cs.c --- linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/wireless/wl3501_cs.c 2007-12-21 15:36:12.000000000 -0500 @@ -1011,7 +1011,7 @@ } else { skb->dev = dev; skb_reserve(skb, 2); /* IP headers on 16 bytes boundaries */ - eth_copy_and_sum(skb, (unsigned char *)&sig.daddr, 12, 0); + skb_copy_to_linear_data(skb, (unsigned char *)&sig.daddr, 12); wl3501_receive(this, skb->data, pkt_len); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, dev); diff -Nurb linux-2.6.22-570/drivers/net/xen-netfront.c linux-2.6.22-591/drivers/net/xen-netfront.c --- linux-2.6.22-570/drivers/net/xen-netfront.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/net/xen-netfront.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,1995 @@ +/* + * Virtual network driver for conversing with remote driver backends. 
+ * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +static struct ethtool_ops xennet_ethtool_ops; + +struct netfront_cb { + struct page *page; + unsigned offset; +}; + +#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) + +/* + * Mutually-exclusive module options to select receive data path: + * copy : Packets are copied by network backend into local memory + * flip : Page containing packet data is transferred to our ownership + * For fully-virtualised guests there is no option - copying must be used. + * For paravirtualised guests, flipping is the default. + */ +typedef enum rx_mode { + RX_COPY = 0, + RX_FLIP = 1, +} rx_mode_t; + +static enum rx_mode rx_mode = RX_FLIP; + +#define param_check_rx_mode_t(name, p) __param_check(name, p, rx_mode_t) + +static int param_set_rx_mode_t(const char *val, struct kernel_param *kp) +{ + enum rx_mode *rxmp = kp->arg; + int ret = 0; + + if (strcmp(val, "copy") == 0) + *rxmp = RX_COPY; + else if (strcmp(val, "flip") == 0) + *rxmp = RX_FLIP; + else + ret = -EINVAL; + + return ret; +} + +static int param_get_rx_mode_t(char *buffer, struct kernel_param *kp) +{ + enum rx_mode *rxmp = kp->arg; + + return sprintf(buffer, "%s", *rxmp == RX_COPY ? 
"copy" : "flip"); +} + +MODULE_PARM_DESC(rx_mode, "How to get packets from card: \"copy\" or \"flip\""); +module_param(rx_mode, rx_mode_t, 0400); + +#define RX_COPY_THRESHOLD 256 + +#define GRANT_INVALID_REF 0 + +#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + +struct netfront_info { + struct list_head list; + struct net_device *netdev; + + struct net_device_stats stats; + + struct xen_netif_tx_front_ring tx; + struct xen_netif_rx_front_ring rx; + + spinlock_t tx_lock; + spinlock_t rx_lock; + + unsigned int evtchn; + unsigned int copying_receiver; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 8 +#define RX_DFL_MIN_TARGET 64 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + unsigned rx_min_target, rx_max_target, rx_target; + struct sk_buff_head rx_batch; + + struct timer_list rx_refill_timer; + + /* + * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries + * are linked from tx_skb_freelist through skb_entry.link. + * + * NB. Freelist index entries are always going to be less than + * PAGE_OFFSET, whereas pointers to skbs will always be equal or + * greater than PAGE_OFFSET: we use this property to distinguish + * them. 
+ */ + union skb_entry { + struct sk_buff *skb; + unsigned link; + } tx_skbs[NET_TX_RING_SIZE];; + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; + unsigned tx_skb_freelist; + + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; + grant_ref_t gref_rx_head; + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + + struct xenbus_device *xbdev; + int tx_ring_ref; + int rx_ring_ref; + + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; +}; + +struct netfront_rx_info { + struct xen_netif_rx_response rx; + struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; + +/* + * Access macros for acquiring freeing slots in tx_skbs[]. + */ + +static void add_id_to_freelist(unsigned *head, union skb_entry *list, unsigned short id) +{ + list[id].link = *head; + *head = id; +} + +static unsigned short get_id_from_freelist(unsigned *head, union skb_entry *list) +{ + unsigned int id = *head; + *head = list[id].link; + return id; +} + +static int xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + struct sk_buff *skb = np->rx_skbs[i]; + np->rx_skbs[i] = NULL; + return skb; +} + +static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + grant_ref_t ref = np->grant_rx_ref[i]; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + return ref; +} + +#ifdef CONFIG_SYSFS +static int xennet_sysfs_addif(struct net_device *netdev); +static void xennet_sysfs_delif(struct net_device *netdev); +#else /* !CONFIG_SYSFS */ +#define xennet_sysfs_addif(dev) (0) +#define xennet_sysfs_delif(dev) do { } while(0) +#endif + +static int xennet_can_sg(struct net_device *dev) +{ + return dev->features & NETIF_F_SG; +} + + +static void rx_refill_timeout(unsigned long data) +{ + struct net_device *dev = (struct 
net_device *)data; + netif_rx_schedule(dev); +} + +static int netfront_tx_slot_available(struct netfront_info *np) +{ + return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < + (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); +} + +static void xennet_maybe_wake_tx(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + + if (unlikely(netif_queue_stopped(dev)) && + netfront_tx_slot_available(np) && + likely(netif_running(dev))) + netif_wake_queue(dev); +} + +static void xennet_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + struct page *page; + int i, batch_target, notify; + RING_IDX req_prod = np->rx.req_prod_pvt; + struct xen_memory_reservation reservation; + grant_ref_t ref; + unsigned long pfn; + void *vaddr; + int nr_flips; + struct xen_netif_rx_request *req; + + if (unlikely(!netif_carrier_ok(dev))) + return; + + /* + * Allocate skbuffs greedily, even though we batch updates to the + * receive ring. This creates a less bursty demand on the memory + * allocator, so should reduce the chance of failed allocation requests + * both for ourself and for other kernel subsystems. + */ + batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); + for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { + skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + goto no_skb; + + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) { + kfree_skb(skb); +no_skb: + /* Any skbuffs queued for refill? Force them out. */ + if (i != 0) + goto refill; + /* Could not allocate any skbuffs. Try again later. */ + mod_timer(&np->rx_refill_timer, + jiffies + (HZ/10)); + break; + } + + skb_shinfo(skb)->frags[0].page = page; + skb_shinfo(skb)->nr_frags = 1; + __skb_queue_tail(&np->rx_batch, skb); + } + + /* Is the batch large enough to be worthwhile? 
*/ + if (i < (np->rx_target/2)) { + if (req_prod > np->rx.sring->req_prod) + goto push; + return; + } + + /* Adjust our fill target if we risked running out of buffers. */ + if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && + ((np->rx_target *= 2) > np->rx_max_target)) + np->rx_target = np->rx_max_target; + + refill: + for (nr_flips = i = 0; ; i++) { + if ((skb = __skb_dequeue(&np->rx_batch)) == NULL) + break; + + skb->dev = dev; + + id = xennet_rxidx(req_prod + i); + + BUG_ON(np->rx_skbs[id]); + np->rx_skbs[id] = skb; + + ref = gnttab_claim_grant_reference(&np->gref_rx_head); + BUG_ON((signed short)ref < 0); + np->grant_rx_ref[id] = ref; + + pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); + vaddr = page_address(skb_shinfo(skb)->frags[0].page); + + req = RING_GET_REQUEST(&np->rx, req_prod + i); + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref(ref, + np->xbdev->otherend_id, + pfn); + np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page before passing + * back to Xen. */ + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + MULTI_update_va_mapping(np->rx_mcl+i, + (unsigned long)vaddr, + __pte(0), 0); + } + nr_flips++; + } else { + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + pfn_to_mfn(pfn), + 0); + } + + req->id = id; + req->gref = ref; + } + + if (nr_flips != 0) { + reservation.extent_start = np->rx_pfn_array; + reservation.nr_extents = nr_flips; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* After all PTEs have been zapped, flush the TLB. */ + np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. 
*/ + np->rx_mcl[i].op = __HYPERVISOR_memory_op; + np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + np->rx_mcl[i].args[1] = (unsigned long)&reservation; + + /* Zap PTEs and give away pages in one big + * multicall. */ + (void)HYPERVISOR_multicall(np->rx_mcl, i+1); + + /* Check return status of HYPERVISOR_memory_op(). */ + if (unlikely(np->rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation) != i) + panic("Unable to reduce memory reservation\n"); + } + } else { + wmb(); + } + + /* Above is a suitable barrier to ensure backend will see requests. */ + np->rx.req_prod_pvt = req_prod + i; + push: + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); + if (notify) + notify_remote_via_irq(np->netdev->irq); +} + +static int xennet_open(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + + memset(&np->stats, 0, sizeof(np->stats)); + + spin_lock_bh(&np->rx_lock); + if (netif_carrier_ok(dev)) { + xennet_alloc_rx_buffers(dev); + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + spin_unlock_bh(&np->rx_lock); + + xennet_maybe_wake_tx(dev); + + return 0; +} + +static void xennet_tx_buf_gc(struct net_device *dev) +{ + RING_IDX cons, prod; + unsigned short id; + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + + BUG_ON(!netif_carrier_ok(dev)); + + do { + prod = np->tx.sring->rsp_prod; + rmb(); /* Ensure we see responses up to 'rp'. 
*/ + + for (cons = np->tx.rsp_cons; cons != prod; cons++) { + struct xen_netif_tx_response *txrsp; + + txrsp = RING_GET_RESPONSE(&np->tx, cons); + if (txrsp->status == NETIF_RSP_NULL) + continue; + + id = txrsp->id; + skb = np->tx_skbs[id].skb; + if (unlikely(gnttab_query_foreign_access( + np->grant_tx_ref[id]) != 0)) { + printk(KERN_ALERT "xennet_tx_buf_gc: warning " + "-- grant still in use by backend " + "domain.\n"); + BUG(); + } + gnttab_end_foreign_access_ref( + np->grant_tx_ref[id], GNTMAP_readonly); + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); + np->grant_tx_ref[id] = GRANT_INVALID_REF; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); + dev_kfree_skb_irq(skb); + } + + np->tx.rsp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. + * Note that it is essential to schedule a callback, no matter + * how few buffers are pending. Even if there is space in the + * transmit ring, higher layers may be blocked because too much + * data is outstanding: in such cases notification from Xen is + * likely to be the only kick that we'll get. + */ + np->tx.sring->rsp_event = + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; + mb(); + } while ((cons == prod) && (prod != np->tx.sring->rsp_prod)); + + xennet_maybe_wake_tx(dev); +} + +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, + struct xen_netif_tx_request *tx) +{ + struct netfront_info *np = netdev_priv(dev); + char *data = skb->data; + unsigned long mfn; + RING_IDX prod = np->tx.req_prod_pvt; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + unsigned int id; + grant_ref_t ref; + int i; + + /* While the header overlaps a page boundary (including being + larger than a page), split it into page-sized chunks.
*/ + while (len > PAGE_SIZE - offset) { + tx->size = PAGE_SIZE - offset; + tx->flags |= NETTXF_more_data; + len -= tx->size; + data += tx->size; + offset = 0; + + id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); + np->tx_skbs[id].skb = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; + tx->flags = 0; + } + + /* Grant backend access to each skb fragment page. */ + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + + tx->flags |= NETTXF_more_data; + + id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); + np->tx_skbs[id].skb = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = pfn_to_mfn(page_to_pfn(frag->page)); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = frag->page_offset; + tx->size = frag->size; + tx->flags = 0; + } + + np->tx.req_prod_pvt = prod; +} + +static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct netfront_info *np = netdev_priv(dev); + struct xen_netif_tx_request *tx; + struct xen_netif_extra_info *extra; + char *data = skb->data; + RING_IDX i; + grant_ref_t ref; + unsigned long mfn; + int notify; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", + frags); + dump_stack(); + goto drop; + }
+ + spin_lock_irq(&np->tx_lock); + + if (unlikely(!netif_carrier_ok(dev) || + (frags > 1 && !xennet_can_sg(dev)) || + netif_needs_gso(dev, skb))) { + spin_unlock_irq(&np->tx_lock); + goto drop; + } + + i = np->tx.req_prod_pvt; + + id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); + np->tx_skbs[id].skb = skb; + + tx = RING_GET_REQUEST(&np->tx, i); + + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; + extra = NULL; + + tx->flags = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL) + /* local packet? */ + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; + else if (skb->ip_summed == CHECKSUM_UNNECESSARY) + /* remote but checksummed. */ + tx->flags |= NETTXF_data_validated; + + if (skb_shinfo(skb)->gso_size) { + struct xen_netif_extra_info *gso; + + gso = (struct xen_netif_extra_info *) + RING_GET_REQUEST(&np->tx, ++i); + + if (extra) + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; + else + tx->flags |= NETTXF_extra_info; + + gso->u.gso.size = skb_shinfo(skb)->gso_size; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; + + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; + gso->flags = 0; + extra = gso; + } + + np->tx.req_prod_pvt = i + 1; + + xennet_make_frags(skb, dev, tx); + tx->size = skb->len; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); + if (notify) + notify_remote_via_irq(np->netdev->irq); + + /* Account first: xennet_tx_buf_gc() below may free skb. */ + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + xennet_tx_buf_gc(dev); + + if (!netfront_tx_slot_available(np)) + netif_stop_queue(dev); + + spin_unlock_irq(&np->tx_lock); + + return 0; + + drop: + np->stats.tx_dropped++; + dev_kfree_skb(skb); + return 0; +} + +static int xennet_close(struct net_device *dev) +{ + struct netfront_info *np =
netdev_priv(dev); + netif_stop_queue(np->netdev); + return 0; +} + +static struct net_device_stats *xennet_get_stats(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + return &np->stats; +} + +static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, + grant_ref_t ref) +{ + int new = xennet_rxidx(np->rx.req_prod_pvt); + + BUG_ON(np->rx_skbs[new]); + np->rx_skbs[new] = skb; + np->grant_rx_ref[new] = ref; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; + np->rx.req_prod_pvt++; +} + +static int xennet_get_extras(struct netfront_info *np, + struct xen_netif_extra_info *extras, + RING_IDX rp) + +{ + struct xen_netif_extra_info *extra; + struct device *dev = &np->netdev->dev; + RING_IDX cons = np->rx.rsp_cons; + int err = 0; + + do { + struct sk_buff *skb; + grant_ref_t ref; + + if (unlikely(cons + 1 == rp)) { + if (net_ratelimit()) + dev_warn(dev, "Missing extra info\n"); + err = -EBADR; + break; + } + + extra = (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&np->rx, ++cons); + + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (net_ratelimit()) + dev_warn(dev, "Invalid extra type: %d\n", + extra->type); + err = -EINVAL; + } else { + memcpy(&extras[extra->type - 1], extra, + sizeof(*extra)); + } + + skb = xennet_get_rx_skb(np, cons); + ref = xennet_get_rx_ref(np, cons); + xennet_move_rx_slot(np, skb, ref); + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + np->rx.rsp_cons = cons; + return err; +} + +static int xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, + struct sk_buff_head *list, + int *pages_flipped_p) +{ + int pages_flipped = *pages_flipped_p; + struct mmu_update *mmu; + struct multicall_entry *mcl; + struct xen_netif_rx_response *rx = &rinfo->rx; + struct xen_netif_extra_info *extras = rinfo->extras; + struct device *dev = &np->netdev->dev; + RING_IDX cons 
= np->rx.rsp_cons; + struct sk_buff *skb = xennet_get_rx_skb(np, cons); + grant_ref_t ref = xennet_get_rx_ref(np, cons); + int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); + int frags = 1; + int err = 0; + unsigned long ret; + + if (rx->flags & NETRXF_extra_info) { + err = xennet_get_extras(np, extras, rp); + cons = np->rx.rsp_cons; + } + + for (;;) { + unsigned long mfn; + + if (unlikely(rx->status < 0 || + rx->offset + rx->status > PAGE_SIZE)) { + if (net_ratelimit()) + dev_warn(dev, "rx->offset: %x, size: %u\n", + rx->offset, rx->status); + xennet_move_rx_slot(np, skb, ref); + err = -EINVAL; + goto next; + } + + /* + * This definitely indicates a bug, either in this driver or in + * the backend driver. In future this should flag the bad + * situation to the system controller to reboot the backend. + */ + if (ref == GRANT_INVALID_REF) { + if (net_ratelimit()) + dev_warn(dev, "Bad rx response id %d.\n", + rx->id); + err = -EINVAL; + goto next; + } + + if (!np->copying_receiver) { + /* Memory pressure, insufficient buffer + * headroom, ... */ + mfn = gnttab_end_foreign_transfer_ref(ref); + if (!mfn) { + if (net_ratelimit()) + dev_warn(dev, "Unfulfilled rx req " + "(id=%d, st=%d).\n", + rx->id, rx->status); + xennet_move_rx_slot(np, skb, ref); + err = -ENOMEM; + goto next; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page.
*/ + struct page *page = + skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + mcl = np->rx_mcl + pages_flipped; + mmu = np->rx_mmu + pages_flipped; + + MULTI_update_va_mapping(mcl, + (unsigned long)vaddr, + mfn_pte(mfn, PAGE_KERNEL), + 0); + mmu->ptr = ((u64)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + + set_phys_to_machine(pfn, mfn); + } + pages_flipped++; + } else { + ret = gnttab_end_foreign_access_ref(ref, 0); + BUG_ON(!ret); + } + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + + __skb_queue_tail(list, skb); + +next: + if (!(rx->flags & NETRXF_more_data)) + break; + + if (cons + frags == rp) { + if (net_ratelimit()) + dev_warn(dev, "Need more frags\n"); + err = -ENOENT; + break; + } + + rx = RING_GET_RESPONSE(&np->rx, cons + frags); + skb = xennet_get_rx_skb(np, cons + frags); + ref = xennet_get_rx_ref(np, cons + frags); + frags++; + } + + if (unlikely(frags > max)) { + if (net_ratelimit()) + dev_warn(dev, "Too many frags\n"); + err = -E2BIG; + } + + if (unlikely(err)) + np->rx.rsp_cons = cons + frags; + + *pages_flipped_p = pages_flipped; + + return err; +} + +static int xennet_set_skb_gso(struct sk_buff *skb, + struct xen_netif_extra_info *gso) +{ + if (!gso->u.gso.size) { + if (net_ratelimit()) + printk(KERN_WARNING "GSO size must not be zero.\n"); + return -EINVAL; + } + + /* Currently only TCPv4 S.O. is supported. */ + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + if (net_ratelimit()) + printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type); + return -EINVAL; + } + + skb_shinfo(skb)->gso_size = gso->u.gso.size; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Header must be checked, and gso_segs computed. 
*/ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + return 0; +} + +static RING_IDX xennet_fill_frags(struct netfront_info *np, + struct sk_buff *skb, + struct sk_buff_head *list) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + RING_IDX cons = np->rx.rsp_cons; + skb_frag_t *frag = shinfo->frags + nr_frags; + struct sk_buff *nskb; + + while ((nskb = __skb_dequeue(list))) { + struct xen_netif_rx_response *rx = + RING_GET_RESPONSE(&np->rx, ++cons); + + frag->page = skb_shinfo(nskb)->frags[0].page; + frag->page_offset = rx->offset; + frag->size = rx->status; + + skb->data_len += rx->status; + + skb_shinfo(nskb)->nr_frags = 0; + kfree_skb(nskb); + + frag++; + nr_frags++; + } + + shinfo->nr_frags = nr_frags; + return cons; +} + +static int skb_checksum_setup(struct sk_buff *skb) +{ + struct iphdr *iph; + unsigned char *th; + int err = -EPROTO; + + if (skb->protocol != htons(ETH_P_IP)) + goto out; + + iph = (void *)skb->data; + th = skb->data + 4 * iph->ihl; + if (th >= skb_tail_pointer(skb)) + goto out; + + skb->csum_start = th - skb->head; + switch (iph->protocol) { + case IPPROTO_TCP: + skb->csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + skb->csum_offset = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) + printk(KERN_ERR "Attempting to checksum a non-" + "TCP/UDP packet, dropping a protocol" + " %d packet\n", iph->protocol); + goto out; + } + + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) + goto out; + + err = 0; + +out: + return err; +} + +static int handle_incoming_queue(struct net_device *dev, + struct sk_buff_head *rxq) +{ + struct netfront_info *np = netdev_priv(dev); + int packets_dropped = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(rxq)) != NULL) { + struct page *page = NETFRONT_SKB_CB(skb)->page; + void *vaddr = page_address(page); + unsigned offset = NETFRONT_SKB_CB(skb)->offset; + + memcpy(skb->data,
vaddr + offset, + skb_headlen(skb)); + + if (page != skb_shinfo(skb)->frags[0].page) + __free_page(page); + + /* Ethernet work: Delayed to here as it peeks the header. */ + skb->protocol = eth_type_trans(skb, dev); + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_setup(skb)) { + kfree_skb(skb); + packets_dropped++; + np->stats.rx_errors++; + continue; + } + } + + np->stats.rx_packets++; + np->stats.rx_bytes += skb->len; + + /* Pass it up. */ + netif_receive_skb(skb); + dev->last_rx = jiffies; + } + + return packets_dropped; +} + +static int xennet_poll(struct net_device *dev, int *pbudget) +{ + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + struct netfront_rx_info rinfo; + struct xen_netif_rx_response *rx = &rinfo.rx; + struct xen_netif_extra_info *extras = rinfo.extras; + RING_IDX i, rp; + struct multicall_entry *mcl; + int work_done, budget, more_to_do = 1; + struct sk_buff_head rxq; + struct sk_buff_head errq; + struct sk_buff_head tmpq; + unsigned long flags; + unsigned int len; + int pages_flipped = 0; + int err; + + spin_lock(&np->rx_lock); + + if (unlikely(!netif_carrier_ok(dev))) { + spin_unlock(&np->rx_lock); + return 0; + } + + skb_queue_head_init(&rxq); + skb_queue_head_init(&errq); + skb_queue_head_init(&tmpq); + + if ((budget = *pbudget) > dev->quota) + budget = dev->quota; + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ + + i = np->rx.rsp_cons; + work_done = 0; + while ((i != rp) && (work_done < budget)) { + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memset(extras, 0, sizeof(rinfo.extras)); + + err = xennet_get_responses(np, &rinfo, rp, &tmpq, + &pages_flipped); + + if (unlikely(err)) { +err: + while ((skb = __skb_dequeue(&tmpq))) + __skb_queue_tail(&errq, skb); + np->stats.rx_errors++; + i = np->rx.rsp_cons; + continue; + } + + skb = __skb_dequeue(&tmpq); + + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { + struct xen_netif_extra_info *gso; + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; + + if (unlikely(xennet_set_skb_gso(skb, gso))) { + __skb_queue_head(&tmpq, skb); + np->rx.rsp_cons += skb_queue_len(&tmpq); + goto err; + } + } + + NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page; + NETFRONT_SKB_CB(skb)->offset = rx->offset; + + len = rx->status; + if (len > RX_COPY_THRESHOLD) + len = RX_COPY_THRESHOLD; + skb_put(skb, len); + + if (rx->status > len) { + skb_shinfo(skb)->frags[0].page_offset = + rx->offset + len; + skb_shinfo(skb)->frags[0].size = rx->status - len; + skb->data_len = rx->status - len; + } else { + skb_shinfo(skb)->frags[0].page = NULL; + skb_shinfo(skb)->nr_frags = 0; + } + + i = xennet_fill_frags(np, skb, &tmpq); + + /* + * Truesize approximates the size of true data plus + * any supervisor overheads. Adding hypervisor + * overheads has been shown to significantly reduce + * achievable bandwidth with the default receive + * buffer size. It is therefore not wise to account + * for it here. + * + * After alloc_skb(RX_COPY_THRESHOLD), truesize is set + * to RX_COPY_THRESHOLD + the supervisor + * overheads. Here, we add the size of the data pulled + * in xennet_fill_frags(). + * + * We also adjust for any unused space in the main + * data area by subtracting (RX_COPY_THRESHOLD - + * len).
This is especially important with drivers + * which split incoming packets into header and data, + * using only 66 bytes of the main data area (see the + * e1000 driver for example.) On such systems, + * without this last adjustment, our achievable + * receive throughput using the standard receive + * buffer size was cut by 25%(!!!). + */ + skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); + skb->len += skb->data_len; + + if (rx->flags & NETRXF_csum_blank) + skb->ip_summed = CHECKSUM_PARTIAL; + else if (rx->flags & NETRXF_data_validated) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + __skb_queue_tail(&rxq, skb); + + np->rx.rsp_cons = ++i; + work_done++; + } + + if (pages_flipped) { + /* Do all the remapping work, and M2P updates. */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = np->rx_mcl + pages_flipped; + MULTI_mmu_update(mcl, np->rx_mmu, + pages_flipped, 0, DOMID_SELF); + (void)HYPERVISOR_multicall(np->rx_mcl, + pages_flipped + 1); + } + } + + while ((skb = __skb_dequeue(&errq))) + kfree_skb(skb); + + work_done -= handle_incoming_queue(dev, &rxq); + + /* If we get a callback with very few responses, reduce fill target. */ + /* NB. Note exponential increase, linear decrease. */ + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > + ((3*np->rx_target) / 4)) && + (--np->rx_target < np->rx_min_target)) + np->rx_target = np->rx_min_target; + + xennet_alloc_rx_buffers(dev); + + *pbudget -= work_done; + dev->quota -= work_done; + + if (work_done < budget) { + local_irq_save(flags); + + RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); + if (!more_to_do) + __netif_rx_complete(dev); + + local_irq_restore(flags); + } + + spin_unlock(&np->rx_lock); + + return more_to_do; +} + +static int xennet_change_mtu(struct net_device *dev, int mtu) +{ + int max = xennet_can_sg(dev) ?
65535 - ETH_HLEN : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; + dev->mtu = mtu; + return 0; +} + +static void xennet_release_tx_bufs(struct netfront_info *np) +{ + struct sk_buff *skb; + int i; + + for (i = 0; i < NET_TX_RING_SIZE; i++) { + /* Skip over entries which are actually freelist references */ + if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET) + continue; + + skb = np->tx_skbs[i].skb; + gnttab_end_foreign_access_ref(np->grant_tx_ref[i], + GNTMAP_readonly); + gnttab_release_grant_reference(&np->gref_tx_head, + np->grant_tx_ref[i]); + np->grant_tx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); + dev_kfree_skb_irq(skb); + } +} + +static void xennet_release_rx_bufs(struct netfront_info *np) +{ + struct mmu_update *mmu = np->rx_mmu; + struct multicall_entry *mcl = np->rx_mcl; + struct sk_buff_head free_list; + struct sk_buff *skb; + unsigned long mfn; + int xfer = 0, noxfer = 0, unused = 0; + int id, ref; + + if (np->copying_receiver) { + dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", + __func__); + return; + } + + skb_queue_head_init(&free_list); + + spin_lock_bh(&np->rx_lock); + + for (id = 0; id < NET_RX_RING_SIZE; id++) { + if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) { + unused++; + continue; + } + + skb = np->rx_skbs[id]; + mfn = gnttab_end_foreign_transfer_ref(ref); + gnttab_release_grant_reference(&np->gref_rx_head, ref); + np->grant_rx_ref[id] = GRANT_INVALID_REF; + + if (0 == mfn) { + skb_shinfo(skb)->nr_frags = 0; + dev_kfree_skb(skb); + noxfer++; + continue; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. 
*/ + struct page *page = skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + MULTI_update_va_mapping(mcl, (unsigned long)vaddr, + mfn_pte(mfn, PAGE_KERNEL), + 0); + mcl++; + mmu->ptr = ((u64)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + mmu++; + + set_phys_to_machine(pfn, mfn); + } + __skb_queue_tail(&free_list, skb); + xfer++; + } + + dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", + __func__, xfer, noxfer, unused); + + if (xfer) { + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Do all the remapping work and M2P updates. */ + MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, + 0, DOMID_SELF); + mcl++; + HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); + } + } + + while ((skb = __skb_dequeue(&free_list)) != NULL) + dev_kfree_skb(skb); + + spin_unlock_bh(&np->rx_lock); +} + +static void xennet_uninit(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + xennet_release_tx_bufs(np); + xennet_release_rx_bufs(np); + gnttab_free_grant_references(np->gref_tx_head); + gnttab_free_grant_references(np->gref_rx_head); +} + +static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev) +{ + int i, err; + struct net_device *netdev; + struct netfront_info *np; + + netdev = alloc_etherdev(sizeof(struct netfront_info)); + if (!netdev) { + printk(KERN_WARNING "%s> alloc_etherdev failed.\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + np = netdev_priv(netdev); + np->xbdev = dev; + + spin_lock_init(&np->tx_lock); + spin_lock_init(&np->rx_lock); + + skb_queue_head_init(&np->rx_batch); + np->rx_target = RX_DFL_MIN_TARGET; + np->rx_min_target = RX_DFL_MIN_TARGET; + np->rx_max_target = RX_MAX_TARGET; + + init_timer(&np->rx_refill_timer); + np->rx_refill_timer.data = (unsigned long)netdev; + np->rx_refill_timer.function = rx_refill_timeout; + + /* Initialise tx_skbs as a free chain containing every entry. 
*/ + np->tx_skb_freelist = 0; + for (i = 0; i < NET_TX_RING_SIZE; i++) { + np->tx_skbs[i].link = i+1; + np->grant_tx_ref[i] = GRANT_INVALID_REF; + } + + /* Clear out rx_skbs */ + for (i = 0; i < NET_RX_RING_SIZE; i++) { + np->rx_skbs[i] = NULL; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + } + + /* A grant for every tx ring slot */ + if (gnttab_alloc_grant_references(TX_MAX_TARGET, + &np->gref_tx_head) < 0) { + printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); + err = -ENOMEM; + goto exit; + } + /* A grant for every rx ring slot */ + if (gnttab_alloc_grant_references(RX_MAX_TARGET, + &np->gref_rx_head) < 0) { + printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); + err = -ENOMEM; + goto exit_free_tx; + } + + netdev->open = xennet_open; + netdev->hard_start_xmit = xennet_start_xmit; + netdev->stop = xennet_close; + netdev->get_stats = xennet_get_stats; + netdev->poll = xennet_poll; + netdev->uninit = xennet_uninit; + netdev->change_mtu = xennet_change_mtu; + netdev->weight = 64; + netdev->features = NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &dev->dev); + + np->netdev = netdev; + + netif_carrier_off(netdev); + + return netdev; + + exit_free_tx: + gnttab_free_grant_references(np->gref_tx_head); + exit: + free_netdev(netdev); + return ERR_PTR(err); +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffers for communication with the backend, and + * inform the backend of the appropriate details for those. 
+ */ +static int __devinit netfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct net_device *netdev; + struct netfront_info *info; + + netdev = xennet_create_dev(dev); + if (IS_ERR(netdev)) { + err = PTR_ERR(netdev); + xenbus_dev_fatal(dev, err, "creating netdev"); + return err; + } + + info = netdev_priv(netdev); + dev->dev.driver_data = info; + + err = register_netdev(info->netdev); + if (err) { + printk(KERN_WARNING "%s: register_netdev err=%d\n", + __func__, err); + goto fail; + } + + err = xennet_sysfs_addif(info->netdev); + if (err) { + unregister_netdev(info->netdev); + printk(KERN_WARNING "%s: add sysfs failed err=%d\n", + __func__, err); + goto fail; + } + + return 0; + + fail: + free_netdev(netdev); + dev->dev.driver_data = NULL; + return err; +} + +static void xennet_end_access(int ref, void *page) +{ + /* This frees the page as a side-effect */ + if (ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(ref, 0, (unsigned long)page); +} + +static void xennet_disconnect_backend(struct netfront_info *info) +{ + /* Stop old i/f to prevent errors whilst we rebuild the state. */ + spin_lock_bh(&info->rx_lock); + spin_lock_irq(&info->tx_lock); + netif_carrier_off(info->netdev); + spin_unlock_irq(&info->tx_lock); + spin_unlock_bh(&info->rx_lock); + + if (info->netdev->irq) + unbind_from_irqhandler(info->netdev->irq, info->netdev); + info->evtchn = info->netdev->irq = 0; + + /* End access and free the pages */ + xennet_end_access(info->tx_ring_ref, info->tx.sring); + xennet_end_access(info->rx_ring_ref, info->rx.sring); + + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->tx.sring = NULL; + info->rx.sring = NULL; +} + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our netif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. 
+ */ +static int netfront_resume(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + + dev_dbg(&dev->dev, "%s\n", dev->nodename); + + xennet_disconnect_backend(info); + return 0; +} + +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) +{ + char *s, *e, *macstr; + int i; + + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); + if (IS_ERR(macstr)) + return PTR_ERR(macstr); + + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = simple_strtoul(s, &e, 16); + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { + kfree(macstr); + return -ENOENT; + } + s = e+1; + } + + kfree(macstr); + return 0; +} + +static irqreturn_t xennet_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct netfront_info *np = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&np->tx_lock, flags); + + if (likely(netif_carrier_ok(dev))) { + xennet_tx_buf_gc(dev); + /* Under tx_lock: protects access to rx shared-ring indexes. 
*/ + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + + spin_unlock_irqrestore(&np->tx_lock, flags); + + return IRQ_HANDLED; +} + +static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) +{ + struct xen_netif_tx_sring *txs; + struct xen_netif_rx_sring *rxs; + int err; + struct net_device *netdev = info->netdev; + + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->rx.sring = NULL; + info->tx.sring = NULL; + netdev->irq = 0; + + err = xen_net_read_mac(dev, netdev->dev_addr); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); + goto fail; + } + + txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL); + if (!txs) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, "allocating tx ring page"); + goto fail; + } + SHARED_RING_INIT(txs); + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + if (err < 0) { + free_page((unsigned long)txs); + goto fail; + } + + info->tx_ring_ref = err; + rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL); + if (!rxs) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, "allocating rx ring page"); + goto fail; + } + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + if (err < 0) { + free_page((unsigned long)rxs); + goto fail; + } + info->rx_ring_ref = err; + + err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (err) + goto fail; + + err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt, + IRQF_SAMPLE_RANDOM, netdev->name, + netdev); + if (err < 0) + goto fail; + netdev->irq = err; + return 0; + + fail: + return err; +} + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct netfront_info *info) +{ + const char *message; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. 
*/ + err = setup_netfront(dev, info); + if (err) + goto out; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_ring; + } + + err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u", + info->tx_ring_ref); + if (err) { + message = "writing tx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u", + info->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", info->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", + info->copying_receiver); + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); + if (err) { + message = "writing feature-rx-notify"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_ring; + } + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); + destroy_ring: + xennet_disconnect_backend(info); + out: + return err; +} + +static int xennet_set_sg(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", + "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } else if (dev->mtu > ETH_DATA_LEN) + 
dev->mtu = ETH_DATA_LEN; + + return ethtool_op_set_sg(dev, data); +} + +static int xennet_set_tso(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-gso-tcpv4", "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } + + return ethtool_op_set_tso(dev, data); +} + +static void xennet_set_features(struct net_device *dev) +{ + /* Turn off all GSO bits except ROBUST. */ + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; + dev->features |= NETIF_F_GSO_ROBUST; + xennet_set_sg(dev, 0); + + /* We need checksum offload to enable scatter/gather and TSO. */ + if (!(dev->features & NETIF_F_IP_CSUM)) + return; + + if (!xennet_set_sg(dev, 1)) + xennet_set_tso(dev, 1); +} + +static int xennet_connect(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + int i, requeue_idx, err; + struct sk_buff *skb; + grant_ref_t ref; + struct xen_netif_rx_request *req; + unsigned int feature_rx_copy, feature_rx_flip; + + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-copy", "%u", &feature_rx_copy); + if (err != 1) + feature_rx_copy = 0; + + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-flip", "%u", &feature_rx_flip); + /* Flip is the default, since it was once the only mode of + operation. */ + if (err != 1) + feature_rx_flip = 1; + + /* + * Copy packets on receive path if: + * (a) This was requested by user, and the backend supports it; or + * (b) Flipping was requested, but this is unsupported by the backend. + */ + np->copying_receiver = (((rx_mode == RX_COPY) && feature_rx_copy) || + ((rx_mode == RX_FLIP) && !feature_rx_flip)); + + err = talk_to_backend(np->xbdev, np); + if (err) + return err; + + xennet_set_features(dev); + + dev_info(&dev->dev, "has %s receive path.\n", + np->copying_receiver ? 
"copying" : "flipping"); + + spin_lock_bh(&np->rx_lock); + spin_lock_irq(&np->tx_lock); + + /* Step 1: Discard all pending TX packet fragments. */ + xennet_release_tx_bufs(np); + + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { + if (!np->rx_skbs[i]) + continue; + + skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); + req = RING_GET_REQUEST(&np->rx, requeue_idx); + + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref( + ref, np->xbdev->otherend_id, + page_to_pfn(skb_shinfo(skb)->frags->page)); + } else { + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, + pfn_to_mfn(page_to_pfn(skb_shinfo(skb)-> + frags->page)), + 0); + } + req->gref = ref; + req->id = requeue_idx; + + requeue_idx++; + } + + np->rx.req_prod_pvt = requeue_idx; + + /* + * Step 3: All public and private state should now be sane. Get + * ready to start sending and receiving packets and give the driver + * domain a kick because we've probably just requeued some + * packets. + */ + netif_carrier_on(np->netdev); + notify_remote_via_irq(np->netdev->irq); + xennet_tx_buf_gc(dev); + xennet_alloc_rx_buffers(dev); + + spin_unlock_irq(&np->tx_lock); + spin_unlock_bh(&np->rx_lock); + + return 0; +} + +/** + * Callback received when the backend's state changes. 
+ */ +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct netfront_info *np = dev->dev.driver_data; + struct net_device *netdev = np->netdev; + + dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state)); + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateConnected: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: + if (dev->state != XenbusStateInitialising) + break; + if (xennet_connect(netdev) != 0) + break; + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct ethtool_ops xennet_ethtool_ops = +{ + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = xennet_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = xennet_set_tso, + .get_link = ethtool_op_get_link, +}; + +#ifdef CONFIG_SYSFS +static ssize_t show_rxbuf_min(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *info = netdev_priv(netdev); + + return sprintf(buf, "%u\n", info->rx_min_target); +} + +static ssize_t store_rxbuf_min(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *np = netdev_priv(netdev); + char *endp; + unsigned long target; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + target = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EBADMSG; + + if (target < RX_MIN_TARGET) + target = RX_MIN_TARGET; + if (target > RX_MAX_TARGET) + target = RX_MAX_TARGET; + + spin_lock_bh(&np->rx_lock); + if (target > np->rx_max_target) + np->rx_max_target = target; + np->rx_min_target = target; + if (target > np->rx_target) + np->rx_target = target; + + xennet_alloc_rx_buffers(netdev); + + 
spin_unlock_bh(&np->rx_lock); + return len; +} + +static ssize_t show_rxbuf_max(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *info = netdev_priv(netdev); + + return sprintf(buf, "%u\n", info->rx_max_target); +} + +static ssize_t store_rxbuf_max(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *np = netdev_priv(netdev); + char *endp; + unsigned long target; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + target = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EBADMSG; + + if (target < RX_MIN_TARGET) + target = RX_MIN_TARGET; + if (target > RX_MAX_TARGET) + target = RX_MAX_TARGET; + + spin_lock_bh(&np->rx_lock); + if (target < np->rx_min_target) + np->rx_min_target = target; + np->rx_max_target = target; + if (target < np->rx_target) + np->rx_target = target; + + xennet_alloc_rx_buffers(netdev); + + spin_unlock_bh(&np->rx_lock); + return len; +} + +static ssize_t show_rxbuf_cur(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *info = netdev_priv(netdev); + + return sprintf(buf, "%u\n", info->rx_target); +} + +static struct device_attribute xennet_attrs[] = { + __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), + __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), + __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), +}; + +static int xennet_sysfs_addif(struct net_device *netdev) +{ + int i; + int err; + + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { + err = device_create_file(&netdev->dev, + &xennet_attrs[i]); + if (err) + goto fail; + } + return 0; + + fail: + while (--i >= 0) + device_remove_file(&netdev->dev, &xennet_attrs[i]); + return err; +} + +static void xennet_sysfs_delif(struct net_device *netdev) +{ + int i; + + for (i 
= 0; i < ARRAY_SIZE(xennet_attrs); i++) + device_remove_file(&netdev->dev, &xennet_attrs[i]); +} + +#endif /* CONFIG_SYSFS */ + +static struct xenbus_device_id netfront_ids[] = { + { "vif" }, + { "" } +}; + + +static int __devexit xennet_remove(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + + dev_dbg(&dev->dev, "%s\n", dev->nodename); + + unregister_netdev(info->netdev); + + xennet_disconnect_backend(info); + + del_timer_sync(&info->rx_refill_timer); + + xennet_sysfs_delif(info->netdev); + + free_netdev(info->netdev); + + return 0; +} + +static struct xenbus_driver netfront = { + .name = "vif", + .owner = THIS_MODULE, + .ids = netfront_ids, + .probe = netfront_probe, + .remove = __devexit_p(xennet_remove), + .resume = netfront_resume, + .otherend_changed = backend_changed, +}; + +static int __init netif_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + if (is_initial_xendomain()) + return 0; + + printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); + + return xenbus_register_frontend(&netfront); +} +module_init(netif_init); + + +static void __exit netif_exit(void) +{ + if (is_initial_xendomain()) + return; + + return xenbus_unregister_driver(&netfront); +} +module_exit(netif_exit); + +MODULE_DESCRIPTION("Xen virtual network device frontend"); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/drivers/net/yellowfin.c linux-2.6.22-591/drivers/net/yellowfin.c --- linux-2.6.22-570/drivers/net/yellowfin.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/net/yellowfin.c 2007-12-21 15:36:12.000000000 -0500 @@ -1137,7 +1137,7 @@ if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header */ - eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0); + skb_copy_to_linear_data(skb, rx_skb->data, pkt_len); skb_put(skb, pkt_len); pci_dma_sync_single_for_device(yp->pci_dev, desc->addr, yp->rx_buf_sz, diff -Nurb linux-2.6.22-570/drivers/parisc/led.c linux-2.6.22-591/drivers/parisc/led.c --- 
linux-2.6.22-570/drivers/parisc/led.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/parisc/led.c 2007-12-21 15:36:14.000000000 -0500 @@ -359,7 +359,7 @@ * for reading should be OK */ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct net_device_stats *stats; struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) diff -Nurb linux-2.6.22-570/drivers/parisc/pdc_stable.c linux-2.6.22-591/drivers/parisc/pdc_stable.c --- linux-2.6.22-570/drivers/parisc/pdc_stable.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/parisc/pdc_stable.c 2007-12-21 15:36:12.000000000 -0500 @@ -121,14 +121,14 @@ #define PDCS_ATTR(_name, _mode, _show, _store) \ struct subsys_attribute pdcs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; #define PATHS_ATTR(_name, _mode, _show, _store) \ struct pdcspath_attribute paths_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ + .attr = {.name = __stringify(_name), .mode = _mode}, \ .show = _show, \ .store = _store, \ }; diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c linux-2.6.22-591/drivers/pci/hotplug/acpiphp_ibm.c --- linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pci/hotplug/acpiphp_ibm.c 2007-12-21 15:36:12.000000000 -0500 @@ -106,6 +106,7 @@ static void ibm_handle_events(acpi_handle handle, u32 event, void *context); static int ibm_get_table_from_acpi(char **bufp); static ssize_t ibm_read_apci_table(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t size); static acpi_status __init ibm_find_acpi_device(acpi_handle handle, u32 lvl, void *context, void **rv); @@ -117,7 +118,6 @@ static struct bin_attribute 
ibm_apci_table_attr = { .attr = { .name = "apci_table", - .owner = THIS_MODULE, .mode = S_IRUGO, }, .read = ibm_read_apci_table, @@ -358,6 +358,7 @@ * our solution is to only allow reading the table in all at once **/ static ssize_t ibm_read_apci_table(struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t size) { int bytes_read = -EINVAL; diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c linux-2.6.22-591/drivers/pci/hotplug/rpadlpar_core.c --- linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pci/hotplug/rpadlpar_core.c 2007-12-21 15:36:12.000000000 -0500 @@ -159,8 +159,8 @@ /* Claim new bus resources */ pcibios_claim_one_bus(dev->bus); - /* ioremap() for child bus, which may or may not succeed */ - remap_bus_range(dev->subordinate); + /* Map IO space for child bus, which may or may not succeed */ + pcibios_map_io_space(dev->subordinate); /* Add new devices to global lists. Register in proc, sysfs. 
*/ pci_bus_add_devices(phb->bus); @@ -390,7 +390,7 @@ } else pcibios_remove_pci_devices(bus); - if (unmap_bus_range(bus)) { + if (pcibios_unmap_io_space(bus)) { printk(KERN_ERR "%s: failed to unmap bus range\n", __FUNCTION__); return -ERANGE; diff -Nurb linux-2.6.22-570/drivers/pci/pci-sysfs.c linux-2.6.22-591/drivers/pci/pci-sysfs.c --- linux-2.6.22-570/drivers/pci/pci-sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pci/pci-sysfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -213,7 +213,8 @@ }; static ssize_t -pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); unsigned int size = 64; @@ -285,7 +286,8 @@ } static ssize_t -pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); unsigned int size = count; @@ -352,7 +354,8 @@ * callback routine (pci_legacy_read). */ ssize_t -pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_bus *bus = to_pci_bus(container_of(kobj, struct class_device, @@ -376,7 +379,8 @@ * callback routine (pci_legacy_write). 
*/ ssize_t -pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_bus *bus = to_pci_bus(container_of(kobj, struct class_device, @@ -499,7 +503,6 @@ sprintf(res_attr_name, "resource%d", i); res_attr->attr.name = res_attr_name; res_attr->attr.mode = S_IRUSR | S_IWUSR; - res_attr->attr.owner = THIS_MODULE; res_attr->size = pci_resource_len(pdev, i); res_attr->mmap = pci_mmap_resource; res_attr->private = &pdev->resource[i]; @@ -529,7 +532,8 @@ * writing anything except 0 enables it */ static ssize_t -pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); @@ -552,7 +556,8 @@ * device corresponding to @kobj. */ static ssize_t -pci_read_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) +pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); void __iomem *rom; @@ -582,7 +587,6 @@ .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = 256, .read = pci_read_config, @@ -593,7 +597,6 @@ .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = 4096, .read = pci_read_config, @@ -628,7 +631,6 @@ rom_attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE); rom_attr->attr.name = "rom"; rom_attr->attr.mode = S_IRUSR; - rom_attr->attr.owner = THIS_MODULE; rom_attr->read = pci_read_rom; rom_attr->write = pci_write_rom; retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr); diff -Nurb linux-2.6.22-570/drivers/pci/probe.c linux-2.6.22-591/drivers/pci/probe.c --- linux-2.6.22-570/drivers/pci/probe.c 2007-12-21 15:35:57.000000000 
-0500 +++ linux-2.6.22-591/drivers/pci/probe.c 2007-12-21 15:36:12.000000000 -0500 @@ -39,7 +39,6 @@ b->legacy_io->attr.name = "legacy_io"; b->legacy_io->size = 0xffff; b->legacy_io->attr.mode = S_IRUSR | S_IWUSR; - b->legacy_io->attr.owner = THIS_MODULE; b->legacy_io->read = pci_read_legacy_io; b->legacy_io->write = pci_write_legacy_io; class_device_create_bin_file(&b->class_dev, b->legacy_io); @@ -49,7 +48,6 @@ b->legacy_mem->attr.name = "legacy_mem"; b->legacy_mem->size = 1024*1024; b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR; - b->legacy_mem->attr.owner = THIS_MODULE; b->legacy_mem->mmap = pci_mmap_legacy_mem; class_device_create_bin_file(&b->class_dev, b->legacy_mem); } diff -Nurb linux-2.6.22-570/drivers/pcmcia/cs.c linux-2.6.22-591/drivers/pcmcia/cs.c --- linux-2.6.22-570/drivers/pcmcia/cs.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/pcmcia/cs.c 2007-12-21 15:36:12.000000000 -0500 @@ -654,6 +654,7 @@ add_wait_queue(&skt->thread_wait, &wait); complete(&skt->thread_done); + set_freezable(); for (;;) { unsigned long flags; unsigned int events; diff -Nurb linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c linux-2.6.22-591/drivers/pcmcia/socket_sysfs.c --- linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pcmcia/socket_sysfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -283,7 +283,9 @@ return (ret); } -static ssize_t pccard_show_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t pccard_show_cis(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { unsigned int size = 0x200; @@ -311,7 +313,9 @@ return (count); } -static ssize_t pccard_store_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t pccard_store_cis(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pcmcia_socket *s = to_socket(container_of(kobj, struct device, kobj)); cisdump_t 
*cis; @@ -366,7 +370,7 @@ }; static struct bin_attribute pccard_cis_attr = { - .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE}, + .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR }, .size = 0x200, .read = pccard_show_cis, .write = pccard_store_cis, diff -Nurb linux-2.6.22-570/drivers/pnp/driver.c linux-2.6.22-591/drivers/pnp/driver.c --- linux-2.6.22-570/drivers/pnp/driver.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pnp/driver.c 2007-12-21 15:36:12.000000000 -0500 @@ -167,6 +167,8 @@ return error; } + if (pnp_dev->protocol && pnp_dev->protocol->suspend) + pnp_dev->protocol->suspend(pnp_dev, state); return 0; } @@ -179,6 +181,9 @@ if (!pnp_drv) return 0; + if (pnp_dev->protocol && pnp_dev->protocol->resume) + pnp_dev->protocol->resume(pnp_dev); + if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) { error = pnp_start_dev(pnp_dev); if (error) diff -Nurb linux-2.6.22-570/drivers/pnp/pnpacpi/core.c linux-2.6.22-591/drivers/pnp/pnpacpi/core.c --- linux-2.6.22-570/drivers/pnp/pnpacpi/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pnp/pnpacpi/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -119,11 +119,23 @@ return ACPI_FAILURE(status) ? 
-ENODEV : 0; } +static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) +{ + return acpi_bus_set_power((acpi_handle)dev->data, 3); +} + +static int pnpacpi_resume(struct pnp_dev *dev) +{ + return acpi_bus_set_power((acpi_handle)dev->data, 0); +} + static struct pnp_protocol pnpacpi_protocol = { .name = "Plug and Play ACPI", .get = pnpacpi_get_resources, .set = pnpacpi_set_resources, .disable = pnpacpi_disable_resources, + .suspend = pnpacpi_suspend, + .resume = pnpacpi_resume, }; static int __init pnpacpi_add_device(struct acpi_device *device) diff -Nurb linux-2.6.22-570/drivers/pnp/pnpbios/core.c linux-2.6.22-591/drivers/pnp/pnpbios/core.c --- linux-2.6.22-570/drivers/pnp/pnpbios/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/pnp/pnpbios/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -147,7 +147,7 @@ info->location_id, info->serial, info->capabilities); envp[i] = NULL; - value = call_usermodehelper (argv [0], argv, envp, 0); + value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); kfree (buf); kfree (envp); return 0; @@ -160,6 +160,7 @@ { static struct pnp_docking_station_info now; int docked = -1, d = 0; + set_freezable(); while (!unloading) { int status; diff -Nurb linux-2.6.22-570/drivers/rapidio/rio-sysfs.c linux-2.6.22-591/drivers/rapidio/rio-sysfs.c --- linux-2.6.22-570/drivers/rapidio/rio-sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/rapidio/rio-sysfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -67,7 +67,8 @@ }; static ssize_t -rio_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(container_of(kobj, struct device, kobj)); @@ -137,7 +138,8 @@ } static ssize_t -rio_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t 
off, size_t count) { struct rio_dev *dev = to_rio_dev(container_of(kobj, struct device, kobj)); @@ -197,7 +199,6 @@ .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = 0x200000, .read = rio_read_config, diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1553.c linux-2.6.22-591/drivers/rtc/rtc-ds1553.c --- linux-2.6.22-570/drivers/rtc/rtc-ds1553.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/rtc/rtc-ds1553.c 2007-12-21 15:36:12.000000000 -0500 @@ -258,8 +258,9 @@ .ioctl = ds1553_rtc_ioctl, }; -static ssize_t ds1553_nvram_read(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1553_nvram_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); @@ -272,8 +273,9 @@ return count; } -static ssize_t ds1553_nvram_write(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1553_nvram_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); @@ -290,7 +292,6 @@ .attr = { .name = "nvram", .mode = S_IRUGO | S_IWUGO, - .owner = THIS_MODULE, }, .size = RTC_OFFSET, .read = ds1553_nvram_read, diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1742.c linux-2.6.22-591/drivers/rtc/rtc-ds1742.c --- linux-2.6.22-570/drivers/rtc/rtc-ds1742.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/rtc/rtc-ds1742.c 2007-12-21 15:36:12.000000000 -0500 @@ -127,8 +127,9 @@ .set_time = ds1742_rtc_set_time, }; -static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1742_nvram_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); 
@@ -141,8 +142,9 @@ return count; } -static ssize_t ds1742_nvram_write(struct kobject *kobj, char *buf, - loff_t pos, size_t size) +static ssize_t ds1742_nvram_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { struct platform_device *pdev = to_platform_device(container_of(kobj, struct device, kobj)); @@ -159,7 +161,6 @@ .attr = { .name = "nvram", .mode = S_IRUGO | S_IWUGO, - .owner = THIS_MODULE, }, .read = ds1742_nvram_read, .write = ds1742_nvram_write, diff -Nurb linux-2.6.22-570/drivers/s390/cio/chp.c linux-2.6.22-591/drivers/s390/cio/chp.c --- linux-2.6.22-570/drivers/s390/cio/chp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/s390/cio/chp.c 2007-12-21 15:36:12.000000000 -0500 @@ -141,8 +141,9 @@ /* * Channel measurement related functions */ -static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t chp_measurement_chars_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct channel_path *chp; unsigned int size; @@ -165,7 +166,6 @@ .attr = { .name = "measurement_chars", .mode = S_IRUSR, - .owner = THIS_MODULE, }, .size = sizeof(struct cmg_chars), .read = chp_measurement_chars_read, @@ -193,8 +193,9 @@ } while (reference_buf.values[0] != buf->values[0]); } -static ssize_t chp_measurement_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t chp_measurement_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct channel_path *chp; struct channel_subsystem *css; @@ -217,7 +218,6 @@ .attr = { .name = "measurement", .mode = S_IRUSR, - .owner = THIS_MODULE, }, .size = sizeof(struct cmg_entry), .read = chp_measurement_read, diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_main.c linux-2.6.22-591/drivers/s390/net/qeth_main.c --- linux-2.6.22-570/drivers/s390/net/qeth_main.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/drivers/s390/net/qeth_main.c 2007-12-21 15:36:14.000000000 -0500 @@ -8127,7 +8127,7 @@ neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); - neigh->type = inet_addr_type(*(__be32 *) neigh->primary_key); + neigh->type = inet_addr_type(&init_net, *(__be32 *) neigh->primary_key); neigh->nud_state = NUD_NOARP; neigh->ops = arp_direct_ops; neigh->output = neigh->ops->queue_xmit; diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_sys.c linux-2.6.22-591/drivers/s390/net/qeth_sys.c --- linux-2.6.22-570/drivers/s390/net/qeth_sys.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/s390/net/qeth_sys.c 2007-12-21 15:36:12.000000000 -0500 @@ -991,7 +991,7 @@ #define QETH_DEVICE_ATTR(_id,_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_id = { \ - .attr = {.name=__stringify(_name), .mode=_mode, .owner=THIS_MODULE },\ + .attr = {.name=__stringify(_name), .mode=_mode, },\ .show = _show, \ .store = _store, \ }; diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c linux-2.6.22-591/drivers/s390/scsi/zfcp_aux.c --- linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/s390/scsi/zfcp_aux.c 2007-12-21 15:36:12.000000000 -0500 @@ -815,9 +815,7 @@ struct zfcp_unit * zfcp_unit_enqueue(struct zfcp_port *port, fcp_lun_t fcp_lun) { - struct zfcp_unit *unit, *tmp_unit; - unsigned int scsi_lun; - int found; + struct zfcp_unit *unit; /* * check that there is no unit with this FCP_LUN already in list @@ -863,21 +861,9 @@ } zfcp_unit_get(unit); + unit->scsi_lun = scsilun_to_int((struct scsi_lun *)&unit->fcp_lun); - scsi_lun = 0; - found = 0; write_lock_irq(&zfcp_data.config_lock); - list_for_each_entry(tmp_unit, &port->unit_list_head, list) { - if (tmp_unit->scsi_lun != scsi_lun) { - found = 1; - break; - } - scsi_lun++; - } - unit->scsi_lun = scsi_lun; - if (found) - list_add_tail(&unit->list, &tmp_unit->list); - else list_add_tail(&unit->list, &port->unit_list_head); 
atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status); atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &unit->status); diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c linux-2.6.22-591/drivers/s390/scsi/zfcp_erp.c --- linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/s390/scsi/zfcp_erp.c 2007-12-21 15:36:12.000000000 -0500 @@ -1986,6 +1986,10 @@ failed_openfcp: zfcp_close_fsf(erp_action->adapter); failed_qdio: + atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK | + ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED | + ZFCP_STATUS_ADAPTER_XPORT_OK, + &erp_action->adapter->status); out: return retval; } @@ -2167,6 +2171,9 @@ sleep *= 2; } + atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT, + &adapter->status); + if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status)) { ZFCP_LOG_INFO("error: exchange of configuration data for " diff -Nurb linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c linux-2.6.22-591/drivers/sbus/char/bbc_envctrl.c --- linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/sbus/char/bbc_envctrl.c 2007-12-21 15:36:12.000000000 -0500 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -170,8 +171,6 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) { static int shutting_down = 0; - static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; char *type = "???"; s8 val = -1; @@ -195,7 +194,7 @@ printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); shutting_down = 1; - if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) + if (orderly_poweroff(true) < 0) printk(KERN_CRIT "envctrl: shutdown execution failed\n"); } diff -Nurb linux-2.6.22-570/drivers/sbus/char/envctrl.c linux-2.6.22-591/drivers/sbus/char/envctrl.c --- linux-2.6.22-570/drivers/sbus/char/envctrl.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/sbus/char/envctrl.c 2007-12-21 15:36:12.000000000 -0500 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -966,10 +967,6 @@ static void envctrl_do_shutdown(void) { static int inprog = 0; - static char *envp[] = { - "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { - "/sbin/shutdown", "-h", "now", NULL }; int ret; if (inprog != 0) @@ -977,7 +974,7 @@ inprog = 1; printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); - ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0); + ret = orderly_poweroff(true); if (ret < 0) { printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); inprog = 0; /* unlikely to succeed, but we could try again */ diff -Nurb linux-2.6.22-570/drivers/scsi/3w-9xxx.c linux-2.6.22-591/drivers/scsi/3w-9xxx.c --- linux-2.6.22-570/drivers/scsi/3w-9xxx.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/3w-9xxx.c 2007-12-21 15:36:12.000000000 -0500 @@ -1307,22 +1307,26 @@ wake_up(&tw_dev->ioctl_wqueue); } } else { + struct scsi_cmnd *cmd; + + cmd = tw_dev->srb[request_id]; + twa_scsiop_execute_scsi_complete(tw_dev, request_id); /* If no error command was a success */ if (error == 0) { - tw_dev->srb[request_id]->result = (DID_OK << 16); + cmd->result = (DID_OK << 16); } /* If error, command failed */ if (error == 1) { /* Ask for a host reset */ - tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1); + cmd->result = (DID_OK << 16) | (CHECK_CONDITION << 1); } /* Report residual bytes for single sgl */ - if ((tw_dev->srb[request_id]->use_sg <= 1) && (full_command_packet->command.newcommand.status == 0)) { - if (full_command_packet->command.newcommand.sg_list[0].length < tw_dev->srb[request_id]->request_bufflen) - tw_dev->srb[request_id]->resid = tw_dev->srb[request_id]->request_bufflen - full_command_packet->command.newcommand.sg_list[0].length; + if 
((scsi_sg_count(cmd) <= 1) && (full_command_packet->command.newcommand.status == 0)) { + if (full_command_packet->command.newcommand.sg_list[0].length < scsi_bufflen(tw_dev->srb[request_id])) + scsi_set_resid(cmd, scsi_bufflen(cmd) - full_command_packet->command.newcommand.sg_list[0].length); } /* Now complete the io */ @@ -1385,52 +1389,20 @@ { int use_sg; struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - struct pci_dev *pdev = tw_dev->tw_pci_dev; - int retval = 0; - - if (cmd->use_sg == 0) - goto out; - - use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); - if (use_sg == 0) { + use_sg = scsi_dma_map(cmd); + if (!use_sg) + return 0; + else if (use_sg < 0) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to map scatter gather list"); - goto out; + return 0; } cmd->SCp.phase = TW_PHASE_SGLIST; cmd->SCp.have_data_in = use_sg; - retval = use_sg; -out: - return retval; -} /* End twa_map_scsi_sg_data() */ - -/* This function will perform a pci-dma map for a single buffer */ -static dma_addr_t twa_map_scsi_single_data(TW_Device_Extension *tw_dev, int request_id) -{ - dma_addr_t mapping; - struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - struct pci_dev *pdev = tw_dev->tw_pci_dev; - dma_addr_t retval = 0; - - if (cmd->request_bufflen == 0) { - retval = 0; - goto out; - } - - mapping = pci_map_single(pdev, cmd->request_buffer, cmd->request_bufflen, DMA_BIDIRECTIONAL); - - if (mapping == 0) { - TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1d, "Failed to map page"); - goto out; - } - cmd->SCp.phase = TW_PHASE_SINGLE; - cmd->SCp.have_data_in = mapping; - retval = mapping; -out: - return retval; -} /* End twa_map_scsi_single_data() */ + return use_sg; +} /* End twa_map_scsi_sg_data() */ /* This function will poll for a response interrupt of a request */ static int twa_poll_response(TW_Device_Extension *tw_dev, int request_id, int seconds) @@ -1816,15 +1788,13 @@ u32 num_sectors = 0x0; int i, sg_count; struct scsi_cmnd *srb = NULL; - struct scatterlist 
*sglist = NULL; - dma_addr_t buffaddr = 0x0; + struct scatterlist *sglist = NULL, *sg; int retval = 1; if (tw_dev->srb[request_id]) { - if (tw_dev->srb[request_id]->request_buffer) { - sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; - } srb = tw_dev->srb[request_id]; + if (scsi_sglist(srb)) + sglist = scsi_sglist(srb); } /* Initialize command packet */ @@ -1857,32 +1827,12 @@ if (!sglistarg) { /* Map sglist from scsi layer to cmd packet */ - if (tw_dev->srb[request_id]->use_sg == 0) { - if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH) { - command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]); - command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH); - if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) - memcpy(tw_dev->generic_buffer_virt[request_id], tw_dev->srb[request_id]->request_buffer, tw_dev->srb[request_id]->request_bufflen); - } else { - buffaddr = twa_map_scsi_single_data(tw_dev, request_id); - if (buffaddr == 0) - goto out; - - command_packet->sg_list[0].address = TW_CPU_TO_SGL(buffaddr); - command_packet->sg_list[0].length = cpu_to_le32(tw_dev->srb[request_id]->request_bufflen); - } - command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), 1)); - if (command_packet->sg_list[0].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) { - TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2d, "Found unaligned address during execute scsi"); - goto out; - } - } - - if (tw_dev->srb[request_id]->use_sg > 0) { - if ((tw_dev->srb[request_id]->use_sg == 1) && (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH)) { - if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) { - struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; + if (scsi_sg_count(srb)) { + if 
((scsi_sg_count(srb) == 1) && + (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { + if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) { + struct scatterlist *sg = scsi_sglist(srb); char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length); kunmap_atomic(buf - sg->offset, KM_IRQ0); @@ -1894,16 +1844,16 @@ if (sg_count == 0) goto out; - for (i = 0; i < sg_count; i++) { - command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(&sglist[i])); - command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(&sglist[i])); + scsi_for_each_sg(srb, sg, sg_count, i) { + command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(sg)); + command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(sg)); if (command_packet->sg_list[i].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2e, "Found unaligned sgl address during execute scsi"); goto out; } } } - command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), tw_dev->srb[request_id]->use_sg)); + command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), scsi_sg_count(tw_dev->srb[request_id]))); } } else { /* Internal cdb post */ @@ -1933,7 +1883,7 @@ /* Update SG statistics */ if (srb) { - tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg; + tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]); if (tw_dev->sgl_entries > tw_dev->max_sgl_entries) tw_dev->max_sgl_entries = tw_dev->sgl_entries; } @@ -1952,16 +1902,13 @@ /* This function completes an execute scsi operation */ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id) { - if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH && - (tw_dev->srb[request_id]->sc_data_direction == DMA_FROM_DEVICE || - tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)) { - if (tw_dev->srb[request_id]->use_sg == 0) { - 
memcpy(tw_dev->srb[request_id]->request_buffer, - tw_dev->generic_buffer_virt[request_id], - tw_dev->srb[request_id]->request_bufflen); - } - if (tw_dev->srb[request_id]->use_sg == 1) { - struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; + struct scsi_cmnd *cmd = tw_dev->srb[request_id]; + + if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH && + (cmd->sc_data_direction == DMA_FROM_DEVICE || + cmd->sc_data_direction == DMA_BIDIRECTIONAL)) { + if (scsi_sg_count(cmd) == 1) { + struct scatterlist *sg = scsi_sglist(tw_dev->srb[request_id]); char *buf; unsigned long flags = 0; local_irq_save(flags); @@ -2018,16 +1965,8 @@ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id) { struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - struct pci_dev *pdev = tw_dev->tw_pci_dev; - switch(cmd->SCp.phase) { - case TW_PHASE_SINGLE: - pci_unmap_single(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL); - break; - case TW_PHASE_SGLIST: - pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); - break; - } + scsi_dma_unmap(cmd); } /* End twa_unmap_scsi_data() */ /* scsi_host_template initializer */ diff -Nurb linux-2.6.22-570/drivers/scsi/3w-xxxx.c linux-2.6.22-591/drivers/scsi/3w-xxxx.c --- linux-2.6.22-570/drivers/scsi/3w-xxxx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/3w-xxxx.c 2007-12-21 15:36:12.000000000 -0500 @@ -1274,12 +1274,8 @@ dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data()\n"); - if (cmd->use_sg == 0) - return 0; - - use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); - - if (use_sg == 0) { + use_sg = scsi_dma_map(cmd); + if (use_sg < 0) { printk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data(): pci_map_sg() failed.\n"); return 0; } @@ -1290,40 +1286,11 @@ return use_sg; } /* End tw_map_scsi_sg_data() */ -static u32 tw_map_scsi_single_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) -{ - dma_addr_t mapping; - - 
dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data()\n"); - - if (cmd->request_bufflen == 0) - return 0; - - mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), offset_in_page(cmd->request_buffer), cmd->request_bufflen, DMA_BIDIRECTIONAL); - - if (mapping == 0) { - printk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data(): pci_map_page() failed.\n"); - return 0; - } - - cmd->SCp.phase = TW_PHASE_SINGLE; - cmd->SCp.have_data_in = mapping; - - return mapping; -} /* End tw_map_scsi_single_data() */ - static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) { dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n"); - switch(cmd->SCp.phase) { - case TW_PHASE_SINGLE: - pci_unmap_page(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL); - break; - case TW_PHASE_SGLIST: - pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); - break; - } + scsi_dma_unmap(cmd); } /* End tw_unmap_scsi_data() */ /* This function will reset a device extension */ @@ -1499,27 +1466,16 @@ void *buf; unsigned int transfer_len; unsigned long flags = 0; + struct scatterlist *sg = scsi_sglist(cmd); - if (cmd->use_sg) { - struct scatterlist *sg = - (struct scatterlist *)cmd->request_buffer; local_irq_save(flags); buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; transfer_len = min(sg->length, len); - } else { - buf = cmd->request_buffer; - transfer_len = min(cmd->request_bufflen, len); - } memcpy(buf, data, transfer_len); - if (cmd->use_sg) { - struct scatterlist *sg; - - sg = (struct scatterlist *)cmd->request_buffer; kunmap_atomic(buf - sg->offset, KM_IRQ0); local_irq_restore(flags); - } } /* This function is called by the isr to complete an inquiry command */ @@ -1764,19 +1720,20 @@ { TW_Command *command_packet; unsigned long command_que_value; - u32 lba = 0x0, num_sectors = 0x0, buffaddr = 0x0; + u32 lba = 0x0, num_sectors = 0x0; int i, use_sg; struct scsi_cmnd *srb; - struct scatterlist *sglist; + struct scatterlist *sglist, 
*sg; dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write()\n"); - if (tw_dev->srb[request_id]->request_buffer == NULL) { + srb = tw_dev->srb[request_id]; + + sglist = scsi_sglist(srb); + if (!sglist) { printk(KERN_WARNING "3w-xxxx: tw_scsiop_read_write(): Request buffer NULL.\n"); return 1; } - sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; - srb = tw_dev->srb[request_id]; /* Initialize command packet */ command_packet = (TW_Command *)tw_dev->command_packet_virtual_address[request_id]; @@ -1819,33 +1776,18 @@ command_packet->byte8.io.lba = lba; command_packet->byte6.block_count = num_sectors; - /* Do this if there are no sg list entries */ - if (tw_dev->srb[request_id]->use_sg == 0) { - dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write(): SG = 0\n"); - buffaddr = tw_map_scsi_single_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]); - if (buffaddr == 0) - return 1; - - command_packet->byte8.io.sgl[0].address = buffaddr; - command_packet->byte8.io.sgl[0].length = tw_dev->srb[request_id]->request_bufflen; - command_packet->size+=2; - } - - /* Do this if we have multiple sg list entries */ - if (tw_dev->srb[request_id]->use_sg > 0) { use_sg = tw_map_scsi_sg_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]); - if (use_sg == 0) + if (!use_sg) return 1; - for (i=0;ibyte8.io.sgl[i].address = sg_dma_address(&sglist[i]); - command_packet->byte8.io.sgl[i].length = sg_dma_len(&sglist[i]); + scsi_for_each_sg(tw_dev->srb[request_id], sg, use_sg, i) { + command_packet->byte8.io.sgl[i].address = sg_dma_address(sg); + command_packet->byte8.io.sgl[i].length = sg_dma_len(sg); command_packet->size+=2; } - } /* Update SG statistics */ - tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg; + tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]); if (tw_dev->sgl_entries > tw_dev->max_sgl_entries) tw_dev->max_sgl_entries = tw_dev->sgl_entries; diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.c linux-2.6.22-591/drivers/scsi/53c700.c --- 
linux-2.6.22-570/drivers/scsi/53c700.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c700.c 2007-12-21 15:36:12.000000000 -0500 @@ -585,16 +585,8 @@ struct NCR_700_command_slot *slot) { if(SCp->sc_data_direction != DMA_NONE && - SCp->sc_data_direction != DMA_BIDIRECTIONAL) { - if(SCp->use_sg) { - dma_unmap_sg(hostdata->dev, SCp->request_buffer, - SCp->use_sg, SCp->sc_data_direction); - } else { - dma_unmap_single(hostdata->dev, slot->dma_handle, - SCp->request_bufflen, - SCp->sc_data_direction); - } - } + SCp->sc_data_direction != DMA_BIDIRECTIONAL) + scsi_dma_unmap(SCp); } STATIC inline void @@ -661,7 +653,6 @@ { struct NCR_700_Host_Parameters *hostdata = (struct NCR_700_Host_Parameters *)host->hostdata[0]; - __u32 dcntl_extra = 0; __u8 min_period; __u8 min_xferp = (hostdata->chip710 ? NCR_710_MIN_XFERP : NCR_700_MIN_XFERP); @@ -686,13 +677,14 @@ burst_disable = BURST_DISABLE; break; } - dcntl_extra = COMPAT_700_MODE; + hostdata->dcntl_extra |= COMPAT_700_MODE; - NCR_700_writeb(dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(hostdata->dcntl_extra, host, DCNTL_REG); NCR_700_writeb(burst_length | hostdata->dmode_extra, host, DMODE_710_REG); - NCR_700_writeb(burst_disable | (hostdata->differential ? - DIFF : 0), host, CTEST7_REG); + NCR_700_writeb(burst_disable | hostdata->ctest7_extra | + (hostdata->differential ? 
DIFF : 0), + host, CTEST7_REG); NCR_700_writeb(BTB_TIMER_DISABLE, host, CTEST0_REG); NCR_700_writeb(FULL_ARBITRATION | ENABLE_PARITY | PARITY | AUTO_ATN, host, SCNTL0_REG); @@ -727,13 +719,13 @@ * of spec: sync divider 2, async divider 3 */ DEBUG(("53c700: sync 2 async 3\n")); NCR_700_writeb(SYNC_DIV_2_0, host, SBCL_REG); - NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG); hostdata->sync_clock = hostdata->clock/2; } else if(hostdata->clock > 50 && hostdata->clock <= 75) { /* sync divider 1.5, async divider 3 */ DEBUG(("53c700: sync 1.5 async 3\n")); NCR_700_writeb(SYNC_DIV_1_5, host, SBCL_REG); - NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG); hostdata->sync_clock = hostdata->clock*2; hostdata->sync_clock /= 3; @@ -741,18 +733,18 @@ /* sync divider 1, async divider 2 */ DEBUG(("53c700: sync 1 async 2\n")); NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); - NCR_700_writeb(ASYNC_DIV_2_0 | dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(ASYNC_DIV_2_0 | hostdata->dcntl_extra, host, DCNTL_REG); hostdata->sync_clock = hostdata->clock; } else if(hostdata->clock > 25 && hostdata->clock <=37) { /* sync divider 1, async divider 1.5 */ DEBUG(("53c700: sync 1 async 1.5\n")); NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); - NCR_700_writeb(ASYNC_DIV_1_5 | dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(ASYNC_DIV_1_5 | hostdata->dcntl_extra, host, DCNTL_REG); hostdata->sync_clock = hostdata->clock; } else { DEBUG(("53c700: sync 1 async 1\n")); NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); - NCR_700_writeb(ASYNC_DIV_1_0 | dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(ASYNC_DIV_1_0 | hostdata->dcntl_extra, host, DCNTL_REG); /* sync divider 1, async divider 1 */ hostdata->sync_clock = hostdata->clock; } @@ -1263,12 +1255,11 @@ host->host_no, pun, lun, NCR_700_condition[i], NCR_700_phase[j], dsp - 
hostdata->pScript); if(SCp != NULL) { - scsi_print_command(SCp); + struct scatterlist *sg; - if(SCp->use_sg) { - for(i = 0; i < SCp->use_sg + 1; i++) { - printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, ((struct scatterlist *)SCp->request_buffer)[i].length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr); - } + scsi_print_command(SCp); + scsi_for_each_sg(SCp, sg, scsi_sg_count(SCp) + 1, i) { + printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, sg->length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr); } } NCR_700_internal_bus_reset(host); @@ -1844,8 +1835,8 @@ } /* sanity check: some of the commands generated by the mid-layer * have an eccentric idea of their sc_data_direction */ - if(!SCp->use_sg && !SCp->request_bufflen - && SCp->sc_data_direction != DMA_NONE) { + if(!scsi_sg_count(SCp) && !scsi_bufflen(SCp) && + SCp->sc_data_direction != DMA_NONE) { #ifdef NCR_700_DEBUG printk("53c700: Command"); scsi_print_command(SCp); @@ -1887,31 +1878,15 @@ int i; int sg_count; dma_addr_t vPtr = 0; + struct scatterlist *sg; __u32 count = 0; - if(SCp->use_sg) { - sg_count = dma_map_sg(hostdata->dev, - SCp->request_buffer, SCp->use_sg, - direction); - } else { - vPtr = dma_map_single(hostdata->dev, - SCp->request_buffer, - SCp->request_bufflen, - direction); - count = SCp->request_bufflen; - slot->dma_handle = vPtr; - sg_count = 1; - } - - - for(i = 0; i < sg_count; i++) { + sg_count = scsi_dma_map(SCp); + BUG_ON(sg_count < 0); - if(SCp->use_sg) { - struct scatterlist *sg = SCp->request_buffer; - - vPtr = sg_dma_address(&sg[i]); - count = sg_dma_len(&sg[i]); - } + scsi_for_each_sg(SCp, sg, sg_count, i) { + vPtr = sg_dma_address(sg); + count = sg_dma_len(sg); slot->SG[i].ins = bS_to_host(move_ins | count); DEBUG((" scatter block %d: move %d[%08x] from 0x%lx\n", diff -Nurb 
linux-2.6.22-570/drivers/scsi/53c700.h linux-2.6.22-591/drivers/scsi/53c700.h --- linux-2.6.22-570/drivers/scsi/53c700.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c700.h 2007-12-21 15:36:12.000000000 -0500 @@ -177,6 +177,7 @@ __u8 state; #define NCR_700_FLAG_AUTOSENSE 0x01 __u8 flags; + __u8 pad1[2]; /* Needed for m68k where min alignment is 2 bytes */ int tag; __u32 resume_offset; struct scsi_cmnd *cmnd; @@ -196,6 +197,8 @@ void __iomem *base; /* the base for the port (copied to host) */ struct device *dev; __u32 dmode_extra; /* adjustable bus settings */ + __u32 dcntl_extra; /* adjustable bus settings */ + __u32 ctest7_extra; /* adjustable bus settings */ __u32 differential:1; /* if we are differential */ #ifdef CONFIG_53C700_LE_ON_BE /* This option is for HP only. Set it if your chip is wired for @@ -352,6 +355,7 @@ #define SEL_TIMEOUT_DISABLE 0x10 /* 710 only */ #define DFP 0x08 #define EVP 0x04 +#define CTEST7_TT1 0x02 #define DIFF 0x01 #define CTEST6_REG 0x1A #define TEMP_REG 0x1C @@ -385,6 +389,7 @@ #define SOFTWARE_RESET 0x01 #define COMPAT_700_MODE 0x01 #define SCRPTS_16BITS 0x20 +#define EA_710 0x20 #define ASYNC_DIV_2_0 0x00 #define ASYNC_DIV_1_5 0x40 #define ASYNC_DIV_1_0 0x80 diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.c linux-2.6.22-591/drivers/scsi/53c7xx.c --- linux-2.6.22-570/drivers/scsi/53c7xx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c7xx.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,6102 +0,0 @@ -/* - * 53c710 driver. Modified from Drew Eckhardts driver - * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] - * Check out PERM_OPTIONS and EXPECTED_CLOCK, which may be defined in the - * relevant machine specific file (eg. mvme16x.[ch], amiga7xx.[ch]). - * There are also currently some defines at the top of 53c7xx.scr. - * The chip type is #defined in script_asm.pl, as well as the Makefile. - * Host scsi ID expected to be 7 - see NCR53c7x0_init(). 
- * - * I have removed the PCI code and some of the 53c8xx specific code - - * simply to make this file smaller and easier to manage. - * - * MVME16x issues: - * Problems trying to read any chip registers in NCR53c7x0_init(), as they - * may never have been set by 16xBug (eg. If kernel has come in over tftp). - */ - -/* - * Adapted for Linux/m68k Amiga platforms for the A4000T/A4091 and - * WarpEngine SCSI controllers. - * By Alan Hourihane - * Thanks to Richard Hirst for making it possible with the MVME additions - */ - -/* - * 53c710 rev 0 doesn't support add with carry. Rev 1 and 2 does. To - * overcome this problem you can define FORCE_DSA_ALIGNMENT, which ensures - * that the DSA address is always xxxxxx00. If disconnection is not allowed, - * then the script only ever tries to add small (< 256) positive offsets to - * DSA, so lack of carry isn't a problem. FORCE_DSA_ALIGNMENT can, of course, - * be defined for all chip revisions at a small cost in memory usage. - */ - -#define FORCE_DSA_ALIGNMENT - -/* - * Selection timer does not always work on the 53c710, depending on the - * timing at the last disconnect, if this is a problem for you, try - * using validids as detailed below. - * - * Options for the NCR7xx driver - * - * noasync:0 - disables sync and asynchronous negotiation - * nosync:0 - disables synchronous negotiation (does async) - * nodisconnect:0 - disables disconnection - * validids:0x?? - Bitmask field that disallows certain ID's. - * - e.g. 0x03 allows ID 0,1 - * - 0x1F allows ID 0,1,2,3,4 - * opthi:n - replace top word of options with 'n' - * optlo:n - replace bottom word of options with 'n' - * - ALWAYS SPECIFY opthi THEN optlo <<<<<<<<<< - */ - -/* - * PERM_OPTIONS are driver options which will be enabled for all NCR boards - * in the system at driver initialization time. - * - * Don't THINK about touching these in PERM_OPTIONS : - * OPTION_MEMORY_MAPPED - * 680x0 doesn't have an IO map! 
- * - * OPTION_DEBUG_TEST1 - * Test 1 does bus mastering and interrupt tests, which will help weed - * out brain damaged main boards. - * - * Other PERM_OPTIONS settings are listed below. Note the actual options - * required are set in the relevant file (mvme16x.c, amiga7xx.c, etc): - * - * OPTION_NO_ASYNC - * Don't negotiate for asynchronous transfers on the first command - * when OPTION_ALWAYS_SYNCHRONOUS is set. Useful for dain bramaged - * devices which do something bad rather than sending a MESSAGE - * REJECT back to us like they should if they can't cope. - * - * OPTION_SYNCHRONOUS - * Enable support for synchronous transfers. Target negotiated - * synchronous transfers will be responded to. To initiate - * a synchronous transfer request, call - * - * request_synchronous (hostno, target) - * - * from within KGDB. - * - * OPTION_ALWAYS_SYNCHRONOUS - * Negotiate for synchronous transfers with every target after - * driver initialization or a SCSI bus reset. This is a bit dangerous, - * since there are some dain bramaged SCSI devices which will accept - * SDTR messages but keep talking asynchronously. - * - * OPTION_DISCONNECT - * Enable support for disconnect/reconnect. To change the - * default setting on a given host adapter, call - * - * request_disconnect (hostno, allow) - * - * where allow is non-zero to allow, 0 to disallow. - * - * If you really want to run 10MHz FAST SCSI-II transfers, you should - * know that the NCR driver currently ignores parity information. Most - * systems do 5MHz SCSI fine. I've seen a lot that have problems faster - * than 8MHz. To play it safe, we only request 5MHz transfers. - * - * If you'd rather get 10MHz transfers, edit sdtr_message and change - * the fourth byte from 50 to 25. 
- */ - -/* - * Sponsored by - * iX Multiuser Multitasking Magazine - * Hannover, Germany - * hm@ix.de - * - * Copyright 1993, 1994, 1995 Drew Eckhardt - * Visionary Computing - * (Unix and Linux consulting and custom programming) - * drew@PoohSticks.ORG - * +1 (303) 786-7975 - * - * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. - * - * For more information, please consult - * - * NCR53C810 - * SCSI I/O Processor - * Programmer's Guide - * - * NCR 53C810 - * PCI-SCSI I/O Processor - * Data Manual - * - * NCR 53C810/53C820 - * PCI-SCSI I/O Processor Design In Guide - * - * For literature on Symbios Logic Inc. formerly NCR, SCSI, - * and Communication products please call (800) 334-5454 or - * (719) 536-3300. - * - * PCI BIOS Specification Revision - * PCI Local Bus Specification - * PCI System Design Guide - * - * PCI Special Interest Group - * M/S HF3-15A - * 5200 N.E. Elam Young Parkway - * Hillsboro, Oregon 97124-6497 - * +1 (503) 696-2000 - * +1 (800) 433-5177 - */ - -/* - * Design issues : - * The cumulative latency needed to propagate a read/write request - * through the file system, buffer cache, driver stacks, SCSI host, and - * SCSI device is ultimately the limiting factor in throughput once we - * have a sufficiently fast host adapter. - * - * So, to maximize performance we want to keep the ratio of latency to data - * transfer time to a minimum by - * 1. Minimizing the total number of commands sent (typical command latency - * including drive and bus mastering host overhead is as high as 4.5ms) - * to transfer a given amount of data. - * - * This is accomplished by placing no arbitrary limit on the number - * of scatter/gather buffers supported, since we can transfer 1K - * per scatter/gather buffer without Eric's cluster patches, - * 4K with. - * - * 2. Minimizing the number of fatal interrupts serviced, since - * fatal interrupts halt the SCSI I/O processor. 
Basically, - * this means offloading the practical maximum amount of processing - * to the SCSI chip. - * - * On the NCR53c810/820/720, this is accomplished by using - * interrupt-on-the-fly signals when commands complete, - * and only handling fatal errors and SDTR / WDTR messages - * in the host code. - * - * On the NCR53c710, interrupts are generated as on the NCR53c8x0, - * only the lack of a interrupt-on-the-fly facility complicates - * things. Also, SCSI ID registers and commands are - * bit fielded rather than binary encoded. - * - * On the NCR53c700 and NCR53c700-66, operations that are done via - * indirect, table mode on the more advanced chips must be - * replaced by calls through a jump table which - * acts as a surrogate for the DSA. Unfortunately, this - * will mean that we must service an interrupt for each - * disconnect/reconnect. - * - * 3. Eliminating latency by pipelining operations at the different levels. - * - * This driver allows a configurable number of commands to be enqueued - * for each target/lun combination (experimentally, I have discovered - * that two seems to work best) and will ultimately allow for - * SCSI-II tagged queuing. - * - * - * Architecture : - * This driver is built around a Linux queue of commands waiting to - * be executed, and a shared Linux/NCR array of commands to start. Commands - * are transferred to the array by the run_process_issue_queue() function - * which is called whenever a command completes. - * - * As commands are completed, the interrupt routine is triggered, - * looks for commands in the linked list of completed commands with - * valid status, removes these commands from a list of running commands, - * calls the done routine, and flags their target/luns as not busy. - * - * Due to limitations in the intelligence of the NCR chips, certain - * concessions are made. In many cases, it is easier to dynamically - * generate/fix-up code rather than calculate on the NCR at run time. 
- * So, code is generated or fixed up for - * - * - Handling data transfers, using a variable number of MOVE instructions - * interspersed with CALL MSG_IN, WHEN MSGIN instructions. - * - * The DATAIN and DATAOUT routines are separate, so that an incorrect - * direction can be trapped, and space isn't wasted. - * - * It may turn out that we're better off using some sort - * of table indirect instruction in a loop with a variable - * sized table on the NCR53c710 and newer chips. - * - * - Checking for reselection (NCR53c710 and better) - * - * - Handling the details of SCSI context switches (NCR53c710 and better), - * such as reprogramming appropriate synchronous parameters, - * removing the dsa structure from the NCR's queue of outstanding - * commands, etc. - * - */ - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_AMIGA -#include -#include -#include - -#define BIG_ENDIAN -#define NO_IO_SPACE -#endif - -#ifdef CONFIG_MVME16x -#include - -#define BIG_ENDIAN -#define NO_IO_SPACE -#define VALID_IDS -#endif - -#ifdef CONFIG_BVME6000 -#include - -#define BIG_ENDIAN -#define NO_IO_SPACE -#define VALID_IDS -#endif - -#include "scsi.h" -#include -#include -#include -#include "53c7xx.h" -#include -#include - -#ifdef NO_IO_SPACE -/* - * The following make the definitions in 53c7xx.h (write8, etc) smaller, - * we don't have separate i/o space anyway. 
- */ -#undef inb -#undef outb -#undef inw -#undef outw -#undef inl -#undef outl -#define inb(x) 1 -#define inw(x) 1 -#define inl(x) 1 -#define outb(x,y) 1 -#define outw(x,y) 1 -#define outl(x,y) 1 -#endif - -static int check_address (unsigned long addr, int size); -static void dump_events (struct Scsi_Host *host, int count); -static Scsi_Cmnd * return_outstanding_commands (struct Scsi_Host *host, - int free, int issue); -static void hard_reset (struct Scsi_Host *host); -static void ncr_scsi_reset (struct Scsi_Host *host); -static void print_lots (struct Scsi_Host *host); -static void set_synchronous (struct Scsi_Host *host, int target, int sxfer, - int scntl3, int now_connected); -static int datapath_residual (struct Scsi_Host *host); -static const char * sbcl_to_phase (int sbcl); -static void print_progress (Scsi_Cmnd *cmd); -static void print_queues (struct Scsi_Host *host); -static void process_issue_queue (unsigned long flags); -static int shutdown (struct Scsi_Host *host); -static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int result); -static int disable (struct Scsi_Host *host); -static int NCR53c7xx_run_tests (struct Scsi_Host *host); -static irqreturn_t NCR53c7x0_intr(int irq, void *dev_id); -static void NCR53c7x0_intfly (struct Scsi_Host *host); -static int ncr_halt (struct Scsi_Host *host); -static void intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd - *cmd); -static void intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd); -static void print_dsa (struct Scsi_Host *host, u32 *dsa, - const char *prefix); -static int print_insn (struct Scsi_Host *host, const u32 *insn, - const char *prefix, int kernel); - -static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd); -static void NCR53c7x0_init_fixup (struct Scsi_Host *host); -static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct - NCR53c7x0_cmd *cmd); -static void NCR53c7x0_soft_reset (struct Scsi_Host *host); - -/* Size of event list (per host adapter) */ 
-static int track_events = 0; -static struct Scsi_Host *first_host = NULL; /* Head of list of NCR boards */ -static struct scsi_host_template *the_template = NULL; - -/* NCR53c710 script handling code */ - -#include "53c7xx_d.h" -#ifdef A_int_debug_sync -#define DEBUG_SYNC_INTR A_int_debug_sync -#endif -int NCR53c7xx_script_len = sizeof (SCRIPT); -int NCR53c7xx_dsa_len = A_dsa_end + Ent_dsa_zero - Ent_dsa_code_template; -#ifdef FORCE_DSA_ALIGNMENT -int CmdPageStart = (0 - Ent_dsa_zero - sizeof(struct NCR53c7x0_cmd)) & 0xff; -#endif - -static char *setup_strings[] = - {"","","","","","","",""}; - -#define MAX_SETUP_STRINGS ARRAY_SIZE(setup_strings) -#define SETUP_BUFFER_SIZE 200 -static char setup_buffer[SETUP_BUFFER_SIZE]; -static char setup_used[MAX_SETUP_STRINGS]; - -void ncr53c7xx_setup (char *str, int *ints) -{ - int i; - char *p1, *p2; - - p1 = setup_buffer; - *p1 = '\0'; - if (str) - strncpy(p1, str, SETUP_BUFFER_SIZE - strlen(setup_buffer)); - setup_buffer[SETUP_BUFFER_SIZE - 1] = '\0'; - p1 = setup_buffer; - i = 0; - while (*p1 && (i < MAX_SETUP_STRINGS)) { - p2 = strchr(p1, ','); - if (p2) { - *p2 = '\0'; - if (p1 != p2) - setup_strings[i] = p1; - p1 = p2 + 1; - i++; - } - else { - setup_strings[i] = p1; - break; - } - } - for (i=0; i= '0') && (*cp <= '9')) { - *val = simple_strtoul(cp,NULL,0); - } - return ++x; -} - - - -/* - * KNOWN BUGS : - * - There is some sort of conflict when the PPP driver is compiled with - * support for 16 channels? - * - * - On systems which predate the 1.3.x initialization order change, - * the NCR driver will cause Cannot get free page messages to appear. - * These are harmless, but I don't know of an easy way to avoid them. - * - * - With OPTION_DISCONNECT, on two systems under unknown circumstances, - * we get a PHASE MISMATCH with DSA set to zero (suggests that we - * are occurring somewhere in the reselection code) where - * DSP=some value DCMD|DBC=same value. 
- * - * Closer inspection suggests that we may be trying to execute - * some portion of the DSA? - * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO) - * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO) - * scsi0 : no current command : unexpected phase MSGIN. - * DSP=0x1c46cc, DCMD|DBC=0x1c46ac, DSA=0x0 - * DSPS=0x0, TEMP=0x1c3e70, DMODE=0x80 - * scsi0 : DSP-> - * 001c46cc : 0x001c46cc 0x00000000 - * 001c46d4 : 0x001c5ea0 0x000011f8 - * - * Changed the print code in the phase_mismatch handler so - * that we call print_lots to try to diagnose this. - * - */ - -/* - * Possible future direction of architecture for max performance : - * - * We're using a single start array for the NCR chip. This is - * sub-optimal, because we cannot add a command which would conflict with - * an executing command to this start queue, and therefore must insert the - * next command for a given I/T/L combination after the first has completed; - * incurring our interrupt latency between SCSI commands. - * - * To allow further pipelining of the NCR and host CPU operation, we want - * to set things up so that immediately on termination of a command destined - * for a given LUN, we get that LUN busy again. - * - * To do this, we need to add a 32 bit pointer to which is jumped to - * on completion of a command. If no new command is available, this - * would point to the usual DSA issue queue select routine. - * - * If one were, it would point to a per-NCR53c7x0_cmd select routine - * which starts execution immediately, inserting the command at the head - * of the start queue if the NCR chip is selected or reselected. - * - * We would change so that we keep a list of outstanding commands - * for each unit, rather than a single running_list. We'd insert - * a new command into the right running list; if the NCR didn't - * have something running for that yet, we'd put it in the - * start queue as well. 
Some magic needs to happen to handle the - * race condition between the first command terminating before the - * new one is written. - * - * Potential for profiling : - * Call do_gettimeofday(struct timeval *tv) to get 800ns resolution. - */ - - -/* - * TODO : - * 1. To support WIDE transfers, not much needs to happen. We - * should do CHMOVE instructions instead of MOVEs when - * we have scatter/gather segments of uneven length. When - * we do this, we need to handle the case where we disconnect - * between segments. - * - * 2. Currently, when Icky things happen we do a FATAL(). Instead, - * we want to do an integrity check on the parts of the NCR hostdata - * structure which were initialized at boot time; FATAL() if that - * fails, and otherwise try to recover. Keep track of how many - * times this has happened within a single SCSI command; if it - * gets excessive, then FATAL(). - * - * 3. Parity checking is currently disabled, and a few things should - * happen here now that we support synchronous SCSI transfers : - * 1. On soft-reset, we shoould set the EPC (Enable Parity Checking) - * and AAP (Assert SATN/ on parity error) bits in SCNTL0. - * - * 2. We should enable the parity interrupt in the SIEN0 register. - * - * 3. intr_phase_mismatch() needs to believe that message out is - * always an "acceptable" phase to have a mismatch in. If - * the old phase was MSG_IN, we should send a MESSAGE PARITY - * error. If the old phase was something else, we should send - * a INITIATOR_DETECTED_ERROR message. Note that this could - * cause a RESTORE POINTERS message; so we should handle that - * correctly first. Instead, we should probably do an - * initiator_abort. - * - * 4. MPEE bit of CTEST4 should be set so we get interrupted if - * we detect an error. - * - * - * 5. The initial code has been tested on the NCR53c810. 
I don't - * have access to NCR53c700, 700-66 (Forex boards), NCR53c710 - * (NCR Pentium systems), NCR53c720, NCR53c820, or NCR53c825 boards to - * finish development on those platforms. - * - * NCR53c820/825/720 - need to add wide transfer support, including WDTR - * negotiation, programming of wide transfer capabilities - * on reselection and table indirect selection. - * - * NCR53c710 - need to add fatal interrupt or GEN code for - * command completion signaling. Need to modify all - * SDID, SCID, etc. registers, and table indirect select code - * since these use bit fielded (ie 1<NOP_insn) ? - /* - * If the IF TRUE bit is set, it's a JUMP instruction. The - * operand is a bus pointer to the dsa_begin routine for this DSA. The - * dsa field of the NCR53c7x0_cmd structure starts with the - * DSA code template. By converting to a virtual address, - * subtracting the code template size, and offset of the - * dsa field, we end up with a pointer to the start of the - * structure (alternatively, we could use the - * dsa_cmnd field, an anachronism from when we weren't - * sure what the relationship between the NCR structures - * and host structures were going to be. - */ - (struct NCR53c7x0_cmd *) ((char *) bus_to_virt (issue[1]) - - (hostdata->E_dsa_code_begin - hostdata->E_dsa_code_template) - - offsetof(struct NCR53c7x0_cmd, dsa)) - /* If the IF TRUE bit is not set, it's a NOP */ - : NULL; -} - - -/* - * FIXME: we should junk these, in favor of synchronous_want and - * wide_want in the NCR53c7x0_hostdata structure. - */ - -/* Template for "preferred" synchronous transfer parameters. 
*/ - -static const unsigned char sdtr_message[] = { -#ifdef CONFIG_SCSI_NCR53C7xx_FAST - EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 25 /* *4ns */, 8 /* off */ -#else - EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 50 /* *4ns */, 8 /* off */ -#endif -}; - -/* Template to request asynchronous transfers */ - -static const unsigned char async_message[] = { - EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 0, 0 /* asynchronous */ -}; - -/* Template for "preferred" WIDE transfer parameters */ - -static const unsigned char wdtr_message[] = { - EXTENDED_MESSAGE, 2 /* length */, EXTENDED_WDTR, 1 /* 2^1 bytes */ -}; - -#if 0 -/* - * Function : struct Scsi_Host *find_host (int host) - * - * Purpose : KGDB support function which translates a host number - * to a host structure. - * - * Inputs : host - number of SCSI host - * - * Returns : NULL on failure, pointer to host structure on success. - */ - -static struct Scsi_Host * -find_host (int host) { - struct Scsi_Host *h; - for (h = first_host; h && h->host_no != host; h = h->next); - if (!h) { - printk (KERN_ALERT "scsi%d not found\n", host); - return NULL; - } else if (h->hostt != the_template) { - printk (KERN_ALERT "scsi%d is not a NCR board\n", host); - return NULL; - } - return h; -} - -#if 0 -/* - * Function : request_synchronous (int host, int target) - * - * Purpose : KGDB interface which will allow us to negotiate for - * synchronous transfers. This ill be replaced with a more - * integrated function; perhaps a new entry in the scsi_host - * structure, accessible via an ioctl() or perhaps /proc/scsi. - * - * Inputs : host - number of SCSI host; target - number of target. - * - * Returns : 0 when negotiation has been setup for next SCSI command, - * -1 on failure. 
- */ - -static int -request_synchronous (int host, int target) { - struct Scsi_Host *h; - struct NCR53c7x0_hostdata *hostdata; - unsigned long flags; - if (target < 0) { - printk (KERN_ALERT "target %d is bogus\n", target); - return -1; - } - if (!(h = find_host (host))) - return -1; - else if (h->this_id == target) { - printk (KERN_ALERT "target %d is host ID\n", target); - return -1; - } - else if (target >= h->max_id) { - printk (KERN_ALERT "target %d exceeds maximum of %d\n", target, - h->max_id); - return -1; - } - hostdata = (struct NCR53c7x0_hostdata *)h->hostdata[0]; - - local_irq_save(flags); - if (hostdata->initiate_sdtr & (1 << target)) { - local_irq_restore(flags); - printk (KERN_ALERT "target %d already doing SDTR\n", target); - return -1; - } - hostdata->initiate_sdtr |= (1 << target); - local_irq_restore(flags); - return 0; -} -#endif - -/* - * Function : request_disconnect (int host, int on_or_off) - * - * Purpose : KGDB support function, tells us to allow or disallow - * disconnections. - * - * Inputs : host - number of SCSI host; on_or_off - non-zero to allow, - * zero to disallow. - * - * Returns : 0 on success, * -1 on failure. - */ - -static int -request_disconnect (int host, int on_or_off) { - struct Scsi_Host *h; - struct NCR53c7x0_hostdata *hostdata; - if (!(h = find_host (host))) - return -1; - hostdata = (struct NCR53c7x0_hostdata *) h->hostdata[0]; - if (on_or_off) - hostdata->options |= OPTION_DISCONNECT; - else - hostdata->options &= ~OPTION_DISCONNECT; - return 0; -} -#endif - -/* - * Function : static void NCR53c7x0_driver_init (struct Scsi_Host *host) - * - * Purpose : Initialize internal structures, as required on startup, or - * after a SCSI bus reset. 
- * - * Inputs : host - pointer to this host adapter's structure - */ - -static void -NCR53c7x0_driver_init (struct Scsi_Host *host) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - int i, j; - u32 *ncrcurrent; - - for (i = 0; i < 16; ++i) { - hostdata->request_sense[i] = 0; - for (j = 0; j < 8; ++j) - hostdata->busy[i][j] = 0; - set_synchronous (host, i, /* sxfer */ 0, hostdata->saved_scntl3, 0); - } - hostdata->issue_queue = NULL; - hostdata->running_list = hostdata->finished_queue = - hostdata->ncrcurrent = NULL; - for (i = 0, ncrcurrent = (u32 *) hostdata->schedule; - i < host->can_queue; ++i, ncrcurrent += 2) { - ncrcurrent[0] = hostdata->NOP_insn; - ncrcurrent[1] = 0xdeadbeef; - } - ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) | DBC_TCI_TRUE; - ncrcurrent[1] = (u32) virt_to_bus (hostdata->script) + - hostdata->E_wait_reselect; - hostdata->reconnect_dsa_head = 0; - hostdata->addr_reconnect_dsa_head = (u32) - virt_to_bus((void *) &(hostdata->reconnect_dsa_head)); - hostdata->expecting_iid = 0; - hostdata->expecting_sto = 0; - if (hostdata->options & OPTION_ALWAYS_SYNCHRONOUS) - hostdata->initiate_sdtr = 0xffff; - else - hostdata->initiate_sdtr = 0; - hostdata->talked_to = 0; - hostdata->idle = 1; -} - -/* - * Function : static int clock_to_ccf_710 (int clock) - * - * Purpose : Return the clock conversion factor for a given SCSI clock. - * - * Inputs : clock - SCSI clock expressed in Hz. - * - * Returns : ccf on success, -1 on failure. 
- */ - -static int -clock_to_ccf_710 (int clock) { - if (clock <= 16666666) - return -1; - if (clock <= 25000000) - return 2; /* Divide by 1.0 */ - else if (clock <= 37500000) - return 1; /* Divide by 1.5 */ - else if (clock <= 50000000) - return 0; /* Divide by 2.0 */ - else if (clock <= 66000000) - return 3; /* Divide by 3.0 */ - else - return -1; -} - -/* - * Function : static int NCR53c7x0_init (struct Scsi_Host *host) - * - * Purpose : initialize the internal structures for a given SCSI host - * - * Inputs : host - pointer to this host adapter's structure - * - * Preconditions : when this function is called, the chip_type - * field of the hostdata structure MUST have been set. - * - * Returns : 0 on success, -1 on failure. - */ - -int -NCR53c7x0_init (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - int i, ccf; - unsigned char revision; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - /* - * There are some things which we need to know about in order to provide - * a semblance of support. Print 'em if they aren't what we expect, - * otherwise don't add to the noise. - * - * -1 means we don't know what to expect. 
- */ - int val, flags; - char buf[32]; - int expected_id = -1; - int expected_clock = -1; - int uninitialized = 0; -#ifdef NO_IO_SPACE - int expected_mapping = OPTION_MEMORY_MAPPED; -#else - int expected_mapping = OPTION_IO_MAPPED; -#endif - for (i=0;i<7;i++) - hostdata->valid_ids[i] = 1; /* Default all ID's to scan */ - - /* Parse commandline flags */ - if (check_setup_strings("noasync",&flags,&val,buf)) - { - hostdata->options |= OPTION_NO_ASYNC; - hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS); - } - - if (check_setup_strings("nosync",&flags,&val,buf)) - { - hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS); - } - - if (check_setup_strings("nodisconnect",&flags,&val,buf)) - hostdata->options &= ~OPTION_DISCONNECT; - - if (check_setup_strings("validids",&flags,&val,buf)) - { - for (i=0;i<7;i++) - hostdata->valid_ids[i] = val & (1<options = (long long)val << 32; - if (check_setup_strings("optlo",&flags,&val,buf)) - hostdata->options |= val; - - NCR53c7x0_local_setup(host); - switch (hostdata->chip) { - case 710: - case 770: - hostdata->dstat_sir_intr = NCR53c7x0_dstat_sir_intr; - hostdata->init_save_regs = NULL; - hostdata->dsa_fixup = NCR53c7xx_dsa_fixup; - hostdata->init_fixup = NCR53c7x0_init_fixup; - hostdata->soft_reset = NCR53c7x0_soft_reset; - hostdata->run_tests = NCR53c7xx_run_tests; - expected_clock = hostdata->scsi_clock; - expected_id = 7; - break; - default: - printk ("scsi%d : chip type of %d is not supported yet, detaching.\n", - host->host_no, hostdata->chip); - scsi_unregister (host); - return -1; - } - - /* Assign constants accessed by NCR */ - hostdata->NCR53c7xx_zero = 0; - hostdata->NCR53c7xx_msg_reject = MESSAGE_REJECT; - hostdata->NCR53c7xx_msg_abort = ABORT; - hostdata->NCR53c7xx_msg_nop = NOP; - hostdata->NOP_insn = (DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24; - if (expected_mapping == -1 || - (hostdata->options & (OPTION_MEMORY_MAPPED)) != - (expected_mapping & OPTION_MEMORY_MAPPED)) - printk 
("scsi%d : using %s mapped access\n", host->host_no, - (hostdata->options & OPTION_MEMORY_MAPPED) ? "memory" : - "io"); - - hostdata->dmode = (hostdata->chip == 700 || hostdata->chip == 70066) ? - DMODE_REG_00 : DMODE_REG_10; - hostdata->istat = ((hostdata->chip / 100) == 8) ? - ISTAT_REG_800 : ISTAT_REG_700; - -/* We have to assume that this may be the first access to the chip, so - * we must set EA in DCNTL. */ - - NCR53c7x0_write8 (DCNTL_REG, DCNTL_10_EA|DCNTL_10_COM); - - -/* Only the ISTAT register is readable when the NCR is running, so make - sure it's halted. */ - ncr_halt(host); - -/* - * XXX - the NCR53c700 uses bitfielded registers for SCID, SDID, etc, - * as does the 710 with one bit per SCSI ID. Conversely, the NCR - * uses a normal, 3 bit binary representation of these values. - * - * Get the rest of the NCR documentation, and FIND OUT where the change - * was. - */ - -#if 0 - /* May not be able to do this - chip my not have been set up yet */ - tmp = hostdata->this_id_mask = NCR53c7x0_read8(SCID_REG); - for (host->this_id = 0; tmp != 1; tmp >>=1, ++host->this_id); -#else - host->this_id = 7; -#endif - -/* - * Note : we should never encounter a board setup for ID0. So, - * if we see ID0, assume that it was uninitialized and set it - * to the industry standard 7. - */ - if (!host->this_id) { - printk("scsi%d : initiator ID was %d, changing to 7\n", - host->host_no, host->this_id); - host->this_id = 7; - hostdata->this_id_mask = 1 << 7; - uninitialized = 1; - }; - - if (expected_id == -1 || host->this_id != expected_id) - printk("scsi%d : using initiator ID %d\n", host->host_no, - host->this_id); - - /* - * Save important registers to allow a soft reset. - */ - - /* - * CTEST7 controls cache snooping, burst mode, and support for - * external differential drivers. This isn't currently used - the - * default value may not be optimal anyway. - * Even worse, it may never have been set up since reset. 
- */ - hostdata->saved_ctest7 = NCR53c7x0_read8(CTEST7_REG) & CTEST7_SAVE; - revision = (NCR53c7x0_read8(CTEST8_REG) & 0xF0) >> 4; - switch (revision) { - case 1: revision = 0; break; - case 2: revision = 1; break; - case 4: revision = 2; break; - case 8: revision = 3; break; - default: revision = 255; break; - } - printk("scsi%d: Revision 0x%x\n",host->host_no,revision); - - if ((revision == 0 || revision == 255) && (hostdata->options & (OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS))) - { - printk ("scsi%d: Disabling sync working and disconnect/reselect\n", - host->host_no); - hostdata->options &= ~(OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS); - } - - /* - * On NCR53c700 series chips, DCNTL controls the SCSI clock divisor, - * on 800 series chips, it allows for a totem-pole IRQ driver. - * NOTE saved_dcntl currently overwritten in init function. - * The value read here may be garbage anyway, MVME16x board at least - * does not initialise chip if kernel arrived via tftp. - */ - - hostdata->saved_dcntl = NCR53c7x0_read8(DCNTL_REG); - - /* - * DMODE controls DMA burst length, and on 700 series chips, - * 286 mode and bus width - * NOTE: On MVME16x, chip may have been reset, so this could be a - * power-on/reset default value. - */ - hostdata->saved_dmode = NCR53c7x0_read8(hostdata->dmode); - - /* - * Now that burst length and enabled/disabled status is known, - * clue the user in on it. 
- */ - - ccf = clock_to_ccf_710 (expected_clock); - - for (i = 0; i < 16; ++i) - hostdata->cmd_allocated[i] = 0; - - if (hostdata->init_save_regs) - hostdata->init_save_regs (host); - if (hostdata->init_fixup) - hostdata->init_fixup (host); - - if (!the_template) { - the_template = host->hostt; - first_host = host; - } - - /* - * Linux SCSI drivers have always been plagued with initialization - * problems - some didn't work with the BIOS disabled since they expected - * initialization from it, some didn't work when the networking code - * was enabled and registers got scrambled, etc. - * - * To avoid problems like this, in the future, we will do a soft - * reset on the SCSI chip, taking it back to a sane state. - */ - - hostdata->soft_reset (host); - -#if 1 - hostdata->debug_count_limit = -1; -#else - hostdata->debug_count_limit = 1; -#endif - hostdata->intrs = -1; - hostdata->resets = -1; - memcpy ((void *) hostdata->synchronous_want, (void *) sdtr_message, - sizeof (hostdata->synchronous_want)); - - NCR53c7x0_driver_init (host); - - if (request_irq(host->irq, NCR53c7x0_intr, IRQF_SHARED, "53c7xx", host)) - { - printk("scsi%d : IRQ%d not free, detaching\n", - host->host_no, host->irq); - goto err_unregister; - } - - if ((hostdata->run_tests && hostdata->run_tests(host) == -1) || - (hostdata->options & OPTION_DEBUG_TESTS_ONLY)) { - /* XXX Should disable interrupts, etc. 
here */ - goto err_free_irq; - } else { - if (host->io_port) { - host->n_io_port = 128; - if (!request_region (host->io_port, host->n_io_port, "ncr53c7xx")) - goto err_free_irq; - } - } - - if (NCR53c7x0_read8 (SBCL_REG) & SBCL_BSY) { - printk ("scsi%d : bus wedge, doing SCSI reset\n", host->host_no); - hard_reset (host); - } - return 0; - - err_free_irq: - free_irq(host->irq, NCR53c7x0_intr); - err_unregister: - scsi_unregister(host); - return -1; -} - -/* - * Function : int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip, - * unsigned long base, int io_port, int irq, int dma, long long options, - * int clock); - * - * Purpose : initializes a NCR53c7,8x0 based on base addresses, - * IRQ, and DMA channel. - * - * Inputs : tpnt - Template for this SCSI adapter, board - board level - * product, chip - 710 - * - * Returns : 0 on success, -1 on failure. - * - */ - -int -ncr53c7xx_init (struct scsi_host_template *tpnt, int board, int chip, - unsigned long base, int io_port, int irq, int dma, - long long options, int clock) -{ - struct Scsi_Host *instance; - struct NCR53c7x0_hostdata *hostdata; - char chip_str[80]; - int script_len = 0, dsa_len = 0, size = 0, max_cmd_size = 0, - schedule_size = 0, ok = 0; - void *tmp; - unsigned long page; - - switch (chip) { - case 710: - case 770: - schedule_size = (tpnt->can_queue + 1) * 8 /* JUMP instruction size */; - script_len = NCR53c7xx_script_len; - dsa_len = NCR53c7xx_dsa_len; - options |= OPTION_INTFLY; - sprintf (chip_str, "NCR53c%d", chip); - break; - default: - printk("scsi-ncr53c7xx : unsupported SCSI chip %d\n", chip); - return -1; - } - - printk("scsi-ncr53c7xx : %s at memory 0x%lx, io 0x%x, irq %d", - chip_str, base, io_port, irq); - if (dma == DMA_NONE) - printk("\n"); - else - printk(", dma %d\n", dma); - - if (options & OPTION_DEBUG_PROBE_ONLY) { - printk ("scsi-ncr53c7xx : probe only enabled, aborting initialization\n"); - return -1; - } - - max_cmd_size = sizeof(struct NCR53c7x0_cmd) + dsa_len 
+ - /* Size of dynamic part of command structure : */ - 2 * /* Worst case : we don't know if we need DATA IN or DATA out */ - ( 2 * /* Current instructions per scatter/gather segment */ - tpnt->sg_tablesize + - 3 /* Current startup / termination required per phase */ - ) * - 8 /* Each instruction is eight bytes */; - - /* Allocate fixed part of hostdata, dynamic part to hold appropriate - SCSI SCRIPT(tm) plus a single, maximum-sized NCR53c7x0_cmd structure. - - We need a NCR53c7x0_cmd structure for scan_scsis() when we are - not loaded as a module, and when we're loaded as a module, we - can't use a non-dynamically allocated structure because modules - are vmalloc()'d, which can allow structures to cross page - boundaries and breaks our physical/virtual address assumptions - for DMA. - - So, we stick it past the end of our hostdata structure. - - ASSUMPTION : - Regardless of how many simultaneous SCSI commands we allow, - the probe code only executes a _single_ instruction at a time, - so we only need one here, and don't need to allocate NCR53c7x0_cmd - structures for each target until we are no longer in scan_scsis - and kmalloc() has become functional (memory_init() happens - after all device driver initialization). - */ - - size = sizeof(struct NCR53c7x0_hostdata) + script_len + - /* Note that alignment will be guaranteed, since we put the command - allocated at probe time after the fixed-up SCSI script, which - consists of 32 bit words, aligned on a 32 bit boundary. But - on a 64bit machine we need 8 byte alignment for hostdata->free, so - we add in another 4 bytes to take care of potential misalignment - */ - (sizeof(void *) - sizeof(u32)) + max_cmd_size + schedule_size; - - page = __get_free_pages(GFP_ATOMIC,1); - if(page==0) - { - printk(KERN_ERR "53c7xx: out of memory.\n"); - return -ENOMEM; - } -#ifdef FORCE_DSA_ALIGNMENT - /* - * 53c710 rev.0 doesn't have an add-with-carry instruction. - * Ensure we allocate enough memory to force DSA alignment. 
- */ - size += 256; -#endif - /* Size should be < 8K, so we can fit it in two pages. */ - if (size > 8192) { - printk(KERN_ERR "53c7xx: hostdata > 8K\n"); - return -1; - } - - instance = scsi_register (tpnt, 4); - if (!instance) - { - free_page(page); - return -1; - } - instance->hostdata[0] = page; - memset((void *)instance->hostdata[0], 0, 8192); - cache_push(virt_to_phys((void *)(instance->hostdata[0])), 8192); - cache_clear(virt_to_phys((void *)(instance->hostdata[0])), 8192); - kernel_set_cachemode((void *)instance->hostdata[0], 8192, IOMAP_NOCACHE_SER); - - /* FIXME : if we ever support an ISA NCR53c7xx based board, we - need to check if the chip is running in a 16 bit mode, and if so - unregister it if it is past the 16M (0x1000000) mark */ - - hostdata = (struct NCR53c7x0_hostdata *)instance->hostdata[0]; - hostdata->size = size; - hostdata->script_count = script_len / sizeof(u32); - hostdata->board = board; - hostdata->chip = chip; - - /* - * Being memory mapped is more desirable, since - * - * - Memory accesses may be faster. - * - * - The destination and source address spaces are the same for - * all instructions, meaning we don't have to twiddle dmode or - * any other registers. - * - * So, we try for memory mapped, and if we don't get it, - * we go for port mapped, and that failing we tell the user - * it can't work. 
- */ - - if (base) { - instance->base = base; - /* Check for forced I/O mapping */ - if (!(options & OPTION_IO_MAPPED)) { - options |= OPTION_MEMORY_MAPPED; - ok = 1; - } - } else { - options &= ~OPTION_MEMORY_MAPPED; - } - - if (io_port) { - instance->io_port = io_port; - options |= OPTION_IO_MAPPED; - ok = 1; - } else { - options &= ~OPTION_IO_MAPPED; - } - - if (!ok) { - printk ("scsi%d : not initializing, no I/O or memory mapping known \n", - instance->host_no); - scsi_unregister (instance); - return -1; - } - instance->irq = irq; - instance->dma_channel = dma; - - hostdata->options = options; - hostdata->dsa_len = dsa_len; - hostdata->max_cmd_size = max_cmd_size; - hostdata->num_cmds = 1; - hostdata->scsi_clock = clock; - /* Initialize single command */ - tmp = (hostdata->script + hostdata->script_count); -#ifdef FORCE_DSA_ALIGNMENT - { - void *t = ROUNDUP(tmp, void *); - if (((u32)t & 0xff) > CmdPageStart) - t = (void *)((u32)t + 255); - t = (void *)(((u32)t & ~0xff) + CmdPageStart); - hostdata->free = t; -#if 0 - printk ("scsi: Registered size increased by 256 to %d\n", size); - printk ("scsi: CmdPageStart = 0x%02x\n", CmdPageStart); - printk ("scsi: tmp = 0x%08x, hostdata->free set to 0x%08x\n", - (u32)tmp, (u32)t); -#endif - } -#else - hostdata->free = ROUNDUP(tmp, void *); -#endif - hostdata->free->real = tmp; - hostdata->free->size = max_cmd_size; - hostdata->free->free = NULL; - hostdata->free->next = NULL; - hostdata->extra_allocate = 0; - - /* Allocate command start code space */ - hostdata->schedule = (chip == 700 || chip == 70066) ? - NULL : (u32 *) ((char *)hostdata->free + max_cmd_size); - -/* - * For diagnostic purposes, we don't really care how fast things blaze. - * For profiling, we want to access the 800ns resolution system clock, - * using a 'C' call on the host processor. - * - * Therefore, there's no need for the NCR chip to directly manipulate - * this data, and we should put it wherever is most convenient for - * Linux. 
- */ - if (track_events) - hostdata->events = (struct NCR53c7x0_event *) (track_events ? - vmalloc (sizeof (struct NCR53c7x0_event) * track_events) : NULL); - else - hostdata->events = NULL; - - if (hostdata->events) { - memset ((void *) hostdata->events, 0, sizeof(struct NCR53c7x0_event) * - track_events); - hostdata->event_size = track_events; - hostdata->event_index = 0; - } else - hostdata->event_size = 0; - - return NCR53c7x0_init(instance); -} - - -/* - * Function : static void NCR53c7x0_init_fixup (struct Scsi_Host *host) - * - * Purpose : copy and fixup the SCSI SCRIPTS(tm) code for this device. - * - * Inputs : host - pointer to this host adapter's structure - * - */ - -static void -NCR53c7x0_init_fixup (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned char tmp; - int i, ncr_to_memory, memory_to_ncr; - u32 base; - NCR53c7x0_local_setup(host); - - - /* XXX - NOTE : this code MUST be made endian aware */ - /* Copy code into buffer that was allocated at detection time. 
*/ - memcpy ((void *) hostdata->script, (void *) SCRIPT, - sizeof(SCRIPT)); - /* Fixup labels */ - for (i = 0; i < PATCHES; ++i) - hostdata->script[LABELPATCHES[i]] += - virt_to_bus(hostdata->script); - /* Fixup addresses of constants that used to be EXTERNAL */ - - patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_abort, - virt_to_bus(&(hostdata->NCR53c7xx_msg_abort))); - patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_reject, - virt_to_bus(&(hostdata->NCR53c7xx_msg_reject))); - patch_abs_32 (hostdata->script, 0, NCR53c7xx_zero, - virt_to_bus(&(hostdata->NCR53c7xx_zero))); - patch_abs_32 (hostdata->script, 0, NCR53c7xx_sink, - virt_to_bus(&(hostdata->NCR53c7xx_sink))); - patch_abs_32 (hostdata->script, 0, NOP_insn, - virt_to_bus(&(hostdata->NOP_insn))); - patch_abs_32 (hostdata->script, 0, schedule, - virt_to_bus((void *) hostdata->schedule)); - - /* Fixup references to external variables: */ - for (i = 0; i < EXTERNAL_PATCHES_LEN; ++i) - hostdata->script[EXTERNAL_PATCHES[i].offset] += - virt_to_bus(EXTERNAL_PATCHES[i].address); - - /* - * Fixup absolutes set at boot-time. - * - * All non-code absolute variables suffixed with "dsa_" and "int_" - * are constants, and need no fixup provided the assembler has done - * it for us (I don't know what the "real" NCR assembler does in - * this case, my assembler does the right magic). - */ - - patch_abs_rwri_data (hostdata->script, 0, dsa_save_data_pointer, - Ent_dsa_code_save_data_pointer - Ent_dsa_zero); - patch_abs_rwri_data (hostdata->script, 0, dsa_restore_pointers, - Ent_dsa_code_restore_pointers - Ent_dsa_zero); - patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect, - Ent_dsa_code_check_reselect - Ent_dsa_zero); - - /* - * Just for the hell of it, preserve the settings of - * Burst Length and Enable Read Line bits from the DMODE - * register. Make sure SCRIPTS start automagically. - */ - -#if defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000) - /* We know better what we want than 16xBug does! 
*/ - tmp = DMODE_10_BL_8 | DMODE_10_FC2; -#else - tmp = NCR53c7x0_read8(DMODE_REG_10); - tmp &= (DMODE_BL_MASK | DMODE_10_FC2 | DMODE_10_FC1 | DMODE_710_PD | - DMODE_710_UO); -#endif - - if (!(hostdata->options & OPTION_MEMORY_MAPPED)) { - base = (u32) host->io_port; - memory_to_ncr = tmp|DMODE_800_DIOM; - ncr_to_memory = tmp|DMODE_800_SIOM; - } else { - base = virt_to_bus((void *)host->base); - memory_to_ncr = ncr_to_memory = tmp; - } - - /* SCRATCHB_REG_10 == SCRATCHA_REG_800, as it happens */ - patch_abs_32 (hostdata->script, 0, addr_scratch, base + SCRATCHA_REG_800); - patch_abs_32 (hostdata->script, 0, addr_temp, base + TEMP_REG); - patch_abs_32 (hostdata->script, 0, addr_dsa, base + DSA_REG); - - /* - * I needed some variables in the script to be accessible to - * both the NCR chip and the host processor. For these variables, - * I made the arbitrary decision to store them directly in the - * hostdata structure rather than in the RELATIVE area of the - * SCRIPTS. - */ - - - patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_memory, tmp); - patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_ncr, memory_to_ncr); - patch_abs_rwri_data (hostdata->script, 0, dmode_ncr_to_memory, ncr_to_memory); - - patch_abs_32 (hostdata->script, 0, msg_buf, - virt_to_bus((void *)&(hostdata->msg_buf))); - patch_abs_32 (hostdata->script, 0, reconnect_dsa_head, - virt_to_bus((void *)&(hostdata->reconnect_dsa_head))); - patch_abs_32 (hostdata->script, 0, addr_reconnect_dsa_head, - virt_to_bus((void *)&(hostdata->addr_reconnect_dsa_head))); - patch_abs_32 (hostdata->script, 0, reselected_identify, - virt_to_bus((void *)&(hostdata->reselected_identify))); -/* reselected_tag is currently unused */ -#if 0 - patch_abs_32 (hostdata->script, 0, reselected_tag, - virt_to_bus((void *)&(hostdata->reselected_tag))); -#endif - - patch_abs_32 (hostdata->script, 0, test_dest, - virt_to_bus((void*)&hostdata->test_dest)); - patch_abs_32 (hostdata->script, 0, test_src, - 
virt_to_bus(&hostdata->test_source)); - patch_abs_32 (hostdata->script, 0, saved_dsa, - virt_to_bus((void *)&hostdata->saved2_dsa)); - patch_abs_32 (hostdata->script, 0, emulfly, - virt_to_bus((void *)&hostdata->emulated_intfly)); - - patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect, - (unsigned char)(Ent_dsa_code_check_reselect - Ent_dsa_zero)); - -/* These are for event logging; the ncr_event enum contains the - actual interrupt numbers. */ -#ifdef A_int_EVENT_SELECT - patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT, (u32) EVENT_SELECT); -#endif -#ifdef A_int_EVENT_DISCONNECT - patch_abs_32 (hostdata->script, 0, int_EVENT_DISCONNECT, (u32) EVENT_DISCONNECT); -#endif -#ifdef A_int_EVENT_RESELECT - patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT, (u32) EVENT_RESELECT); -#endif -#ifdef A_int_EVENT_COMPLETE - patch_abs_32 (hostdata->script, 0, int_EVENT_COMPLETE, (u32) EVENT_COMPLETE); -#endif -#ifdef A_int_EVENT_IDLE - patch_abs_32 (hostdata->script, 0, int_EVENT_IDLE, (u32) EVENT_IDLE); -#endif -#ifdef A_int_EVENT_SELECT_FAILED - patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT_FAILED, - (u32) EVENT_SELECT_FAILED); -#endif -#ifdef A_int_EVENT_BEFORE_SELECT - patch_abs_32 (hostdata->script, 0, int_EVENT_BEFORE_SELECT, - (u32) EVENT_BEFORE_SELECT); -#endif -#ifdef A_int_EVENT_RESELECT_FAILED - patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT_FAILED, - (u32) EVENT_RESELECT_FAILED); -#endif - - /* - * Make sure the NCR and Linux code agree on the location of - * certain fields. 
- */ - - hostdata->E_accept_message = Ent_accept_message; - hostdata->E_command_complete = Ent_command_complete; - hostdata->E_cmdout_cmdout = Ent_cmdout_cmdout; - hostdata->E_data_transfer = Ent_data_transfer; - hostdata->E_debug_break = Ent_debug_break; - hostdata->E_dsa_code_template = Ent_dsa_code_template; - hostdata->E_dsa_code_template_end = Ent_dsa_code_template_end; - hostdata->E_end_data_transfer = Ent_end_data_transfer; - hostdata->E_initiator_abort = Ent_initiator_abort; - hostdata->E_msg_in = Ent_msg_in; - hostdata->E_other_transfer = Ent_other_transfer; - hostdata->E_other_in = Ent_other_in; - hostdata->E_other_out = Ent_other_out; - hostdata->E_reject_message = Ent_reject_message; - hostdata->E_respond_message = Ent_respond_message; - hostdata->E_select = Ent_select; - hostdata->E_select_msgout = Ent_select_msgout; - hostdata->E_target_abort = Ent_target_abort; -#ifdef Ent_test_0 - hostdata->E_test_0 = Ent_test_0; -#endif - hostdata->E_test_1 = Ent_test_1; - hostdata->E_test_2 = Ent_test_2; -#ifdef Ent_test_3 - hostdata->E_test_3 = Ent_test_3; -#endif - hostdata->E_wait_reselect = Ent_wait_reselect; - hostdata->E_dsa_code_begin = Ent_dsa_code_begin; - - hostdata->dsa_cmdout = A_dsa_cmdout; - hostdata->dsa_cmnd = A_dsa_cmnd; - hostdata->dsa_datain = A_dsa_datain; - hostdata->dsa_dataout = A_dsa_dataout; - hostdata->dsa_end = A_dsa_end; - hostdata->dsa_msgin = A_dsa_msgin; - hostdata->dsa_msgout = A_dsa_msgout; - hostdata->dsa_msgout_other = A_dsa_msgout_other; - hostdata->dsa_next = A_dsa_next; - hostdata->dsa_select = A_dsa_select; - hostdata->dsa_start = Ent_dsa_code_template - Ent_dsa_zero; - hostdata->dsa_status = A_dsa_status; - hostdata->dsa_jump_dest = Ent_dsa_code_fix_jump - Ent_dsa_zero + - 8 /* destination operand */; - - /* sanity check */ - if (A_dsa_fields_start != Ent_dsa_code_template_end - - Ent_dsa_zero) - printk("scsi%d : NCR dsa_fields start is %d not %d\n", - host->host_no, A_dsa_fields_start, Ent_dsa_code_template_end - - 
Ent_dsa_zero); - - printk("scsi%d : NCR code relocated to 0x%lx (virt 0x%p)\n", host->host_no, - virt_to_bus(hostdata->script), hostdata->script); -} - -/* - * Function : static int NCR53c7xx_run_tests (struct Scsi_Host *host) - * - * Purpose : run various verification tests on the NCR chip, - * including interrupt generation, and proper bus mastering - * operation. - * - * Inputs : host - a properly initialized Scsi_Host structure - * - * Preconditions : the NCR chip must be in a halted state. - * - * Returns : 0 if all tests were successful, -1 on error. - * - */ - -static int -NCR53c7xx_run_tests (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned long timeout; - u32 start; - int failed, i; - unsigned long flags; - NCR53c7x0_local_setup(host); - - /* The NCR chip _must_ be idle to run the test scripts */ - - local_irq_save(flags); - if (!hostdata->idle) { - printk ("scsi%d : chip not idle, aborting tests\n", host->host_no); - local_irq_restore(flags); - return -1; - } - - /* - * Check for functional interrupts, this could work as an - * autoprobe routine. - */ - - if ((hostdata->options & OPTION_DEBUG_TEST1) && - hostdata->state != STATE_DISABLED) { - hostdata->idle = 0; - hostdata->test_running = 1; - hostdata->test_completed = -1; - hostdata->test_dest = 0; - hostdata->test_source = 0xdeadbeef; - start = virt_to_bus (hostdata->script) + hostdata->E_test_1; - hostdata->state = STATE_RUNNING; - printk ("scsi%d : test 1", host->host_no); - NCR53c7x0_write32 (DSP_REG, start); - if (hostdata->options & OPTION_DEBUG_TRACE) - NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM | - DCNTL_STD); - printk (" started\n"); - local_irq_restore(flags); - - /* - * This is currently a .5 second timeout, since (in theory) no slow - * board will take that long. 
In practice, we've seen one - * pentium which occassionally fails with this, but works with - * 10 times as much? - */ - - timeout = jiffies + 5 * HZ / 10; - while ((hostdata->test_completed == -1) && time_before(jiffies, timeout)) - barrier(); - - failed = 1; - if (hostdata->test_completed == -1) - printk ("scsi%d : driver test 1 timed out%s\n",host->host_no , - (hostdata->test_dest == 0xdeadbeef) ? - " due to lost interrupt.\n" - " Please verify that the correct IRQ is being used for your board,\n" - : ""); - else if (hostdata->test_completed != 1) - printk ("scsi%d : test 1 bad interrupt value (%d)\n", - host->host_no, hostdata->test_completed); - else - failed = (hostdata->test_dest != 0xdeadbeef); - - if (hostdata->test_dest != 0xdeadbeef) { - printk ("scsi%d : driver test 1 read 0x%x instead of 0xdeadbeef indicating a\n" - " probable cache invalidation problem. Please configure caching\n" - " as write-through or disabled\n", - host->host_no, hostdata->test_dest); - } - - if (failed) { - printk ("scsi%d : DSP = 0x%p (script at 0x%p, start at 0x%x)\n", - host->host_no, bus_to_virt(NCR53c7x0_read32(DSP_REG)), - hostdata->script, start); - printk ("scsi%d : DSPS = 0x%x\n", host->host_no, - NCR53c7x0_read32(DSPS_REG)); - local_irq_restore(flags); - return -1; - } - hostdata->test_running = 0; - } - - if ((hostdata->options & OPTION_DEBUG_TEST2) && - hostdata->state != STATE_DISABLED) { - u32 dsa[48]; - unsigned char identify = IDENTIFY(0, 0); - unsigned char cmd[6]; - unsigned char data[36]; - unsigned char status = 0xff; - unsigned char msg = 0xff; - - cmd[0] = INQUIRY; - cmd[1] = cmd[2] = cmd[3] = cmd[5] = 0; - cmd[4] = sizeof(data); - - dsa[2] = 1; - dsa[3] = virt_to_bus(&identify); - dsa[4] = 6; - dsa[5] = virt_to_bus(&cmd); - dsa[6] = sizeof(data); - dsa[7] = virt_to_bus(&data); - dsa[8] = 1; - dsa[9] = virt_to_bus(&status); - dsa[10] = 1; - dsa[11] = virt_to_bus(&msg); - - for (i = 0; i < 6; ++i) { -#ifdef VALID_IDS - if (!hostdata->valid_ids[i]) - continue; 
-#endif - local_irq_disable(); - if (!hostdata->idle) { - printk ("scsi%d : chip not idle, aborting tests\n", host->host_no); - local_irq_restore(flags); - return -1; - } - - /* 710: bit mapped scsi ID, async */ - dsa[0] = (1 << i) << 16; - hostdata->idle = 0; - hostdata->test_running = 2; - hostdata->test_completed = -1; - start = virt_to_bus(hostdata->script) + hostdata->E_test_2; - hostdata->state = STATE_RUNNING; - NCR53c7x0_write32 (DSA_REG, virt_to_bus(dsa)); - NCR53c7x0_write32 (DSP_REG, start); - if (hostdata->options & OPTION_DEBUG_TRACE) - NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | - DCNTL_SSM | DCNTL_STD); - local_irq_restore(flags); - - timeout = jiffies + 5 * HZ; /* arbitrary */ - while ((hostdata->test_completed == -1) && time_before(jiffies, timeout)) - barrier(); - - NCR53c7x0_write32 (DSA_REG, 0); - - if (hostdata->test_completed == 2) { - data[35] = 0; - printk ("scsi%d : test 2 INQUIRY to target %d, lun 0 : %s\n", - host->host_no, i, data + 8); - printk ("scsi%d : status ", host->host_no); - scsi_print_status (status); - printk ("\nscsi%d : message ", host->host_no); - spi_print_msg(&msg); - printk ("\n"); - } else if (hostdata->test_completed == 3) { - printk("scsi%d : test 2 no connection with target %d\n", - host->host_no, i); - if (!hostdata->idle) { - printk("scsi%d : not idle\n", host->host_no); - local_irq_restore(flags); - return -1; - } - } else if (hostdata->test_completed == -1) { - printk ("scsi%d : test 2 timed out\n", host->host_no); - local_irq_restore(flags); - return -1; - } - hostdata->test_running = 0; - } - } - - local_irq_restore(flags); - return 0; -} - -/* - * Function : static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd) - * - * Purpose : copy the NCR53c8xx dsa structure into cmd's dsa buffer, - * performing all necessary relocation. - * - * Inputs : cmd, a NCR53c7x0_cmd structure with a dsa area large - * enough to hold the NCR53c8xx dsa. 
- */ - -static void -NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd) { - Scsi_Cmnd *c = cmd->cmd; - struct Scsi_Host *host = c->device->host; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - int i; - - memcpy (cmd->dsa, hostdata->script + (hostdata->E_dsa_code_template / 4), - hostdata->E_dsa_code_template_end - hostdata->E_dsa_code_template); - - /* - * Note : within the NCR 'C' code, dsa points to the _start_ - * of the DSA structure, and _not_ the offset of dsa_zero within - * that structure used to facilitate shorter signed offsets - * for the 8 bit ALU. - * - * The implications of this are that - * - * - 32 bit A_dsa_* absolute values require an additional - * dsa_zero added to their value to be correct, since they are - * relative to dsa_zero which is in essentially a separate - * space from the code symbols. - * - * - All other symbols require no special treatment. - */ - - patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_lun, c->device->lun); - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_addr_next, virt_to_bus(&cmd->dsa_next_addr)); - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_next, virt_to_bus(cmd->dsa) + Ent_dsa_zero - - Ent_dsa_code_template + A_dsa_next); - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_sync, virt_to_bus((void *)hostdata->sync[c->device->id].script)); - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_sscf_710, virt_to_bus((void *)&hostdata->sync[c->device->id].sscf_710)); - patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_target, 1 << c->device->id); - /* XXX - new pointer stuff */ - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_addr_saved_pointer, virt_to_bus(&cmd->saved_data_pointer)); - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_addr_saved_residual, 
virt_to_bus(&cmd->saved_residual)); - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_addr_residual, virt_to_bus(&cmd->residual)); - - /* XXX - new start stuff */ - - patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), - dsa_temp_addr_dsa_value, virt_to_bus(&cmd->dsa_addr)); -} - -/* - * Function : run_process_issue_queue (void) - * - * Purpose : insure that the coroutine is running and will process our - * request. process_issue_queue_running is checked/set here (in an - * inline function) rather than in process_issue_queue itself to reduce - * the chances of stack overflow. - * - */ - -static volatile int process_issue_queue_running = 0; - -static __inline__ void -run_process_issue_queue(void) { - unsigned long flags; - local_irq_save(flags); - if (!process_issue_queue_running) { - process_issue_queue_running = 1; - process_issue_queue(flags); - /* - * process_issue_queue_running is cleared in process_issue_queue - * once it can't do more work, and process_issue_queue exits with - * interrupts disabled. - */ - } - local_irq_restore(flags); -} - -/* - * Function : static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int - * result) - * - * Purpose : mark SCSI command as finished, OR'ing the host portion - * of the result word into the result field of the corresponding - * Scsi_Cmnd structure, and removing it from the internal queues. - * - * Inputs : cmd - command, result - entire result field - * - * Preconditions : the NCR chip should be in a halted state when - * abnormal_finished is run, since it modifies structures which - * the NCR expects to have exclusive access to. 
- */ - -static void -abnormal_finished (struct NCR53c7x0_cmd *cmd, int result) { - Scsi_Cmnd *c = cmd->cmd; - struct Scsi_Host *host = c->device->host; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned long flags; - int left, found; - volatile struct NCR53c7x0_cmd * linux_search; - volatile struct NCR53c7x0_cmd * volatile *linux_prev; - volatile u32 *ncr_prev, *ncrcurrent, ncr_search; - -#if 0 - printk ("scsi%d: abnormal finished\n", host->host_no); -#endif - - local_irq_save(flags); - found = 0; - /* - * Traverse the NCR issue array until we find a match or run out - * of instructions. Instructions in the NCR issue array are - * either JUMP or NOP instructions, which are 2 words in length. - */ - - - for (found = 0, left = host->can_queue, ncrcurrent = hostdata->schedule; - left > 0; --left, ncrcurrent += 2) - { - if (issue_to_cmd (host, hostdata, (u32 *) ncrcurrent) == cmd) - { - ncrcurrent[0] = hostdata->NOP_insn; - ncrcurrent[1] = 0xdeadbeef; - ++found; - break; - } - } - - /* - * Traverse the NCR reconnect list of DSA structures until we find - * a pointer to this dsa or have found too many command structures. - * We let prev point at the next field of the previous element or - * head of the list, so we don't do anything different for removing - * the head element. 
- */ - - for (left = host->can_queue, - ncr_search = hostdata->reconnect_dsa_head, - ncr_prev = &hostdata->reconnect_dsa_head; - left >= 0 && ncr_search && - ((char*)bus_to_virt(ncr_search) + hostdata->dsa_start) - != (char *) cmd->dsa; - ncr_prev = (u32*) ((char*)bus_to_virt(ncr_search) + - hostdata->dsa_next), ncr_search = *ncr_prev, --left); - - if (left < 0) - printk("scsi%d: loop detected in ncr reconncect list\n", - host->host_no); - else if (ncr_search) { - if (found) - printk("scsi%d: scsi %ld in ncr issue array and reconnect lists\n", - host->host_no, c->pid); - else { - volatile u32 * next = (u32 *) - ((char *)bus_to_virt(ncr_search) + hostdata->dsa_next); - *ncr_prev = *next; -/* If we're at the tail end of the issue queue, update that pointer too. */ - found = 1; - } - } - - /* - * Traverse the host running list until we find this command or discover - * we have too many elements, pointing linux_prev at the next field of the - * linux_previous element or head of the list, search at this element. 
- */ - - for (left = host->can_queue, linux_search = hostdata->running_list, - linux_prev = &hostdata->running_list; - left >= 0 && linux_search && linux_search != cmd; - linux_prev = &(linux_search->next), - linux_search = linux_search->next, --left); - - if (left < 0) - printk ("scsi%d: loop detected in host running list for scsi pid %ld\n", - host->host_no, c->pid); - else if (linux_search) { - *linux_prev = linux_search->next; - --hostdata->busy[c->device->id][c->device->lun]; - } - - /* Return the NCR command structure to the free list */ - cmd->next = hostdata->free; - hostdata->free = cmd; - c->host_scribble = NULL; - - /* And return */ - c->result = result; - c->scsi_done(c); - - local_irq_restore(flags); - run_process_issue_queue(); -} - -/* - * Function : static void intr_break (struct Scsi_Host *host, - * struct NCR53c7x0_cmd *cmd) - * - * Purpose : Handler for breakpoint interrupts from a SCSI script - * - * Inputs : host - pointer to this host adapter's structure, - * cmd - pointer to the command (if any) dsa was pointing - * to. - * - */ - -static void -intr_break (struct Scsi_Host *host, struct - NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_break *bp; -#if 0 - Scsi_Cmnd *c = cmd ? cmd->cmd : NULL; -#endif - u32 *dsp; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned long flags; - NCR53c7x0_local_setup(host); - - /* - * Find the break point corresponding to this address, and - * dump the appropriate debugging information to standard - * output. 
- */ - local_irq_save(flags); - dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG)); - for (bp = hostdata->breakpoints; bp && bp->address != dsp; - bp = bp->next); - if (!bp) - panic("scsi%d : break point interrupt from %p with no breakpoint!", - host->host_no, dsp); - - /* - * Configure the NCR chip for manual start mode, so that we can - * point the DSP register at the instruction that follows the - * INT int_debug_break instruction. - */ - - NCR53c7x0_write8 (hostdata->dmode, - NCR53c7x0_read8(hostdata->dmode)|DMODE_MAN); - - /* - * And update the DSP register, using the size of the old - * instruction in bytes. - */ - - local_irq_restore(flags); -} -/* - * Function : static void print_synchronous (const char *prefix, - * const unsigned char *msg) - * - * Purpose : print a pretty, user and machine parsable representation - * of a SDTR message, including the "real" parameters, data - * clock so we can tell transfer rate at a glance. - * - * Inputs ; prefix - text to prepend, msg - SDTR message (5 bytes) - */ - -static void -print_synchronous (const char *prefix, const unsigned char *msg) { - if (msg[4]) { - int Hz = 1000000000 / (msg[3] * 4); - int integer = Hz / 1000000; - int fraction = (Hz - (integer * 1000000)) / 10000; - printk ("%speriod %dns offset %d %d.%02dMHz %s SCSI%s\n", - prefix, (int) msg[3] * 4, (int) msg[4], integer, fraction, - (((msg[3] * 4) < 200) ? "FAST" : "synchronous"), - (((msg[3] * 4) < 200) ? "-II" : "")); - } else - printk ("%sasynchronous SCSI\n", prefix); -} - -/* - * Function : static void set_synchronous (struct Scsi_Host *host, - * int target, int sxfer, int scntl3, int now_connected) - * - * Purpose : reprogram transfers between the selected SCSI initiator and - * target with the given register values; in the indirect - * select operand, reselection script, and chip registers. - * - * Inputs : host - NCR53c7,8xx SCSI host, target - number SCSI target id, - * sxfer and scntl3 - NCR registers. 
now_connected - if non-zero, - * we should reprogram the registers now too. - * - * NOTE: For 53c710, scntl3 is actually used for SCF bits from - * SBCL, as we don't have a SCNTL3. - */ - -static void -set_synchronous (struct Scsi_Host *host, int target, int sxfer, int scntl3, - int now_connected) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - u32 *script; - NCR53c7x0_local_setup(host); - - /* These are eight bit registers */ - sxfer &= 0xff; - scntl3 &= 0xff; - - hostdata->sync[target].sxfer_sanity = sxfer; - hostdata->sync[target].scntl3_sanity = scntl3; - -/* - * HARD CODED : synchronous script is EIGHT words long. This - * must agree with 53c7.8xx.h - */ - - if ((hostdata->chip != 700) && (hostdata->chip != 70066)) { - hostdata->sync[target].select_indirect = (1 << target) << 16 | - (sxfer << 8); - hostdata->sync[target].sscf_710 = scntl3; - - script = (u32 *) hostdata->sync[target].script; - - /* XXX - add NCR53c7x0 code to reprogram SCF bits if we want to */ - script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY | - DCMD_RWRI_OP_MOVE) << 24) | - (SBCL_REG << 16) | (scntl3 << 8); - script[1] = 0; - script += 2; - - script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY | - DCMD_RWRI_OP_MOVE) << 24) | - (SXFER_REG << 16) | (sxfer << 8); - script[1] = 0; - script += 2; - -#ifdef DEBUG_SYNC_INTR - if (hostdata->options & OPTION_DEBUG_DISCONNECT) { - script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_INT) << 24) | DBC_TCI_TRUE; - script[1] = DEBUG_SYNC_INTR; - script += 2; - } -#endif - - script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_RETURN) << 24) | DBC_TCI_TRUE; - script[1] = 0; - script += 2; - } - - if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) - printk ("scsi%d : target %d sync parameters are sxfer=0x%x, scntl3=0x%x\n", - host->host_no, target, sxfer, scntl3); - - if (now_connected) { - NCR53c7x0_write8(SBCL_REG, scntl3); - NCR53c7x0_write8(SXFER_REG, sxfer); - } -} - - -/* - * Function : static int 
asynchronous (struct Scsi_Host *host, int target) - * - * Purpose : reprogram between the selected SCSI Host adapter and target - * (assumed to be currently connected) for asynchronous transfers. - * - * Inputs : host - SCSI host structure, target - numeric target ID. - * - * Preconditions : the NCR chip should be in one of the halted states - */ - -static void -asynchronous (struct Scsi_Host *host, int target) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - NCR53c7x0_local_setup(host); - set_synchronous (host, target, /* no offset */ 0, hostdata->saved_scntl3, - 1); - printk ("scsi%d : setting target %d to asynchronous SCSI\n", - host->host_no, target); -} - -/* - * XXX - do we want to go out of our way (ie, add extra code to selection - * in the NCR53c710/NCR53c720 script) to reprogram the synchronous - * conversion bits, or can we be content in just setting the - * sxfer bits? I chose to do so [richard@sleepie.demon.co.uk] - */ - -/* Table for NCR53c8xx synchronous values */ - -/* This table is also correct for 710, allowing that scf=4 is equivalent - * of SSCF=0 (ie use DCNTL, divide by 3) for a 50.01-66.00MHz clock. - * For any other clock values, we cannot use entries with SCF values of - * 4. I guess that for a 66MHz clock, the slowest it will set is 2MHz, - * and for a 50MHz clock, the slowest will be 2.27Mhz. Should check - * that a device doesn't try and negotiate sync below these limits! 
- */ - -static const struct { - int div; /* Total clock divisor * 10 */ - unsigned char scf; /* */ - unsigned char tp; /* 4 + tp = xferp divisor */ -} syncs[] = { -/* div scf tp div scf tp div scf tp */ - { 40, 1, 0}, { 50, 1, 1}, { 60, 1, 2}, - { 70, 1, 3}, { 75, 2, 1}, { 80, 1, 4}, - { 90, 1, 5}, { 100, 1, 6}, { 105, 2, 3}, - { 110, 1, 7}, { 120, 2, 4}, { 135, 2, 5}, - { 140, 3, 3}, { 150, 2, 6}, { 160, 3, 4}, - { 165, 2, 7}, { 180, 3, 5}, { 200, 3, 6}, - { 210, 4, 3}, { 220, 3, 7}, { 240, 4, 4}, - { 270, 4, 5}, { 300, 4, 6}, { 330, 4, 7} -}; - -/* - * Function : static void synchronous (struct Scsi_Host *host, int target, - * char *msg) - * - * Purpose : reprogram transfers between the selected SCSI initiator and - * target for synchronous SCSI transfers such that the synchronous - * offset is less than that requested and period at least as long - * as that requested. Also modify *msg such that it contains - * an appropriate response. - * - * Inputs : host - NCR53c7,8xx SCSI host, target - number SCSI target id, - * msg - synchronous transfer request. - */ - - -static void -synchronous (struct Scsi_Host *host, int target, char *msg) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - int desire, divisor, i, limit; - unsigned char scntl3, sxfer; -/* The diagnostic message fits on one line, even with max. 
width integers */ - char buf[80]; - -/* Desired transfer clock in Hz */ - desire = 1000000000L / (msg[3] * 4); -/* Scale the available SCSI clock by 10 so we get tenths */ - divisor = (hostdata->scsi_clock * 10) / desire; - -/* NCR chips can handle at most an offset of 8 */ - if (msg[4] > 8) - msg[4] = 8; - - if (hostdata->options & OPTION_DEBUG_SDTR) - printk("scsi%d : optimal synchronous divisor of %d.%01d\n", - host->host_no, divisor / 10, divisor % 10); - - limit = ARRAY_SIZE(syncs) - 1; - for (i = 0; (i < limit) && (divisor > syncs[i].div); ++i); - - if (hostdata->options & OPTION_DEBUG_SDTR) - printk("scsi%d : selected synchronous divisor of %d.%01d\n", - host->host_no, syncs[i].div / 10, syncs[i].div % 10); - - msg[3] = ((1000000000L / hostdata->scsi_clock) * syncs[i].div / 10 / 4); - - if (hostdata->options & OPTION_DEBUG_SDTR) - printk("scsi%d : selected synchronous period of %dns\n", host->host_no, - msg[3] * 4); - - scntl3 = syncs[i].scf; - sxfer = (msg[4] << SXFER_MO_SHIFT) | (syncs[i].tp << 4); - if (hostdata->options & OPTION_DEBUG_SDTR) - printk ("scsi%d : sxfer=0x%x scntl3=0x%x\n", - host->host_no, (int) sxfer, (int) scntl3); - set_synchronous (host, target, sxfer, scntl3, 1); - sprintf (buf, "scsi%d : setting target %d to ", host->host_no, target); - print_synchronous (buf, msg); -} - -/* - * Function : static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, - * struct NCR53c7x0_cmd *cmd) - * - * Purpose : Handler for INT generated instructions for the - * NCR53c810/820 SCSI SCRIPT - * - * Inputs : host - pointer to this host adapter's structure, - * cmd - pointer to the command (if any) dsa was pointing - * to. - * - */ - -static int -NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct - NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - int print; - Scsi_Cmnd *c = cmd ? 
cmd->cmd : NULL; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - u32 dsps,*dsp; /* Argument of the INT instruction */ - - NCR53c7x0_local_setup(host); - dsps = NCR53c7x0_read32(DSPS_REG); - dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG)); - - /* RGH 150597: Frig. Commands which fail with Check Condition are - * Flagged as successful - hack dsps to indicate check condition */ -#if 0 - /* RGH 200597: Need to disable for BVME6000, as it gets Check Conditions - * and then dies. Seems to handle Check Condition at startup, but - * not mid kernel build. */ - if (dsps == A_int_norm_emulateintfly && cmd && cmd->result == 2) - dsps = A_int_err_check_condition; -#endif - - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : DSPS = 0x%x\n", host->host_no, dsps); - - switch (dsps) { - case A_int_msg_1: - print = 1; - switch (hostdata->msg_buf[0]) { - /* - * Unless we've initiated synchronous negotiation, I don't - * think that this should happen. 
- */ - case MESSAGE_REJECT: - hostdata->dsp = hostdata->script + hostdata->E_accept_message / - sizeof(u32); - hostdata->dsp_changed = 1; - if (cmd && (cmd->flags & CMD_FLAG_SDTR)) { - printk ("scsi%d : target %d rejected SDTR\n", host->host_no, - c->device->id); - cmd->flags &= ~CMD_FLAG_SDTR; - asynchronous (host, c->device->id); - print = 0; - } - break; - case INITIATE_RECOVERY: - printk ("scsi%d : extended contingent allegiance not supported yet, rejecting\n", - host->host_no); - /* Fall through to default */ - hostdata->dsp = hostdata->script + hostdata->E_reject_message / - sizeof(u32); - hostdata->dsp_changed = 1; - break; - default: - printk ("scsi%d : unsupported message, rejecting\n", - host->host_no); - hostdata->dsp = hostdata->script + hostdata->E_reject_message / - sizeof(u32); - hostdata->dsp_changed = 1; - } - if (print) { - printk ("scsi%d : received message", host->host_no); - if (c) - printk (" from target %d lun %d ", c->device->id, c->device->lun); - spi_print_msg((unsigned char *) hostdata->msg_buf); - printk("\n"); - } - - return SPECIFIC_INT_NOTHING; - - - case A_int_msg_sdtr: -/* - * At this point, hostdata->msg_buf contains - * 0 EXTENDED MESSAGE - * 1 length - * 2 SDTR - * 3 period * 4ns - * 4 offset - */ - - if (cmd) { - char buf[80]; - sprintf (buf, "scsi%d : target %d %s ", host->host_no, c->device->id, - (cmd->flags & CMD_FLAG_SDTR) ? "accepting" : "requesting"); - print_synchronous (buf, (unsigned char *) hostdata->msg_buf); - - /* - * Initiator initiated, won't happen unless synchronous - * transfers are enabled. 
If we get a SDTR message in - * response to our SDTR, we should program our parameters - * such that - * offset <= requested offset - * period >= requested period - */ - if (cmd->flags & CMD_FLAG_SDTR) { - cmd->flags &= ~CMD_FLAG_SDTR; - if (hostdata->msg_buf[4]) - synchronous (host, c->device->id, (unsigned char *) - hostdata->msg_buf); - else - asynchronous (host, c->device->id); - hostdata->dsp = hostdata->script + hostdata->E_accept_message / - sizeof(u32); - hostdata->dsp_changed = 1; - return SPECIFIC_INT_NOTHING; - } else { - if (hostdata->options & OPTION_SYNCHRONOUS) { - cmd->flags |= CMD_FLAG_DID_SDTR; - synchronous (host, c->device->id, (unsigned char *) - hostdata->msg_buf); - } else { - hostdata->msg_buf[4] = 0; /* 0 offset = async */ - asynchronous (host, c->device->id); - } - patch_dsa_32 (cmd->dsa, dsa_msgout_other, 0, 5); - patch_dsa_32 (cmd->dsa, dsa_msgout_other, 1, (u32) - virt_to_bus ((void *)&hostdata->msg_buf)); - hostdata->dsp = hostdata->script + - hostdata->E_respond_message / sizeof(u32); - hostdata->dsp_changed = 1; - } - return SPECIFIC_INT_NOTHING; - } - /* Fall through to abort if we couldn't find a cmd, and - therefore a dsa structure to twiddle */ - case A_int_msg_wdtr: - hostdata->dsp = hostdata->script + hostdata->E_reject_message / - sizeof(u32); - hostdata->dsp_changed = 1; - return SPECIFIC_INT_NOTHING; - case A_int_err_unexpected_phase: - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : unexpected phase\n", host->host_no); - return SPECIFIC_INT_ABORT; - case A_int_err_selected: - if ((hostdata->chip / 100) == 8) - printk ("scsi%d : selected by target %d\n", host->host_no, - (int) NCR53c7x0_read8(SDID_REG_800) &7); - else - printk ("scsi%d : selected by target LCRC=0x%02x\n", host->host_no, - (int) NCR53c7x0_read8(LCRC_REG_10)); - hostdata->dsp = hostdata->script + hostdata->E_target_abort / - sizeof(u32); - hostdata->dsp_changed = 1; - return SPECIFIC_INT_NOTHING; - case A_int_err_unexpected_reselect: - if 
((hostdata->chip / 100) == 8) - printk ("scsi%d : unexpected reselect by target %d lun %d\n", - host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & 7, - hostdata->reselected_identify & 7); - else - printk ("scsi%d : unexpected reselect LCRC=0x%02x\n", host->host_no, - (int) NCR53c7x0_read8(LCRC_REG_10)); - hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / - sizeof(u32); - hostdata->dsp_changed = 1; - return SPECIFIC_INT_NOTHING; -/* - * Since contingent allegiance conditions are cleared by the next - * command issued to a target, we must issue a REQUEST SENSE - * command after receiving a CHECK CONDITION status, before - * another command is issued. - * - * Since this NCR53c7x0_cmd will be freed after use, we don't - * care if we step on the various fields, so modify a few things. - */ - case A_int_err_check_condition: -#if 0 - if (hostdata->options & OPTION_DEBUG_INTR) -#endif - printk ("scsi%d : CHECK CONDITION\n", host->host_no); - if (!c) { - printk("scsi%d : CHECK CONDITION with no SCSI command\n", - host->host_no); - return SPECIFIC_INT_PANIC; - } - - /* - * FIXME : this uses the normal one-byte selection message. - * We may want to renegotiate for synchronous & WIDE transfers - * since these could be the crux of our problem. - * - hostdata->NOP_insn* FIXME : once SCSI-II tagged queuing is implemented, we'll - * have to set this up so that the rest of the DSA - * agrees with this being an untagged queue'd command. - */ - - patch_dsa_32 (cmd->dsa, dsa_msgout, 0, 1); - - /* - * Modify the table indirect for COMMAND OUT phase, since - * Request Sense is a six byte command. - */ - - patch_dsa_32 (cmd->dsa, dsa_cmdout, 0, 6); - - /* - * The CDB is now mirrored in our local non-cached - * structure, but keep the old structure up to date as well, - * just in case anyone looks at it. 
- */ - - /* - * XXX Need to worry about data buffer alignment/cache state - * XXX here, but currently never get A_int_err_check_condition, - * XXX so ignore problem for now. - */ - cmd->cmnd[0] = c->cmnd[0] = REQUEST_SENSE; - cmd->cmnd[0] = c->cmnd[1] &= 0xe0; /* Zero all but LUN */ - cmd->cmnd[0] = c->cmnd[2] = 0; - cmd->cmnd[0] = c->cmnd[3] = 0; - cmd->cmnd[0] = c->cmnd[4] = sizeof(c->sense_buffer); - cmd->cmnd[0] = c->cmnd[5] = 0; - - /* - * Disable dataout phase, and program datain to transfer to the - * sense buffer, and add a jump to other_transfer after the - * command so overflow/underrun conditions are detected. - */ - - patch_dsa_32 (cmd->dsa, dsa_dataout, 0, - virt_to_bus(hostdata->script) + hostdata->E_other_transfer); - patch_dsa_32 (cmd->dsa, dsa_datain, 0, - virt_to_bus(cmd->data_transfer_start)); - cmd->data_transfer_start[0] = (((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | - DCMD_BMI_IO)) << 24) | sizeof(c->sense_buffer); - cmd->data_transfer_start[1] = (u32) virt_to_bus(c->sense_buffer); - - cmd->data_transfer_start[2] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) - << 24) | DBC_TCI_TRUE; - cmd->data_transfer_start[3] = (u32) virt_to_bus(hostdata->script) + - hostdata->E_other_transfer; - - /* - * Currently, this command is flagged as completed, ie - * it has valid status and message data. Reflag it as - * incomplete. Q - need to do something so that original - * status, etc are used. - */ - - cmd->result = cmd->cmd->result = 0xffff; - - /* - * Restart command as a REQUEST SENSE. 
- */ - hostdata->dsp = (u32 *) hostdata->script + hostdata->E_select / - sizeof(u32); - hostdata->dsp_changed = 1; - return SPECIFIC_INT_NOTHING; - case A_int_debug_break: - return SPECIFIC_INT_BREAK; - case A_int_norm_aborted: - hostdata->dsp = (u32 *) hostdata->schedule; - hostdata->dsp_changed = 1; - if (cmd) - abnormal_finished (cmd, DID_ERROR << 16); - return SPECIFIC_INT_NOTHING; - case A_int_norm_emulateintfly: - NCR53c7x0_intfly(host); - return SPECIFIC_INT_NOTHING; - case A_int_test_1: - case A_int_test_2: - hostdata->idle = 1; - hostdata->test_completed = (dsps - A_int_test_1) / 0x00010000 + 1; - if (hostdata->options & OPTION_DEBUG_INTR) - printk("scsi%d : test%d complete\n", host->host_no, - hostdata->test_completed); - return SPECIFIC_INT_NOTHING; -#ifdef A_int_debug_reselected_ok - case A_int_debug_reselected_ok: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT)) { - /* - * Note - this dsa is not based on location relative to - * the command structure, but to location relative to the - * DSA register - */ - u32 *dsa; - dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG)); - - printk("scsi%d : reselected_ok (DSA = 0x%x (virt 0x%p)\n", - host->host_no, NCR53c7x0_read32(DSA_REG), dsa); - printk("scsi%d : resume address is 0x%x (virt 0x%p)\n", - host->host_no, cmd->saved_data_pointer, - bus_to_virt(cmd->saved_data_pointer)); - print_insn (host, hostdata->script + Ent_reselected_ok / - sizeof(u32), "", 1); - if ((hostdata->chip / 100) == 8) - printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n", - host->host_no, NCR53c7x0_read8(SXFER_REG), - NCR53c7x0_read8(SCNTL3_REG_800)); - else - printk ("scsi%d : sxfer=0x%x, cannot read SBCL\n", - host->host_no, NCR53c7x0_read8(SXFER_REG)); - if (c) { - print_insn (host, (u32 *) - hostdata->sync[c->device->id].script, "", 1); - print_insn (host, (u32 *) - hostdata->sync[c->device->id].script + 2, "", 1); - } - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef 
A_int_debug_reselect_check - case A_int_debug_reselect_check: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - u32 *dsa; -#if 0 - u32 *code; -#endif - /* - * Note - this dsa is not based on location relative to - * the command structure, but to location relative to the - * DSA register - */ - dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG)); - printk("scsi%d : reselected_check_next (DSA = 0x%lx (virt 0x%p))\n", - host->host_no, virt_to_bus(dsa), dsa); - if (dsa) { - printk("scsi%d : resume address is 0x%x (virt 0x%p)\n", - host->host_no, cmd->saved_data_pointer, - bus_to_virt (cmd->saved_data_pointer)); -#if 0 - printk("scsi%d : template code :\n", host->host_no); - for (code = dsa + (Ent_dsa_code_check_reselect - Ent_dsa_zero) - / sizeof(u32); code < (dsa + Ent_dsa_zero / sizeof(u32)); - code += print_insn (host, code, "", 1)); -#endif - } - print_insn (host, hostdata->script + Ent_reselected_ok / - sizeof(u32), "", 1); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_dsa_schedule - case A_int_debug_dsa_schedule: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - u32 *dsa; - /* - * Note - this dsa is not based on location relative to - * the command structure, but to location relative to the - * DSA register - */ - dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG)); - printk("scsi%d : dsa_schedule (old DSA = 0x%lx (virt 0x%p))\n", - host->host_no, virt_to_bus(dsa), dsa); - if (dsa) - printk("scsi%d : resume address is 0x%x (virt 0x%p)\n" - " (temp was 0x%x (virt 0x%p))\n", - host->host_no, cmd->saved_data_pointer, - bus_to_virt (cmd->saved_data_pointer), - NCR53c7x0_read32 (TEMP_REG), - bus_to_virt (NCR53c7x0_read32(TEMP_REG))); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_scheduled - case A_int_debug_scheduled: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - printk("scsi%d : new I/O 0x%x (virt 0x%p) scheduled\n", - host->host_no, NCR53c7x0_read32(DSA_REG), - 
bus_to_virt(NCR53c7x0_read32(DSA_REG))); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_idle - case A_int_debug_idle: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - printk("scsi%d : idle\n", host->host_no); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_cmd - case A_int_debug_cmd: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - printk("scsi%d : command sent\n"); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_dsa_loaded - case A_int_debug_dsa_loaded: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - printk("scsi%d : DSA loaded with 0x%x (virt 0x%p)\n", host->host_no, - NCR53c7x0_read32(DSA_REG), - bus_to_virt(NCR53c7x0_read32(DSA_REG))); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_reselected - case A_int_debug_reselected: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT)) { - if ((hostdata->chip / 100) == 8) - printk("scsi%d : reselected by target %d lun %d\n", - host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & ~0x80, - (int) hostdata->reselected_identify & 7); - else - printk("scsi%d : reselected by LCRC=0x%02x lun %d\n", - host->host_no, (int) NCR53c7x0_read8(LCRC_REG_10), - (int) hostdata->reselected_identify & 7); - print_queues(host); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_disconnect_msg - case A_int_debug_disconnect_msg: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { - if (c) - printk("scsi%d : target %d lun %d disconnecting\n", - host->host_no, c->device->id, c->device->lun); - else - printk("scsi%d : unknown target disconnecting\n", - host->host_no); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_disconnected - case A_int_debug_disconnected: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT)) { - printk ("scsi%d : disconnected, new queues are\n", - host->host_no); - 
print_queues(host); -#if 0 - /* Not valid on ncr53c710! */ - printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n", - host->host_no, NCR53c7x0_read8(SXFER_REG), - NCR53c7x0_read8(SCNTL3_REG_800)); -#endif - if (c) { - print_insn (host, (u32 *) - hostdata->sync[c->device->id].script, "", 1); - print_insn (host, (u32 *) - hostdata->sync[c->device->id].script + 2, "", 1); - } - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_panic - case A_int_debug_panic: - printk("scsi%d : int_debug_panic received\n", host->host_no); - print_lots (host); - return SPECIFIC_INT_PANIC; -#endif -#ifdef A_int_debug_saved - case A_int_debug_saved: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT)) { - printk ("scsi%d : saved data pointer 0x%x (virt 0x%p)\n", - host->host_no, cmd->saved_data_pointer, - bus_to_virt (cmd->saved_data_pointer)); - print_progress (c); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_restored - case A_int_debug_restored: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT)) { - if (cmd) { - int size; - printk ("scsi%d : restored data pointer 0x%x (virt 0x%p)\n", - host->host_no, cmd->saved_data_pointer, bus_to_virt ( - cmd->saved_data_pointer)); - size = print_insn (host, (u32 *) - bus_to_virt(cmd->saved_data_pointer), "", 1); - size = print_insn (host, (u32 *) - bus_to_virt(cmd->saved_data_pointer) + size, "", 1); - print_progress (c); - } -#if 0 - printk ("scsi%d : datapath residual %d\n", - host->host_no, datapath_residual (host)) ; -#endif - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_sync - case A_int_debug_sync: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) { - unsigned char sxfer = NCR53c7x0_read8 (SXFER_REG), scntl3; - if ((hostdata->chip / 100) == 8) { - scntl3 = NCR53c7x0_read8 (SCNTL3_REG_800); - if (c) { - if (sxfer != hostdata->sync[c->device->id].sxfer_sanity || - 
scntl3 != hostdata->sync[c->device->id].scntl3_sanity) { - printk ("scsi%d : sync sanity check failed sxfer=0x%x, scntl3=0x%x", - host->host_no, sxfer, scntl3); - NCR53c7x0_write8 (SXFER_REG, sxfer); - NCR53c7x0_write8 (SCNTL3_REG_800, scntl3); - } - } else - printk ("scsi%d : unknown command sxfer=0x%x, scntl3=0x%x\n", - host->host_no, (int) sxfer, (int) scntl3); - } else { - if (c) { - if (sxfer != hostdata->sync[c->device->id].sxfer_sanity) { - printk ("scsi%d : sync sanity check failed sxfer=0x%x", - host->host_no, sxfer); - NCR53c7x0_write8 (SXFER_REG, sxfer); - NCR53c7x0_write8 (SBCL_REG, - hostdata->sync[c->device->id].sscf_710); - } - } else - printk ("scsi%d : unknown command sxfer=0x%x\n", - host->host_no, (int) sxfer); - } - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_datain - case A_int_debug_datain: - if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| - OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) { - int size; - if ((hostdata->chip / 100) == 8) - printk ("scsi%d : In do_datain (%s) sxfer=0x%x, scntl3=0x%x\n" - " datapath residual=%d\n", - host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)), - (int) NCR53c7x0_read8(SXFER_REG), - (int) NCR53c7x0_read8(SCNTL3_REG_800), - datapath_residual (host)) ; - else - printk ("scsi%d : In do_datain (%s) sxfer=0x%x\n" - " datapath residual=%d\n", - host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)), - (int) NCR53c7x0_read8(SXFER_REG), - datapath_residual (host)) ; - print_insn (host, dsp, "", 1); - size = print_insn (host, (u32 *) bus_to_virt(dsp[1]), "", 1); - print_insn (host, (u32 *) bus_to_virt(dsp[1]) + size, "", 1); - } - return SPECIFIC_INT_RESTART; -#endif -#ifdef A_int_debug_check_dsa - case A_int_debug_check_dsa: - if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) { - int sdid; - int tmp; - char *where; - if (hostdata->chip / 100 == 8) - sdid = NCR53c7x0_read8 (SDID_REG_800) & 15; - else { - tmp = NCR53c7x0_read8 (SDID_REG_700); - if (!tmp) - panic ("SDID_REG_700 = 
0"); - tmp >>= 1; - sdid = 0; - while (tmp) { - tmp >>= 1; - sdid++; - } - } - where = dsp - NCR53c7x0_insn_size(NCR53c7x0_read8 - (DCMD_REG)) == hostdata->script + - Ent_select_check_dsa / sizeof(u32) ? - "selection" : "reselection"; - if (c && sdid != c->device->id) { - printk ("scsi%d : SDID target %d != DSA target %d at %s\n", - host->host_no, sdid, c->device->id, where); - print_lots(host); - dump_events (host, 20); - return SPECIFIC_INT_PANIC; - } - } - return SPECIFIC_INT_RESTART; -#endif - default: - if ((dsps & 0xff000000) == 0x03000000) { - printk ("scsi%d : misc debug interrupt 0x%x\n", - host->host_no, dsps); - return SPECIFIC_INT_RESTART; - } else if ((dsps & 0xff000000) == 0x05000000) { - if (hostdata->events) { - struct NCR53c7x0_event *event; - ++hostdata->event_index; - if (hostdata->event_index >= hostdata->event_size) - hostdata->event_index = 0; - event = (struct NCR53c7x0_event *) hostdata->events + - hostdata->event_index; - event->event = (enum ncr_event) dsps; - event->dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); - if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) { - if (hostdata->chip / 100 == 8) - event->target = NCR53c7x0_read8(SSID_REG_800); - else { - unsigned char tmp, sdid; - tmp = NCR53c7x0_read8 (SDID_REG_700); - if (!tmp) - panic ("SDID_REG_700 = 0"); - tmp >>= 1; - sdid = 0; - while (tmp) { - tmp >>= 1; - sdid++; - } - event->target = sdid; - } - } - else - event->target = 255; - - if (event->event == EVENT_RESELECT) - event->lun = hostdata->reselected_identify & 0xf; - else if (c) - event->lun = c->device->lun; - else - event->lun = 255; - do_gettimeofday(&(event->time)); - if (c) { - event->pid = c->pid; - memcpy ((void *) event->cmnd, (void *) c->cmnd, - sizeof (event->cmnd)); - } else { - event->pid = -1; - } - } - return SPECIFIC_INT_RESTART; - } - - printk ("scsi%d : unknown user interrupt 0x%x\n", - host->host_no, (unsigned) dsps); - return SPECIFIC_INT_PANIC; - } -} - -/* - * XXX - the stock NCR assembler won't output the 
scriptu.h file, - * which undefine's all #define'd CPP symbols from the script.h - * file, which will create problems if you use multiple scripts - * with the same symbol names. - * - * If you insist on using NCR's assembler, you could generate - * scriptu.h from script.h using something like - * - * grep #define script.h | \ - * sed 's/#define[ ][ ]*\([_a-zA-Z][_a-zA-Z0-9]*\).*$/#undefine \1/' \ - * > scriptu.h - */ - -#include "53c7xx_u.h" - -/* XXX - add alternate script handling code here */ - - -/* - * Function : static void NCR537xx_soft_reset (struct Scsi_Host *host) - * - * Purpose : perform a soft reset of the NCR53c7xx chip - * - * Inputs : host - pointer to this host adapter's structure - * - * Preconditions : NCR53c7x0_init must have been called for this - * host. - * - */ - -static void -NCR53c7x0_soft_reset (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - unsigned long flags; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - NCR53c7x0_local_setup(host); - - local_irq_save(flags); - - /* Disable scsi chip and s/w level 7 ints */ - -#ifdef CONFIG_MVME16x - if (MACH_IS_MVME16x) - { - volatile unsigned long v; - - v = *(volatile unsigned long *)0xfff4006c; - v &= ~0x8000; - *(volatile unsigned long *)0xfff4006c = v; - v = *(volatile unsigned long *)0xfff4202c; - v &= ~0x10; - *(volatile unsigned long *)0xfff4202c = v; - } -#endif - /* Anything specific for your hardware? */ - - /* - * Do a soft reset of the chip so that everything is - * reinitialized to the power-on state. - * - * Basically follow the procedure outlined in the NCR53c700 - * data manual under Chapter Six, How to Use, Steps Necessary to - * Start SCRIPTS, with the exception of actually starting the - * script and setting up the synchronous transfer gunk. - */ - - /* Should we reset the scsi bus here??????????????????? 
*/ - - NCR53c7x0_write8(ISTAT_REG_700, ISTAT_10_SRST); - NCR53c7x0_write8(ISTAT_REG_700, 0); - - /* - * saved_dcntl is set up in NCR53c7x0_init() before it is overwritten - * here. We should have some better way of working out the CF bit - * setting.. - */ - - hostdata->saved_dcntl = DCNTL_10_EA|DCNTL_10_COM; - if (hostdata->scsi_clock > 50000000) - hostdata->saved_dcntl |= DCNTL_700_CF_3; - else - if (hostdata->scsi_clock > 37500000) - hostdata->saved_dcntl |= DCNTL_700_CF_2; -#if 0 - else - /* Any clocks less than 37.5MHz? */ -#endif - - if (hostdata->options & OPTION_DEBUG_TRACE) - NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM); - else - NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl); - /* Following disables snooping - snooping is not required, as non- - * cached pages are used for shared data, and appropriate use is - * made of cache_push/cache_clear. Indeed, for 68060 - * enabling snooping causes disk corruption of ext2fs free block - * bitmaps and the like. If you have a 68060 with snooping hardwared - * on, then you need to enable CONFIG_060_WRITETHROUGH. - */ - NCR53c7x0_write8(CTEST7_REG, CTEST7_10_TT1|CTEST7_STD); - /* Actually burst of eight, according to my 53c710 databook */ - NCR53c7x0_write8(hostdata->dmode, DMODE_10_BL_8 | DMODE_10_FC2); - NCR53c7x0_write8(SCID_REG, 1 << host->this_id); - NCR53c7x0_write8(SBCL_REG, 0); - NCR53c7x0_write8(SCNTL1_REG, SCNTL1_ESR_700); - NCR53c7x0_write8(SCNTL0_REG, ((hostdata->options & OPTION_PARITY) ? - SCNTL0_EPC : 0) | SCNTL0_EPG_700 | SCNTL0_ARB1 | SCNTL0_ARB2); - - /* - * Enable all interrupts, except parity which we only want when - * the user requests it. - */ - - NCR53c7x0_write8(DIEN_REG, DIEN_700_BF | - DIEN_ABRT | DIEN_SSI | DIEN_SIR | DIEN_700_OPC); - - NCR53c7x0_write8(SIEN_REG_700, ((hostdata->options & OPTION_PARITY) ? 
- SIEN_PAR : 0) | SIEN_700_STO | SIEN_RST | SIEN_UDC | - SIEN_SGE | SIEN_MA); - -#ifdef CONFIG_MVME16x - if (MACH_IS_MVME16x) - { - volatile unsigned long v; - - /* Enable scsi chip and s/w level 7 ints */ - v = *(volatile unsigned long *)0xfff40080; - v = (v & ~(0xf << 28)) | (4 << 28); - *(volatile unsigned long *)0xfff40080 = v; - v = *(volatile unsigned long *)0xfff4006c; - v |= 0x8000; - *(volatile unsigned long *)0xfff4006c = v; - v = *(volatile unsigned long *)0xfff4202c; - v = (v & ~0xff) | 0x10 | 4; - *(volatile unsigned long *)0xfff4202c = v; - } -#endif - /* Anything needed for your hardware? */ - local_irq_restore(flags); -} - - -/* - * Function static struct NCR53c7x0_cmd *allocate_cmd (Scsi_Cmnd *cmd) - * - * Purpose : Return the first free NCR53c7x0_cmd structure (which are - * reused in a LIFO manner to minimize cache thrashing). - * - * Side effects : If we haven't yet scheduled allocation of NCR53c7x0_cmd - * structures for this device, do so. Attempt to complete all scheduled - * allocations using get_zeroed_page(), putting NCR53c7x0_cmd structures on - * the free list. Teach programmers not to drink and hack. - * - * Inputs : cmd - SCSI command - * - * Returns : NCR53c7x0_cmd structure allocated on behalf of cmd; - * NULL on failure. 
- */ - -static void -my_free_page (void *addr, int dummy) -{ - /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which - * XXX may be invalid (CONFIG_060_WRITETHROUGH) - */ - kernel_set_cachemode((void *)addr, 4096, IOMAP_FULL_CACHING); - free_page ((u32)addr); -} - -static struct NCR53c7x0_cmd * -allocate_cmd (Scsi_Cmnd *cmd) { - struct Scsi_Host *host = cmd->device->host; - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) host->hostdata[0]; - u32 real; /* Real address */ - int size; /* Size of *tmp */ - struct NCR53c7x0_cmd *tmp; - unsigned long flags; - - if (hostdata->options & OPTION_DEBUG_ALLOCATION) - printk ("scsi%d : num_cmds = %d, can_queue = %d\n" - " target = %d, lun = %d, %s\n", - host->host_no, hostdata->num_cmds, host->can_queue, - cmd->device->id, cmd->device->lun, (hostdata->cmd_allocated[cmd->device->id] & - (1 << cmd->device->lun)) ? "already allocated" : "not allocated"); - -/* - * If we have not yet reserved commands for this I_T_L nexus, and - * the device exists (as indicated by permanent Scsi_Cmnd structures - * being allocated under 1.3.x, or being outside of scan_scsis in - * 1.2.x), do so now. - */ - if (!(hostdata->cmd_allocated[cmd->device->id] & (1 << cmd->device->lun)) && - cmd->device && cmd->device->has_cmdblocks) { - if ((hostdata->extra_allocate + hostdata->num_cmds) < host->can_queue) - hostdata->extra_allocate += host->cmd_per_lun; - hostdata->cmd_allocated[cmd->device->id] |= (1 << cmd->device->lun); - } - - for (; hostdata->extra_allocate > 0 ; --hostdata->extra_allocate, - ++hostdata->num_cmds) { - /* historically, kmalloc has returned unaligned addresses; pad so we - have enough room to ROUNDUP */ - size = hostdata->max_cmd_size + sizeof (void *); -#ifdef FORCE_DSA_ALIGNMENT - /* - * 53c710 rev.0 doesn't have an add-with-carry instruction. - * Ensure we allocate enough memory to force alignment. 
- */ - size += 256; -#endif -/* FIXME: for ISA bus '7xx chips, we need to or GFP_DMA in here */ - - if (size > 4096) { - printk (KERN_ERR "53c7xx: allocate_cmd size > 4K\n"); - return NULL; - } - real = get_zeroed_page(GFP_ATOMIC); - if (real == 0) - return NULL; - cache_push(virt_to_phys((void *)real), 4096); - cache_clear(virt_to_phys((void *)real), 4096); - kernel_set_cachemode((void *)real, 4096, IOMAP_NOCACHE_SER); - tmp = ROUNDUP(real, void *); -#ifdef FORCE_DSA_ALIGNMENT - { - if (((u32)tmp & 0xff) > CmdPageStart) - tmp = (struct NCR53c7x0_cmd *)((u32)tmp + 255); - tmp = (struct NCR53c7x0_cmd *)(((u32)tmp & ~0xff) + CmdPageStart); -#if 0 - printk ("scsi: size = %d, real = 0x%08x, tmp set to 0x%08x\n", - size, real, (u32)tmp); -#endif - } -#endif - tmp->real = (void *)real; - tmp->size = size; - tmp->free = ((void (*)(void *, int)) my_free_page); - local_irq_save(flags); - tmp->next = hostdata->free; - hostdata->free = tmp; - local_irq_restore(flags); - } - local_irq_save(flags); - tmp = (struct NCR53c7x0_cmd *) hostdata->free; - if (tmp) { - hostdata->free = tmp->next; - } - local_irq_restore(flags); - if (!tmp) - printk ("scsi%d : can't allocate command for target %d lun %d\n", - host->host_no, cmd->device->id, cmd->device->lun); - return tmp; -} - -/* - * Function static struct NCR53c7x0_cmd *create_cmd (Scsi_Cmnd *cmd) - * - * - * Purpose : allocate a NCR53c7x0_cmd structure, initialize it based on the - * Scsi_Cmnd structure passed in cmd, including dsa and Linux field - * initialization, and dsa code relocation. - * - * Inputs : cmd - SCSI command - * - * Returns : NCR53c7x0_cmd structure corresponding to cmd, - * NULL on failure. 
- */ -static struct NCR53c7x0_cmd * -create_cmd (Scsi_Cmnd *cmd) { - NCR53c7x0_local_declare(); - struct Scsi_Host *host = cmd->device->host; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - struct NCR53c7x0_cmd *tmp; /* NCR53c7x0_cmd structure for this command */ - int datain, /* Number of instructions per phase */ - dataout; - int data_transfer_instructions, /* Count of dynamic instructions */ - i; /* Counter */ - u32 *cmd_datain, /* Address of datain/dataout code */ - *cmd_dataout; /* Incremented as we assemble */ -#ifdef notyet - unsigned char *msgptr; /* Current byte in select message */ - int msglen; /* Length of whole select message */ -#endif - unsigned long flags; - u32 exp_select_indirect; /* Used in sanity check */ - NCR53c7x0_local_setup(cmd->device->host); - - if (!(tmp = allocate_cmd (cmd))) - return NULL; - - /* - * Copy CDB and initialised result fields from Scsi_Cmnd to NCR53c7x0_cmd. - * We do this because NCR53c7x0_cmd may have a special cache mode - * selected to cope with lack of bus snooping, etc. - */ - - memcpy(tmp->cmnd, cmd->cmnd, 12); - tmp->result = cmd->result; - - /* - * Decide whether we need to generate commands for DATA IN, - * DATA OUT, neither, or both based on the SCSI command - */ - - switch (cmd->cmnd[0]) { - /* These commands do DATA IN */ - case INQUIRY: - case MODE_SENSE: - case READ_6: - case READ_10: - case READ_CAPACITY: - case REQUEST_SENSE: - case READ_BLOCK_LIMITS: - case READ_TOC: - datain = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3; - dataout = 0; - break; - /* These commands do DATA OUT */ - case MODE_SELECT: - case WRITE_6: - case WRITE_10: -#if 0 - printk("scsi%d : command is ", host->host_no); - __scsi_print_command(cmd->cmnd); -#endif -#if 0 - printk ("scsi%d : %d scatter/gather segments\n", host->host_no, - cmd->use_sg); -#endif - datain = 0; - dataout = 2 * (cmd->use_sg ? 
cmd->use_sg : 1) + 3; -#if 0 - hostdata->options |= OPTION_DEBUG_INTR; -#endif - break; - /* - * These commands do no data transfer, we should force an - * interrupt if a data phase is attempted on them. - */ - case TEST_UNIT_READY: - case ALLOW_MEDIUM_REMOVAL: - case START_STOP: - datain = dataout = 0; - break; - /* - * We don't know about these commands, so generate code to handle - * both DATA IN and DATA OUT phases. More efficient to identify them - * and add them to the above cases. - */ - default: - printk("scsi%d : datain+dataout for command ", host->host_no); - __scsi_print_command(cmd->cmnd); - datain = dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3; - } - - /* - * New code : so that active pointers work correctly regardless - * of where the saved data pointer is at, we want to immediately - * enter the dynamic code after selection, and on a non-data - * phase perform a CALL to the non-data phase handler, with - * returns back to this address. - * - * If a phase mismatch is encountered in the middle of a - * Block MOVE instruction, we want to _leave_ that instruction - * unchanged as the current case is, modify a temporary buffer, - * and point the active pointer (TEMP) at that. - * - * Furthermore, we want to implement a saved data pointer, - * set by the SAVE_DATA_POINTERs message. - * - * So, the data transfer segments will change to - * CALL data_transfer, WHEN NOT data phase - * MOVE x, x, WHEN data phase - * ( repeat ) - * JUMP other_transfer - */ - - data_transfer_instructions = datain + dataout; - - /* - * When we perform a request sense, we overwrite various things, - * including the data transfer code. Make sure we have enough - * space to do that. - */ - - if (data_transfer_instructions < 2) - data_transfer_instructions = 2; - - - /* - * The saved data pointer is set up so that a RESTORE POINTERS message - * will start the data transfer over at the beginning. 
- */ - - tmp->saved_data_pointer = virt_to_bus (hostdata->script) + - hostdata->E_data_transfer; - - /* - * Initialize Linux specific fields. - */ - - tmp->cmd = cmd; - tmp->next = NULL; - tmp->flags = 0; - tmp->dsa_next_addr = virt_to_bus(tmp->dsa) + hostdata->dsa_next - - hostdata->dsa_start; - tmp->dsa_addr = virt_to_bus(tmp->dsa) - hostdata->dsa_start; - - /* - * Calculate addresses of dynamic code to fill in DSA - */ - - tmp->data_transfer_start = tmp->dsa + (hostdata->dsa_end - - hostdata->dsa_start) / sizeof(u32); - tmp->data_transfer_end = tmp->data_transfer_start + - 2 * data_transfer_instructions; - - cmd_datain = datain ? tmp->data_transfer_start : NULL; - cmd_dataout = dataout ? (datain ? cmd_datain + 2 * datain : tmp-> - data_transfer_start) : NULL; - - /* - * Fill in the NCR53c7x0_cmd structure as follows - * dsa, with fixed up DSA code - * datain code - * dataout code - */ - - /* Copy template code into dsa and perform all necessary fixups */ - if (hostdata->dsa_fixup) - hostdata->dsa_fixup(tmp); - - patch_dsa_32(tmp->dsa, dsa_next, 0, 0); - /* - * XXX is this giving 53c710 access to the Scsi_Cmnd in some way? - * Do we need to change it for caching reasons? - */ - patch_dsa_32(tmp->dsa, dsa_cmnd, 0, virt_to_bus(cmd)); - - if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) { - - exp_select_indirect = ((1 << cmd->device->id) << 16) | - (hostdata->sync[cmd->device->id].sxfer_sanity << 8); - - if (hostdata->sync[cmd->device->id].select_indirect != - exp_select_indirect) { - printk ("scsi%d : sanity check failed select_indirect=0x%x\n", - host->host_no, hostdata->sync[cmd->device->id].select_indirect); - FATAL(host); - - } - } - - patch_dsa_32(tmp->dsa, dsa_select, 0, - hostdata->sync[cmd->device->id].select_indirect); - - /* - * Right now, we'll do the WIDE and SYNCHRONOUS negotiations on - * different commands; although it should be trivial to do them - * both at the same time. 
- */ - if (hostdata->initiate_wdtr & (1 << cmd->device->id)) { - memcpy ((void *) (tmp->select + 1), (void *) wdtr_message, - sizeof(wdtr_message)); - patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(wdtr_message)); - local_irq_save(flags); - hostdata->initiate_wdtr &= ~(1 << cmd->device->id); - local_irq_restore(flags); - } else if (hostdata->initiate_sdtr & (1 << cmd->device->id)) { - memcpy ((void *) (tmp->select + 1), (void *) sdtr_message, - sizeof(sdtr_message)); - patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(sdtr_message)); - tmp->flags |= CMD_FLAG_SDTR; - local_irq_save(flags); - hostdata->initiate_sdtr &= ~(1 << cmd->device->id); - local_irq_restore(flags); - - } -#if 1 - else if (!(hostdata->talked_to & (1 << cmd->device->id)) && - !(hostdata->options & OPTION_NO_ASYNC)) { - - memcpy ((void *) (tmp->select + 1), (void *) async_message, - sizeof(async_message)); - patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(async_message)); - tmp->flags |= CMD_FLAG_SDTR; - } -#endif - else - patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1); - - hostdata->talked_to |= (1 << cmd->device->id); - tmp->select[0] = (hostdata->options & OPTION_DISCONNECT) ? - IDENTIFY (1, cmd->device->lun) : IDENTIFY (0, cmd->device->lun); - patch_dsa_32(tmp->dsa, dsa_msgout, 1, virt_to_bus(tmp->select)); - patch_dsa_32(tmp->dsa, dsa_cmdout, 0, cmd->cmd_len); - patch_dsa_32(tmp->dsa, dsa_cmdout, 1, virt_to_bus(tmp->cmnd)); - patch_dsa_32(tmp->dsa, dsa_dataout, 0, cmd_dataout ? - virt_to_bus (cmd_dataout) - : virt_to_bus (hostdata->script) + hostdata->E_other_transfer); - patch_dsa_32(tmp->dsa, dsa_datain, 0, cmd_datain ? - virt_to_bus (cmd_datain) - : virt_to_bus (hostdata->script) + hostdata->E_other_transfer); - /* - * XXX - need to make endian aware, should use separate variables - * for both status and message bytes. - */ - patch_dsa_32(tmp->dsa, dsa_msgin, 0, 1); -/* - * FIXME : these only works for little endian. 
We probably want to - * provide message and status fields in the NCR53c7x0_cmd - * structure, and assign them to cmd->result when we're done. - */ -#ifdef BIG_ENDIAN - patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 2); - patch_dsa_32(tmp->dsa, dsa_status, 0, 1); - patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result) + 3); -#else - patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 1); - patch_dsa_32(tmp->dsa, dsa_status, 0, 1); - patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result)); -#endif - patch_dsa_32(tmp->dsa, dsa_msgout_other, 0, 1); - patch_dsa_32(tmp->dsa, dsa_msgout_other, 1, - virt_to_bus(&(hostdata->NCR53c7xx_msg_nop))); - - /* - * Generate code for zero or more of the DATA IN, DATA OUT phases - * in the format - * - * CALL data_transfer, WHEN NOT phase - * MOVE first buffer length, first buffer address, WHEN phase - * ... - * MOVE last buffer length, last buffer address, WHEN phase - * JUMP other_transfer - */ - -/* - * See if we're getting to data transfer by generating an unconditional - * interrupt. - */ -#if 0 - if (datain) { - cmd_datain[0] = 0x98080000; - cmd_datain[1] = 0x03ffd00d; - cmd_datain += 2; - } -#endif - -/* - * XXX - I'm undecided whether all of this nonsense is faster - * in the long run, or whether I should just go and implement a loop - * on the NCR chip using table indirect mode? - * - * In any case, this is how it _must_ be done for 53c700/700-66 chips, - * so this stays even when we come up with something better. - * - * When we're limited to 1 simultaneous command, no overlapping processing, - * we're seeing 630K/sec, with 7% CPU usage on a slow Syquest 45M - * drive. - * - * Not bad, not good. We'll see. - */ - - tmp->bounce.len = 0; /* Assume aligned buffer */ - - for (i = 0; cmd->use_sg ? (i < cmd->use_sg) : !i; cmd_datain += 4, - cmd_dataout += 4, ++i) { - u32 vbuf = cmd->use_sg - ? 
(u32)page_address(((struct scatterlist *)cmd->request_buffer)[i].page)+ - ((struct scatterlist *)cmd->request_buffer)[i].offset - : (u32)(cmd->request_buffer); - u32 bbuf = virt_to_bus((void *)vbuf); - u32 count = cmd->use_sg ? - ((struct scatterlist *)cmd->request_buffer)[i].length : - cmd->request_bufflen; - - /* - * If we have buffers which are not aligned with 16 byte cache - * lines, then we just hope nothing accesses the other parts of - * those cache lines while the transfer is in progress. That would - * fill the cache, and subsequent reads of the dma data would pick - * up the wrong thing. - * XXX We need a bounce buffer to handle that correctly. - */ - - if (((bbuf & 15) || (count & 15)) && (datain || dataout)) - { - /* Bounce buffer needed */ - if (cmd->use_sg) - printk ("53c7xx: Non-aligned buffer with use_sg\n"); - else if (datain && dataout) - printk ("53c7xx: Non-aligned buffer with datain && dataout\n"); - else if (count > 256) - printk ("53c7xx: Non-aligned transfer > 256 bytes\n"); - else - { - if (datain) - { - tmp->bounce.len = count; - tmp->bounce.addr = vbuf; - bbuf = virt_to_bus(tmp->bounce.buf); - tmp->bounce.buf[0] = 0xff; - tmp->bounce.buf[1] = 0xfe; - tmp->bounce.buf[2] = 0xfd; - tmp->bounce.buf[3] = 0xfc; - } - if (dataout) - { - memcpy ((void *)tmp->bounce.buf, (void *)vbuf, count); - bbuf = virt_to_bus(tmp->bounce.buf); - } - } - } - - if (datain) { - cache_clear(virt_to_phys((void *)vbuf), count); - /* CALL other_in, WHEN NOT DATA_IN */ - cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL | - DCMD_TCI_IO) << 24) | - DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE; - cmd_datain[1] = virt_to_bus (hostdata->script) + - hostdata->E_other_in; - /* MOVE count, buf, WHEN DATA_IN */ - cmd_datain[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | DCMD_BMI_IO) - << 24) | count; - cmd_datain[3] = bbuf; -#if 0 - print_insn (host, cmd_datain, "dynamic ", 1); - print_insn (host, cmd_datain + 2, "dynamic ", 1); -#endif - } - if (dataout) { - 
cache_push(virt_to_phys((void *)vbuf), count); - /* CALL other_out, WHEN NOT DATA_OUT */ - cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL) << 24) | - DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE; - cmd_dataout[1] = virt_to_bus(hostdata->script) + - hostdata->E_other_out; - /* MOVE count, buf, WHEN DATA+OUT */ - cmd_dataout[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I) << 24) - | count; - cmd_dataout[3] = bbuf; -#if 0 - print_insn (host, cmd_dataout, "dynamic ", 1); - print_insn (host, cmd_dataout + 2, "dynamic ", 1); -#endif - } - } - - /* - * Install JUMP instructions after the data transfer routines to return - * control to the do_other_transfer routines. - */ - - - if (datain) { - cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) | - DBC_TCI_TRUE; - cmd_datain[1] = virt_to_bus(hostdata->script) + - hostdata->E_other_transfer; -#if 0 - print_insn (host, cmd_datain, "dynamic jump ", 1); -#endif - cmd_datain += 2; - } -#if 0 - if (datain) { - cmd_datain[0] = 0x98080000; - cmd_datain[1] = 0x03ffdeed; - cmd_datain += 2; - } -#endif - if (dataout) { - cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) | - DBC_TCI_TRUE; - cmd_dataout[1] = virt_to_bus(hostdata->script) + - hostdata->E_other_transfer; -#if 0 - print_insn (host, cmd_dataout, "dynamic jump ", 1); -#endif - cmd_dataout += 2; - } - - return tmp; -} - -/* - * Function : int NCR53c7xx_queue_command (Scsi_Cmnd *cmd, - * void (*done)(Scsi_Cmnd *)) - * - * Purpose : enqueues a SCSI command - * - * Inputs : cmd - SCSI command, done - function called on completion, with - * a pointer to the command descriptor. - * - * Returns : 0 - * - * Side effects : - * cmd is added to the per instance driver issue_queue, with major - * twiddling done to the host specific fields of cmd. If the - * process_issue_queue coroutine isn't running, it is restarted. 
- * - * NOTE : we use the host_scribble field of the Scsi_Cmnd structure to - * hold our own data, and pervert the ptr field of the SCp field - * to create a linked list. - */ - -int -NCR53c7xx_queue_command (Scsi_Cmnd *cmd, void (* done)(Scsi_Cmnd *)) { - struct Scsi_Host *host = cmd->device->host; - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) host->hostdata[0]; - unsigned long flags; - Scsi_Cmnd *tmp; - - cmd->scsi_done = done; - cmd->host_scribble = NULL; - cmd->SCp.ptr = NULL; - cmd->SCp.buffer = NULL; - -#ifdef VALID_IDS - /* Ignore commands on invalid IDs */ - if (!hostdata->valid_ids[cmd->device->id]) { - printk("scsi%d : ignoring target %d lun %d\n", host->host_no, - cmd->device->id, cmd->device->lun); - cmd->result = (DID_BAD_TARGET << 16); - done(cmd); - return 0; - } -#endif - - local_irq_save(flags); - if ((hostdata->options & (OPTION_DEBUG_INIT_ONLY|OPTION_DEBUG_PROBE_ONLY)) - || ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) && - !(hostdata->debug_lun_limit[cmd->device->id] & (1 << cmd->device->lun))) -#ifdef LINUX_1_2 - || cmd->device->id > 7 -#else - || cmd->device->id >= host->max_id -#endif - || cmd->device->id == host->this_id - || hostdata->state == STATE_DISABLED) { - printk("scsi%d : disabled or bad target %d lun %d\n", host->host_no, - cmd->device->id, cmd->device->lun); - cmd->result = (DID_BAD_TARGET << 16); - done(cmd); - local_irq_restore(flags); - return 0; - } - - if ((hostdata->options & OPTION_DEBUG_NCOMMANDS_LIMIT) && - (hostdata->debug_count_limit == 0)) { - printk("scsi%d : maximum commands exceeded\n", host->host_no); - cmd->result = (DID_BAD_TARGET << 16); - done(cmd); - local_irq_restore(flags); - return 0; - } - - if (hostdata->options & OPTION_DEBUG_READ_ONLY) { - switch (cmd->cmnd[0]) { - case WRITE_6: - case WRITE_10: - printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", - host->host_no); - cmd->result = (DID_BAD_TARGET << 16); - done(cmd); - local_irq_restore(flags); - return 0; 
- } - } - - if ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) && - hostdata->debug_count_limit != -1) - --hostdata->debug_count_limit; - - cmd->result = 0xffff; /* The NCR will overwrite message - and status with valid data */ - cmd->host_scribble = (unsigned char *) tmp = create_cmd (cmd); - - /* - * REQUEST SENSE commands are inserted at the head of the queue - * so that we do not clear the contingent allegiance condition - * they may be looking at. - */ - - if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) { - cmd->SCp.ptr = (unsigned char *) hostdata->issue_queue; - hostdata->issue_queue = cmd; - } else { - for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp->SCp.ptr; - tmp = (Scsi_Cmnd *) tmp->SCp.ptr); - tmp->SCp.ptr = (unsigned char *) cmd; - } - local_irq_restore(flags); - run_process_issue_queue(); - return 0; -} - -/* - * Function : void to_schedule_list (struct Scsi_Host *host, - * struct NCR53c7x0_hostdata * hostdata, Scsi_Cmnd *cmd) - * - * Purpose : takes a SCSI command which was just removed from the - * issue queue, and deals with it by inserting it in the first - * free slot in the schedule list or by terminating it immediately. - * - * Inputs : - * host - SCSI host adapter; hostdata - hostdata structure for - * this adapter; cmd - a pointer to the command; should have - * the host_scribble field initialized to point to a valid - * - * Side effects : - * cmd is added to the per instance schedule list, with minor - * twiddling done to the host specific fields of cmd. 
- * - */ - -static __inline__ void -to_schedule_list (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata, - struct NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - Scsi_Cmnd *tmp = cmd->cmd; - unsigned long flags; - /* dsa start is negative, so subtraction is used */ - volatile u32 *ncrcurrent; - - int i; - NCR53c7x0_local_setup(host); -#if 0 - printk("scsi%d : new dsa is 0x%lx (virt 0x%p)\n", host->host_no, - virt_to_bus(hostdata->dsa), hostdata->dsa); -#endif - - local_irq_save(flags); - - /* - * Work around race condition : if an interrupt fired and we - * got disabled forget about this command. - */ - - if (hostdata->state == STATE_DISABLED) { - printk("scsi%d : driver disabled\n", host->host_no); - tmp->result = (DID_BAD_TARGET << 16); - cmd->next = (struct NCR53c7x0_cmd *) hostdata->free; - hostdata->free = cmd; - tmp->scsi_done(tmp); - local_irq_restore(flags); - return; - } - - for (i = host->can_queue, ncrcurrent = hostdata->schedule; - i > 0 && ncrcurrent[0] != hostdata->NOP_insn; - --i, ncrcurrent += 2 /* JUMP instructions are two words */); - - if (i > 0) { - ++hostdata->busy[tmp->device->id][tmp->device->lun]; - cmd->next = hostdata->running_list; - hostdata->running_list = cmd; - - /* Restore this instruction to a NOP once the command starts */ - cmd->dsa [(hostdata->dsa_jump_dest - hostdata->dsa_start) / - sizeof(u32)] = (u32) virt_to_bus ((void *)ncrcurrent); - /* Replace the current jump operand. 
*/ - ncrcurrent[1] = - virt_to_bus ((void *) cmd->dsa) + hostdata->E_dsa_code_begin - - hostdata->E_dsa_code_template; - /* Replace the NOP instruction with a JUMP */ - ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) | - DBC_TCI_TRUE; - } else { - printk ("scsi%d: no free slot\n", host->host_no); - disable(host); - tmp->result = (DID_ERROR << 16); - cmd->next = (struct NCR53c7x0_cmd *) hostdata->free; - hostdata->free = cmd; - tmp->scsi_done(tmp); - local_irq_restore(flags); - return; - } - - /* - * If the NCR chip is in an idle state, start it running the scheduler - * immediately. Otherwise, signal the chip to jump to schedule as - * soon as it is idle. - */ - - if (hostdata->idle) { - hostdata->idle = 0; - hostdata->state = STATE_RUNNING; - NCR53c7x0_write32 (DSP_REG, virt_to_bus ((void *)hostdata->schedule)); - if (hostdata->options & OPTION_DEBUG_TRACE) - NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | - DCNTL_SSM | DCNTL_STD); - } else { - NCR53c7x0_write8(hostdata->istat, ISTAT_10_SIGP); - } - - local_irq_restore(flags); -} - -/* - * Function : busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata - * *hostdata, Scsi_Cmnd *cmd) - * - * Purpose : decide if we can pass the given SCSI command on to the - * device in question or not. - * - * Returns : non-zero when we're busy, 0 when we aren't. - */ - -static __inline__ int -busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata, - Scsi_Cmnd *cmd) { - /* FIXME : in the future, this needs to accommodate SCSI-II tagged - queuing, and we may be able to play with fairness here a bit. - */ - return hostdata->busy[cmd->device->id][cmd->device->lun]; -} - -/* - * Function : process_issue_queue (void) - * - * Purpose : transfer commands from the issue queue to NCR start queue - * of each NCR53c7/8xx in the system, avoiding kernel stack - * overflows when the scsi_done() function is invoked recursively. 
- * - * NOTE : process_issue_queue exits with interrupts *disabled*, so the - * caller must reenable them if it desires. - * - * NOTE : process_issue_queue should be called from both - * NCR53c7x0_queue_command() and from the interrupt handler - * after command completion in case NCR53c7x0_queue_command() - * isn't invoked again but we've freed up resources that are - * needed. - */ - -static void -process_issue_queue (unsigned long flags) { - Scsi_Cmnd *tmp, *prev; - struct Scsi_Host *host; - struct NCR53c7x0_hostdata *hostdata; - int done; - - /* - * We run (with interrupts disabled) until we're sure that none of - * the host adapters have anything that can be done, at which point - * we set process_issue_queue_running to 0 and exit. - * - * Interrupts are enabled before doing various other internal - * instructions, after we've decided that we need to run through - * the loop again. - * - */ - - do { - local_irq_disable(); /* Freeze request queues */ - done = 1; - for (host = first_host; host && host->hostt == the_template; - host = host->next) { - hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0]; - local_irq_disable(); - if (hostdata->issue_queue) { - if (hostdata->state == STATE_DISABLED) { - tmp = (Scsi_Cmnd *) hostdata->issue_queue; - hostdata->issue_queue = (Scsi_Cmnd *) tmp->SCp.ptr; - tmp->result = (DID_BAD_TARGET << 16); - if (tmp->host_scribble) { - ((struct NCR53c7x0_cmd *)tmp->host_scribble)->next = - hostdata->free; - hostdata->free = - (struct NCR53c7x0_cmd *)tmp->host_scribble; - tmp->host_scribble = NULL; - } - tmp->scsi_done (tmp); - done = 0; - } else - for (tmp = (Scsi_Cmnd *) hostdata->issue_queue, - prev = NULL; tmp; prev = tmp, tmp = (Scsi_Cmnd *) - tmp->SCp.ptr) - if (!tmp->host_scribble || - !busyp (host, hostdata, tmp)) { - if (prev) - prev->SCp.ptr = tmp->SCp.ptr; - else - hostdata->issue_queue = (Scsi_Cmnd *) - tmp->SCp.ptr; - tmp->SCp.ptr = NULL; - if (tmp->host_scribble) { - if (hostdata->options & OPTION_DEBUG_QUEUES) - 
printk ("scsi%d : moving command for target %d lun %d to start list\n", - host->host_no, tmp->device->id, tmp->device->lun); - - - to_schedule_list (host, hostdata, - (struct NCR53c7x0_cmd *) - tmp->host_scribble); - } else { - if (((tmp->result & 0xff) == 0xff) || - ((tmp->result & 0xff00) == 0xff00)) { - printk ("scsi%d : danger Will Robinson!\n", - host->host_no); - tmp->result = DID_ERROR << 16; - disable (host); - } - tmp->scsi_done(tmp); - } - done = 0; - } /* if target/lun is not busy */ - } /* if hostdata->issue_queue */ - if (!done) - local_irq_restore(flags); - } /* for host */ - } while (!done); - process_issue_queue_running = 0; -} - -/* - * Function : static void intr_scsi (struct Scsi_Host *host, - * struct NCR53c7x0_cmd *cmd) - * - * Purpose : handle all SCSI interrupts, indicated by the setting - * of the SIP bit in the ISTAT register. - * - * Inputs : host, cmd - host and NCR command causing the interrupt, cmd - * may be NULL. - */ - -static void -intr_scsi (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) host->hostdata[0]; - unsigned char sstat0_sist0, sist1, /* Registers */ - fatal; /* Did a fatal interrupt - occur ? 
*/ - - NCR53c7x0_local_setup(host); - - fatal = 0; - - sstat0_sist0 = NCR53c7x0_read8(SSTAT0_REG); - sist1 = 0; - - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : SIST0 0x%0x, SIST1 0x%0x\n", host->host_no, - sstat0_sist0, sist1); - - /* 250ms selection timeout */ - if (sstat0_sist0 & SSTAT0_700_STO) { - fatal = 1; - if (hostdata->options & OPTION_DEBUG_INTR) { - printk ("scsi%d : Selection Timeout\n", host->host_no); - if (cmd) { - printk("scsi%d : target %d, lun %d, command ", - host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun); - __scsi_print_command (cmd->cmd->cmnd); - printk("scsi%d : dsp = 0x%x (virt 0x%p)\n", host->host_no, - NCR53c7x0_read32(DSP_REG), - bus_to_virt(NCR53c7x0_read32(DSP_REG))); - } else { - printk("scsi%d : no command\n", host->host_no); - } - } -/* - * XXX - question : how do we want to handle the Illegal Instruction - * interrupt, which may occur before or after the Selection Timeout - * interrupt? - */ - - if (1) { - hostdata->idle = 1; - hostdata->expecting_sto = 0; - - if (hostdata->test_running) { - hostdata->test_running = 0; - hostdata->test_completed = 3; - } else if (cmd) { - abnormal_finished(cmd, DID_BAD_TARGET << 16); - } -#if 0 - hostdata->intrs = 0; -#endif - } - } - -/* - * FIXME : in theory, we can also get a UDC when a STO occurs. 
- */ - if (sstat0_sist0 & SSTAT0_UDC) { - fatal = 1; - if (cmd) { - printk("scsi%d : target %d lun %d unexpected disconnect\n", - host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun); - print_lots (host); - abnormal_finished(cmd, DID_ERROR << 16); - } else - printk("scsi%d : unexpected disconnect (no command)\n", - host->host_no); - - hostdata->dsp = (u32 *) hostdata->schedule; - hostdata->dsp_changed = 1; - } - - /* SCSI PARITY error */ - if (sstat0_sist0 & SSTAT0_PAR) { - fatal = 1; - if (cmd && cmd->cmd) { - printk("scsi%d : target %d lun %d parity error.\n", - host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun); - abnormal_finished (cmd, DID_PARITY << 16); - } else - printk("scsi%d : parity error\n", host->host_no); - /* Should send message out, parity error */ - - /* XXX - Reduce synchronous transfer rate! */ - hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / - sizeof(u32); - hostdata->dsp_changed = 1; - /* SCSI GROSS error */ - } - - if (sstat0_sist0 & SSTAT0_SGE) { - fatal = 1; - printk("scsi%d : gross error, saved2_dsa = 0x%x\n", host->host_no, - (unsigned int)hostdata->saved2_dsa); - print_lots (host); - - /* - * A SCSI gross error may occur when we have - * - * - A synchronous offset which causes the SCSI FIFO to be overwritten. - * - * - A REQ which causes the maximum synchronous offset programmed in - * the SXFER register to be exceeded. - * - * - A phase change with an outstanding synchronous offset. - * - * - Residual data in the synchronous data FIFO, with a transfer - * other than a synchronous receive is started.$# - */ - - - /* XXX Should deduce synchronous transfer rate! 
*/ - hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / - sizeof(u32); - hostdata->dsp_changed = 1; - /* Phase mismatch */ - } - - if (sstat0_sist0 & SSTAT0_MA) { - fatal = 1; - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : SSTAT0_MA\n", host->host_no); - intr_phase_mismatch (host, cmd); - } - -#if 0 - if (sstat0_sist0 & SIST0_800_RSL) - printk ("scsi%d : Oh no Mr. Bill!\n", host->host_no); -#endif - -/* - * If a fatal SCSI interrupt occurs, we must insure that the DMA and - * SCSI FIFOs were flushed. - */ - - if (fatal) { - if (!hostdata->dstat_valid) { - hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); - hostdata->dstat_valid = 1; - } - - if (!(hostdata->dstat & DSTAT_DFE)) { - printk ("scsi%d : DMA FIFO not empty\n", host->host_no); - /* - * Really need to check this code for 710 RGH. - * Havn't seen any problems, but maybe we should FLUSH before - * clearing sometimes. - */ - NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF); - while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF) - ; - hostdata->dstat |= DSTAT_DFE; - } - } -} - -#ifdef CYCLIC_TRACE - -/* - * The following implements a cyclic log of instructions executed, if you turn - * TRACE on. It will also print the log for you. Very useful when debugging - * 53c710 support, possibly not really needed any more. 
- */ - -u32 insn_log[4096]; -u32 insn_log_index = 0; - -void log1 (u32 i) -{ - insn_log[insn_log_index++] = i; - if (insn_log_index == 4096) - insn_log_index = 0; -} - -void log_insn (u32 *ip) -{ - log1 ((u32)ip); - log1 (*ip); - log1 (*(ip+1)); - if (((*ip >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) - log1 (*(ip+2)); -} - -void dump_log(void) -{ - int cnt = 0; - int i = insn_log_index; - int size; - struct Scsi_Host *host = first_host; - - while (cnt < 4096) { - printk ("%08x (+%6x): ", insn_log[i], (insn_log[i] - (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4); - if (++i == 4096) - i = 0; - cnt++; - if (((insn_log[i] >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) - size = 3; - else - size = 2; - while (size--) { - printk ("%08x ", insn_log[i]); - if (++i == 4096) - i = 0; - cnt++; - } - printk ("\n"); - } -} -#endif - - -/* - * Function : static void NCR53c7x0_intfly (struct Scsi_Host *host) - * - * Purpose : Scan command queue for specified host, looking for completed - * commands. - * - * Inputs : Scsi_Host pointer. - * - * This is called from the interrupt handler, when a simulated INTFLY - * interrupt occurs. - */ - -static void -NCR53c7x0_intfly (struct Scsi_Host *host) -{ - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata; /* host->hostdata[0] */ - struct NCR53c7x0_cmd *cmd, /* command which halted */ - **cmd_prev_ptr; - unsigned long flags; - char search_found = 0; /* Got at least one ? */ - - hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0]; - NCR53c7x0_local_setup(host); - - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : INTFLY\n", host->host_no); - - /* - * Traverse our list of running commands, and look - * for those with valid (non-0xff ff) status and message - * bytes encoded in the result which signify command - * completion. 
- */ - - local_irq_save(flags); -restart: - for (cmd_prev_ptr = (struct NCR53c7x0_cmd **)&(hostdata->running_list), - cmd = (struct NCR53c7x0_cmd *) hostdata->running_list; cmd ; - cmd_prev_ptr = (struct NCR53c7x0_cmd **) &(cmd->next), - cmd = (struct NCR53c7x0_cmd *) cmd->next) - { - Scsi_Cmnd *tmp; - - if (!cmd) { - printk("scsi%d : very weird.\n", host->host_no); - break; - } - - if (!(tmp = cmd->cmd)) { - printk("scsi%d : weird. NCR53c7x0_cmd has no Scsi_Cmnd\n", - host->host_no); - continue; - } - /* Copy the result over now; may not be complete, - * but subsequent tests may as well be done on - * cached memory. - */ - tmp->result = cmd->result; - - if (((tmp->result & 0xff) == 0xff) || - ((tmp->result & 0xff00) == 0xff00)) - continue; - - search_found = 1; - - if (cmd->bounce.len) - memcpy ((void *)cmd->bounce.addr, - (void *)cmd->bounce.buf, cmd->bounce.len); - - /* Important - remove from list _before_ done is called */ - if (cmd_prev_ptr) - *cmd_prev_ptr = (struct NCR53c7x0_cmd *) cmd->next; - - --hostdata->busy[tmp->device->id][tmp->device->lun]; - cmd->next = hostdata->free; - hostdata->free = cmd; - - tmp->host_scribble = NULL; - - if (hostdata->options & OPTION_DEBUG_INTR) { - printk ("scsi%d : command complete : pid %lu, id %d,lun %d result 0x%x ", - host->host_no, tmp->pid, tmp->device->id, tmp->device->lun, tmp->result); - __scsi_print_command (tmp->cmnd); - } - - tmp->scsi_done(tmp); - goto restart; - } - local_irq_restore(flags); - - if (!search_found) { - printk ("scsi%d : WARNING : INTFLY with no completed commands.\n", - host->host_no); - } else { - run_process_issue_queue(); - } - return; -} - -/* - * Function : static irqreturn_t NCR53c7x0_intr (int irq, void *dev_id) - * - * Purpose : handle NCR53c7x0 interrupts for all NCR devices sharing - * the same IRQ line. - * - * Inputs : Since we're using the IRQF_DISABLED interrupt handler - * semantics, irq indicates the interrupt which invoked - * this handler. 
- * - * On the 710 we simualte an INTFLY with a script interrupt, and the - * script interrupt handler will call back to this function. - */ - -static irqreturn_t -NCR53c7x0_intr (int irq, void *dev_id) -{ - NCR53c7x0_local_declare(); - struct Scsi_Host *host; /* Host we are looking at */ - unsigned char istat; /* Values of interrupt regs */ - struct NCR53c7x0_hostdata *hostdata; /* host->hostdata[0] */ - struct NCR53c7x0_cmd *cmd; /* command which halted */ - u32 *dsa; /* DSA */ - int handled = 0; - -#ifdef NCR_DEBUG - char buf[80]; /* Debugging sprintf buffer */ - size_t buflen; /* Length of same */ -#endif - - host = (struct Scsi_Host *)dev_id; - hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0]; - NCR53c7x0_local_setup(host); - - /* - * Only read istat once per loop, since reading it again will unstack - * interrupts - */ - - while ((istat = NCR53c7x0_read8(hostdata->istat)) & (ISTAT_SIP|ISTAT_DIP)) { - handled = 1; - hostdata->dsp_changed = 0; - hostdata->dstat_valid = 0; - hostdata->state = STATE_HALTED; - - if (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) - printk ("scsi%d : SCSI FIFO not empty\n", host->host_no); - - /* - * NCR53c700 and NCR53c700-66 change the current SCSI - * process, hostdata->ncrcurrent, in the Linux driver so - * cmd = hostdata->ncrcurrent. - * - * With other chips, we must look through the commands - * executing and find the command structure which - * corresponds to the DSA register. 
- */ - - if (hostdata->options & OPTION_700) { - cmd = (struct NCR53c7x0_cmd *) hostdata->ncrcurrent; - } else { - dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); - for (cmd = (struct NCR53c7x0_cmd *) hostdata->running_list; - cmd && (dsa + (hostdata->dsa_start / sizeof(u32))) != cmd->dsa; - cmd = (struct NCR53c7x0_cmd *)(cmd->next)) - ; - } - if (hostdata->options & OPTION_DEBUG_INTR) { - if (cmd) { - printk("scsi%d : interrupt for pid %lu, id %d, lun %d ", - host->host_no, cmd->cmd->pid, (int) cmd->cmd->device->id, - (int) cmd->cmd->device->lun); - __scsi_print_command (cmd->cmd->cmnd); - } else { - printk("scsi%d : no active command\n", host->host_no); - } - } - - if (istat & ISTAT_SIP) { - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : ISTAT_SIP\n", host->host_no); - intr_scsi (host, cmd); - } - - if (istat & ISTAT_DIP) { - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : ISTAT_DIP\n", host->host_no); - intr_dma (host, cmd); - } - - if (!hostdata->dstat_valid) { - hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); - hostdata->dstat_valid = 1; - } - - if (!(hostdata->dstat & DSTAT_DFE)) { - printk ("scsi%d : DMA FIFO not empty\n", host->host_no); - /* Really need to check this out for 710 RGH */ - NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF); - while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF) - ; - hostdata->dstat |= DSTAT_DFE; - } - - if (!hostdata->idle && hostdata->state == STATE_HALTED) { - if (!hostdata->dsp_changed) - hostdata->dsp = (u32 *)bus_to_virt(NCR53c7x0_read32(DSP_REG)); -#if 0 - printk("scsi%d : new dsp is 0x%lx (virt 0x%p)\n", - host->host_no, virt_to_bus(hostdata->dsp), hostdata->dsp); -#endif - - hostdata->state = STATE_RUNNING; - NCR53c7x0_write32 (DSP_REG, virt_to_bus(hostdata->dsp)); - if (hostdata->options & OPTION_DEBUG_TRACE) { -#ifdef CYCLIC_TRACE - log_insn (hostdata->dsp); -#else - print_insn (host, hostdata->dsp, "t ", 1); -#endif - NCR53c7x0_write8 (DCNTL_REG, - hostdata->saved_dcntl | DCNTL_SSM | 
DCNTL_STD); - } - } - } - return IRQ_HANDLED; -} - - -/* - * Function : static int abort_connected (struct Scsi_Host *host) - * - * Purpose : Assuming that the NCR SCSI processor is currently - * halted, break the currently established nexus. Clean - * up of the NCR53c7x0_cmd and Scsi_Cmnd structures should - * be done on receipt of the abort interrupt. - * - * Inputs : host - SCSI host - * - */ - -static int -abort_connected (struct Scsi_Host *host) { -#ifdef NEW_ABORT - NCR53c7x0_local_declare(); -#endif - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; -/* FIXME : this probably should change for production kernels; at the - least, counter should move to a per-host structure. */ - static int counter = 5; -#ifdef NEW_ABORT - int sstat, phase, offset; - u32 *script; - NCR53c7x0_local_setup(host); -#endif - - if (--counter <= 0) { - disable(host); - return 0; - } - - printk ("scsi%d : DANGER : abort_connected() called \n", - host->host_no); - -#ifdef NEW_ABORT - -/* - * New strategy : Rather than using a generic abort routine, - * we'll specifically try to source or sink the appropriate - * amount of data for the phase we're currently in (taking into - * account the current synchronous offset) - */ - - sstat = NCR53c8x0_read8 (SSTAT2_REG); - offset = OFFSET (sstat & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT; - phase = sstat & SSTAT2_PHASE_MASK; - -/* - * SET ATN - * MOVE source_or_sink, WHEN CURRENT PHASE - * < repeat for each outstanding byte > - * JUMP send_abort_message - */ - - script = hostdata->abort_script = kmalloc ( - 8 /* instruction size */ * ( - 1 /* set ATN */ + - (!offset ? 
1 : offset) /* One transfer per outstanding byte */ + - 1 /* send abort message */), - GFP_ATOMIC); - - -#else /* def NEW_ABORT */ - hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / - sizeof(u32); -#endif /* def NEW_ABORT */ - hostdata->dsp_changed = 1; - -/* XXX - need to flag the command as aborted after the abort_connected - code runs - */ - return 0; -} - -/* - * Function : static int datapath_residual (Scsi_Host *host) - * - * Purpose : return residual data count of what's in the chip. - * - * Inputs : host - SCSI host - */ - -static int -datapath_residual (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - int count, synchronous, sstat; - unsigned int ddir; - - NCR53c7x0_local_setup(host); - /* COMPAT : the 700 and 700-66 need to use DFIFO_00_BO_MASK */ - count = ((NCR53c7x0_read8 (DFIFO_REG) & DFIFO_10_BO_MASK) - - (NCR53c7x0_read32 (DBC_REG) & DFIFO_10_BO_MASK)) & DFIFO_10_BO_MASK; - synchronous = NCR53c7x0_read8 (SXFER_REG) & SXFER_MO_MASK; - /* COMPAT : DDIR is elsewhere on non-'8xx chips. 
*/ - ddir = NCR53c7x0_read8 (CTEST0_REG_700) & CTEST0_700_DDIR; - - if (ddir) { - /* Receive */ - if (synchronous) - count += (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT; - else - if (NCR53c7x0_read8 (SSTAT1_REG) & SSTAT1_ILF) - ++count; - } else { - /* Send */ - sstat = NCR53c7x0_read8 (SSTAT1_REG); - if (sstat & SSTAT1_OLF) - ++count; - if (synchronous && (sstat & SSTAT1_ORF)) - ++count; - } - return count; -} - -/* - * Function : static const char * sbcl_to_phase (int sbcl)_ - * - * Purpose : Convert SBCL register to user-parsable phase representation - * - * Inputs : sbcl - value of sbcl register - */ - - -static const char * -sbcl_to_phase (int sbcl) { - switch (sbcl & SBCL_PHASE_MASK) { - case SBCL_PHASE_DATAIN: - return "DATAIN"; - case SBCL_PHASE_DATAOUT: - return "DATAOUT"; - case SBCL_PHASE_MSGIN: - return "MSGIN"; - case SBCL_PHASE_MSGOUT: - return "MSGOUT"; - case SBCL_PHASE_CMDOUT: - return "CMDOUT"; - case SBCL_PHASE_STATIN: - return "STATUSIN"; - default: - return "unknown"; - } -} - -/* - * Function : static const char * sstat2_to_phase (int sstat)_ - * - * Purpose : Convert SSTAT2 register to user-parsable phase representation - * - * Inputs : sstat - value of sstat register - */ - - -static const char * -sstat2_to_phase (int sstat) { - switch (sstat & SSTAT2_PHASE_MASK) { - case SSTAT2_PHASE_DATAIN: - return "DATAIN"; - case SSTAT2_PHASE_DATAOUT: - return "DATAOUT"; - case SSTAT2_PHASE_MSGIN: - return "MSGIN"; - case SSTAT2_PHASE_MSGOUT: - return "MSGOUT"; - case SSTAT2_PHASE_CMDOUT: - return "CMDOUT"; - case SSTAT2_PHASE_STATIN: - return "STATUSIN"; - default: - return "unknown"; - } -} - -/* - * Function : static void intr_phase_mismatch (struct Scsi_Host *host, - * struct NCR53c7x0_cmd *cmd) - * - * Purpose : Handle phase mismatch interrupts - * - * Inputs : host, cmd - host and NCR command causing the interrupt, cmd - * may be NULL. 
- * - * Side effects : The abort_connected() routine is called or the NCR chip - * is restarted, jumping to the command_complete entry point, or - * patching the address and transfer count of the current instruction - * and calling the msg_in entry point as appropriate. - */ - -static void -intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - u32 dbc_dcmd, *dsp, *dsp_next; - unsigned char dcmd, sbcl; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - int residual; - enum {ACTION_ABORT, ACTION_ABORT_PRINT, ACTION_CONTINUE} action = - ACTION_ABORT_PRINT; - const char *where = NULL; - - NCR53c7x0_local_setup(host); - - /* - * Corrective action is based on where in the SCSI SCRIPT(tm) the error - * occurred, as well as which SCSI phase we are currently in. - */ - dsp_next = bus_to_virt(NCR53c7x0_read32(DSP_REG)); - - /* - * Fetch the current instruction, and remove the operands for easier - * interpretation. - */ - dbc_dcmd = NCR53c7x0_read32(DBC_REG); - dcmd = (dbc_dcmd & 0xff000000) >> 24; - /* - * Like other processors, the NCR adjusts the instruction pointer before - * instruction decode. Set the DSP address back to what it should - * be for this instruction based on its size (2 or 3 32 bit words). - */ - dsp = dsp_next - NCR53c7x0_insn_size(dcmd); - - - /* - * Read new SCSI phase from the SBCL lines. Since all of our code uses - * a WHEN conditional instead of an IF conditional, we don't need to - * wait for a new REQ. - */ - sbcl = NCR53c7x0_read8(SBCL_REG) & SBCL_PHASE_MASK; - - if (!cmd) { - action = ACTION_ABORT_PRINT; - where = "no current command"; - /* - * The way my SCSI SCRIPTS(tm) are architected, recoverable phase - * mismatches should only occur where we're doing a multi-byte - * BMI instruction. 
Specifically, this means - * - * - select messages (a SCSI-I target may ignore additional messages - * after the IDENTIFY; any target may reject a SDTR or WDTR) - * - * - command out (targets may send a message to signal an error - * condition, or go into STATUSIN after they've decided - * they don't like the command. - * - * - reply_message (targets may reject a multi-byte message in the - * middle) - * - * - data transfer routines (command completion with buffer space - * left, disconnect message, or error message) - */ - } else if (((dsp >= cmd->data_transfer_start && - dsp < cmd->data_transfer_end)) || dsp == (cmd->residual + 2)) { - if ((dcmd & (DCMD_TYPE_MASK|DCMD_BMI_OP_MASK|DCMD_BMI_INDIRECT| - DCMD_BMI_MSG|DCMD_BMI_CD)) == (DCMD_TYPE_BMI| - DCMD_BMI_OP_MOVE_I)) { - residual = datapath_residual (host); - if (hostdata->options & OPTION_DEBUG_DISCONNECT) - printk ("scsi%d : handling residual transfer (+ %d bytes from DMA FIFO)\n", - host->host_no, residual); - - /* - * The first instruction is a CALL to the alternate handler for - * this data transfer phase, so we can do calls to - * munge_msg_restart as we would if control were passed - * from normal dynamic code. - */ - if (dsp != cmd->residual + 2) { - cmd->residual[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL | - ((dcmd & DCMD_BMI_IO) ? DCMD_TCI_IO : 0)) << 24) | - DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE; - cmd->residual[1] = virt_to_bus(hostdata->script) - + ((dcmd & DCMD_BMI_IO) - ? hostdata->E_other_in : hostdata->E_other_out); - } - - /* - * The second instruction is the a data transfer block - * move instruction, reflecting the pointer and count at the - * time of the phase mismatch. 
- */ - cmd->residual[2] = dbc_dcmd + residual; - cmd->residual[3] = NCR53c7x0_read32(DNAD_REG) - residual; - - /* - * The third and final instruction is a jump to the instruction - * which follows the instruction which had to be 'split' - */ - if (dsp != cmd->residual + 2) { - cmd->residual[4] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) - << 24) | DBC_TCI_TRUE; - cmd->residual[5] = virt_to_bus(dsp_next); - } - - /* - * For the sake of simplicity, transfer control to the - * conditional CALL at the start of the residual buffer. - */ - hostdata->dsp = cmd->residual; - hostdata->dsp_changed = 1; - action = ACTION_CONTINUE; - } else { - where = "non-BMI dynamic DSA code"; - action = ACTION_ABORT_PRINT; - } - } else if (dsp == (hostdata->script + hostdata->E_select_msgout / 4 + 2)) { - /* RGH 290697: Added +2 above, to compensate for the script - * instruction which disables the selection timer. */ - /* Release ATN */ - NCR53c7x0_write8 (SOCL_REG, 0); - switch (sbcl) { - /* - * Some devices (SQ555 come to mind) grab the IDENTIFY message - * sent on selection, and decide to go into COMMAND OUT phase - * rather than accepting the rest of the messages or rejecting - * them. Handle these devices gracefully. - */ - case SBCL_PHASE_CMDOUT: - hostdata->dsp = dsp + 2 /* two _words_ */; - hostdata->dsp_changed = 1; - printk ("scsi%d : target %d ignored SDTR and went into COMMAND OUT\n", - host->host_no, cmd->cmd->device->id); - cmd->flags &= ~CMD_FLAG_SDTR; - action = ACTION_CONTINUE; - break; - case SBCL_PHASE_MSGIN: - hostdata->dsp = hostdata->script + hostdata->E_msg_in / - sizeof(u32); - hostdata->dsp_changed = 1; - action = ACTION_CONTINUE; - break; - default: - where="select message out"; - action = ACTION_ABORT_PRINT; - } - /* - * Some SCSI devices will interpret a command as they read the bytes - * off the SCSI bus, and may decide that the command is Bogus before - * they've read the entire command off the bus. 
- */ - } else if (dsp == hostdata->script + hostdata->E_cmdout_cmdout / sizeof - (u32)) { - hostdata->dsp = hostdata->script + hostdata->E_data_transfer / - sizeof (u32); - hostdata->dsp_changed = 1; - action = ACTION_CONTINUE; - /* FIXME : we need to handle message reject, etc. within msg_respond. */ -#ifdef notyet - } else if (dsp == hostdata->script + hostdata->E_reply_message) { - switch (sbcl) { - /* Any other phase mismatches abort the currently executing command. */ -#endif - } else { - where = "unknown location"; - action = ACTION_ABORT_PRINT; - } - - /* Flush DMA FIFO */ - if (!hostdata->dstat_valid) { - hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); - hostdata->dstat_valid = 1; - } - if (!(hostdata->dstat & DSTAT_DFE)) { - /* Really need to check this out for 710 RGH */ - NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF); - while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF); - hostdata->dstat |= DSTAT_DFE; - } - - switch (action) { - case ACTION_ABORT_PRINT: - printk("scsi%d : %s : unexpected phase %s.\n", - host->host_no, where ? where : "unknown location", - sbcl_to_phase(sbcl)); - print_lots (host); - /* Fall through to ACTION_ABORT */ - case ACTION_ABORT: - abort_connected (host); - break; - case ACTION_CONTINUE: - break; - } - -#if 0 - if (hostdata->dsp_changed) { - printk("scsi%d: new dsp 0x%p\n", host->host_no, hostdata->dsp); - print_insn (host, hostdata->dsp, "", 1); - } -#endif -} - -/* - * Function : static void intr_bf (struct Scsi_Host *host, - * struct NCR53c7x0_cmd *cmd) - * - * Purpose : handle BUS FAULT interrupts - * - * Inputs : host, cmd - host and NCR command causing the interrupt, cmd - * may be NULL. - */ - -static void -intr_bf (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - u32 *dsp, - *next_dsp, /* Current dsp */ - *dsa, - dbc_dcmd; /* DCMD (high eight bits) + DBC */ - char *reason = NULL; - /* Default behavior is for a silent error, with a retry until we've - exhausted retries. 
*/ - enum {MAYBE, ALWAYS, NEVER} retry = MAYBE; - int report = 0; - NCR53c7x0_local_setup(host); - - dbc_dcmd = NCR53c7x0_read32 (DBC_REG); - next_dsp = bus_to_virt (NCR53c7x0_read32(DSP_REG)); - dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff); -/* FIXME - check chip type */ - dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG)); - - /* - * Bus faults can be caused by either a Bad Address or - * Target Abort. We should check the Received Target Abort - * bit of the PCI status register and Master Abort Bit. - * - * - Master Abort bit indicates that no device claimed - * the address with DEVSEL within five clocks - * - * - Target Abort bit indicates that a target claimed it, - * but changed its mind once it saw the byte enables. - * - */ - - /* 53c710, not PCI system */ - report = 1; - reason = "Unknown"; - -#ifndef notyet - report = 1; -#endif - if (report && reason) - { - printk(KERN_ALERT "scsi%d : BUS FAULT reason = %s\n", - host->host_no, reason ? reason : "unknown"); - print_lots (host); - } - -#ifndef notyet - retry = NEVER; -#endif - - /* - * TODO : we should attempt to recover from any spurious bus - * faults. After X retries, we should figure that things are - * sufficiently wedged, and call NCR53c7xx_reset. - * - * This code should only get executed once we've decided that we - * cannot retry. - */ - - if (retry == NEVER) { - printk(KERN_ALERT " mail richard@sleepie.demon.co.uk\n"); - FATAL (host); - } -} - -/* - * Function : static void intr_dma (struct Scsi_Host *host, - * struct NCR53c7x0_cmd *cmd) - * - * Purpose : handle all DMA interrupts, indicated by the setting - * of the DIP bit in the ISTAT register. - * - * Inputs : host, cmd - host and NCR command causing the interrupt, cmd - * may be NULL. 
- */ - -static void -intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned char dstat; /* DSTAT */ - u32 *dsp, - *next_dsp, /* Current dsp */ - *dsa, - dbc_dcmd; /* DCMD (high eight bits) + DBC */ - int tmp; - unsigned long flags; - NCR53c7x0_local_setup(host); - - if (!hostdata->dstat_valid) { - hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); - hostdata->dstat_valid = 1; - } - - dstat = hostdata->dstat; - - if (hostdata->options & OPTION_DEBUG_INTR) - printk("scsi%d : DSTAT=0x%x\n", host->host_no, (int) dstat); - - dbc_dcmd = NCR53c7x0_read32 (DBC_REG); - next_dsp = bus_to_virt(NCR53c7x0_read32(DSP_REG)); - dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff); -/* XXX - check chip type */ - dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); - - /* - * DSTAT_ABRT is the aborted interrupt. This is set whenever the - * SCSI chip is aborted. - * - * With NCR53c700 and NCR53c700-66 style chips, we should only - * get this when the chip is currently running the accept - * reselect/select code and we have set the abort bit in the - * ISTAT register. - * - */ - - if (dstat & DSTAT_ABRT) { -#if 0 - /* XXX - add code here to deal with normal abort */ - if ((hostdata->options & OPTION_700) && (hostdata->state == - STATE_ABORTING)) { - } else -#endif - { - printk(KERN_ALERT "scsi%d : unexpected abort interrupt at\n" - " ", host->host_no); - print_insn (host, dsp, KERN_ALERT "s ", 1); - FATAL (host); - } - } - - /* - * DSTAT_SSI is the single step interrupt. Should be generated - * whenever we have single stepped or are tracing. - */ - - if (dstat & DSTAT_SSI) { - if (hostdata->options & OPTION_DEBUG_TRACE) { - /* Don't print instr. 
until we write DSP at end of intr function */ - } else if (hostdata->options & OPTION_DEBUG_SINGLE) { - print_insn (host, dsp, "s ", 0); - local_irq_save(flags); -/* XXX - should we do this, or can we get away with writing dsp? */ - - NCR53c7x0_write8 (DCNTL_REG, (NCR53c7x0_read8(DCNTL_REG) & - ~DCNTL_SSM) | DCNTL_STD); - local_irq_restore(flags); - } else { - printk(KERN_ALERT "scsi%d : unexpected single step interrupt at\n" - " ", host->host_no); - print_insn (host, dsp, KERN_ALERT "", 1); - printk(KERN_ALERT " mail drew@PoohSticks.ORG\n"); - FATAL (host); - } - } - - /* - * DSTAT_IID / DSTAT_OPC (same bit, same meaning, only the name - * is different) is generated whenever an illegal instruction is - * encountered. - * - * XXX - we may want to emulate INTFLY here, so we can use - * the same SCSI SCRIPT (tm) for NCR53c710 through NCR53c810 - * chips. - */ - - if (dstat & DSTAT_OPC) { - /* - * Ascertain if this IID interrupts occurred before or after a STO - * interrupt. Since the interrupt handling code now leaves - * DSP unmodified until _after_ all stacked interrupts have been - * processed, reading the DSP returns the original DSP register. - * This means that if dsp lies between the select code, and - * message out following the selection code (where the IID interrupt - * would have to have occurred by due to the implicit wait for REQ), - * we have an IID interrupt resulting from a STO condition and - * can ignore it. 
- */ - - if (((dsp >= (hostdata->script + hostdata->E_select / sizeof(u32))) && - (dsp <= (hostdata->script + hostdata->E_select_msgout / - sizeof(u32) + 8))) || (hostdata->test_running == 2)) { - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : ignoring DSTAT_IID for SSTAT_STO\n", - host->host_no); - if (hostdata->expecting_iid) { - hostdata->expecting_iid = 0; - hostdata->idle = 1; - if (hostdata->test_running == 2) { - hostdata->test_running = 0; - hostdata->test_completed = 3; - } else if (cmd) - abnormal_finished (cmd, DID_BAD_TARGET << 16); - } else { - hostdata->expecting_sto = 1; - } - /* - * We can't guarantee we'll be able to execute the WAIT DISCONNECT - * instruction within the 3.4us of bus free and arbitration delay - * that a target can RESELECT in and assert REQ after we've dropped - * ACK. If this happens, we'll get an illegal instruction interrupt. - * Doing away with the WAIT DISCONNECT instructions broke everything, - * so instead I'll settle for moving one WAIT DISCONNECT a few - * instructions closer to the CLEAR ACK before it to minimize the - * chances of this happening, and handle it if it occurs anyway. - * - * Simply continue with what we were doing, and control should - * be transferred to the schedule routine which will ultimately - * pass control onto the reselection or selection (not yet) - * code. - */ - } else if (dbc_dcmd == 0x48000000 && (NCR53c7x0_read8 (SBCL_REG) & - SBCL_REQ)) { - if (!(hostdata->options & OPTION_NO_PRINT_RACE)) - { - printk("scsi%d: REQ before WAIT DISCONNECT IID\n", - host->host_no); - hostdata->options |= OPTION_NO_PRINT_RACE; - } - } else { - printk(KERN_ALERT "scsi%d : invalid instruction\n", host->host_no); - print_lots (host); - printk(KERN_ALERT " mail Richard@sleepie.demon.co.uk with ALL\n" - " boot messages and diagnostic output\n"); - FATAL (host); - } - } - - /* - * DSTAT_BF are bus fault errors. DSTAT_800_BF is valid for 710 also. 
- */ - - if (dstat & DSTAT_800_BF) { - intr_bf (host, cmd); - } - - - /* - * DSTAT_SIR interrupts are generated by the execution of - * the INT instruction. Since the exact values available - * are determined entirely by the SCSI script running, - * and are local to a particular script, a unique handler - * is called for each script. - */ - - if (dstat & DSTAT_SIR) { - if (hostdata->options & OPTION_DEBUG_INTR) - printk ("scsi%d : DSTAT_SIR\n", host->host_no); - switch ((tmp = hostdata->dstat_sir_intr (host, cmd))) { - case SPECIFIC_INT_NOTHING: - case SPECIFIC_INT_RESTART: - break; - case SPECIFIC_INT_ABORT: - abort_connected(host); - break; - case SPECIFIC_INT_PANIC: - printk(KERN_ALERT "scsi%d : failure at ", host->host_no); - print_insn (host, dsp, KERN_ALERT "", 1); - printk(KERN_ALERT " dstat_sir_intr() returned SPECIFIC_INT_PANIC\n"); - FATAL (host); - break; - case SPECIFIC_INT_BREAK: - intr_break (host, cmd); - break; - default: - printk(KERN_ALERT "scsi%d : failure at ", host->host_no); - print_insn (host, dsp, KERN_ALERT "", 1); - printk(KERN_ALERT" dstat_sir_intr() returned unknown value %d\n", - tmp); - FATAL (host); - } - } -} - -/* - * Function : static int print_insn (struct Scsi_Host *host, - * u32 *insn, int kernel) - * - * Purpose : print numeric representation of the instruction pointed - * to by insn to the debugging or kernel message buffer - * as appropriate. - * - * If desired, a user level program can interpret this - * information. - * - * Inputs : host, insn - host, pointer to instruction, prefix - - * string to prepend, kernel - use printk instead of debugging buffer. - * - * Returns : size, in u32s, of instruction printed. - */ - -/* - * FIXME: should change kernel parameter so that it takes an ENUM - * specifying severity - either KERN_ALERT or KERN_PANIC so - * all panic messages are output with the same severity. 
- */ - -static int -print_insn (struct Scsi_Host *host, const u32 *insn, - const char *prefix, int kernel) { - char buf[160], /* Temporary buffer and pointer. ICKY - arbitrary length. */ - - - *tmp; - unsigned char dcmd; /* dcmd register for *insn */ - int size; - - /* - * Check to see if the instruction pointer is not bogus before - * indirecting through it; avoiding red-zone at start of - * memory. - * - * FIXME: icky magic needs to happen here on non-intel boxes which - * don't have kernel memory mapped in like this. Might be reasonable - * to use vverify()? - */ - - if (virt_to_phys((void *)insn) < PAGE_SIZE || - virt_to_phys((void *)(insn + 8)) > virt_to_phys(high_memory) || - ((((dcmd = (insn[0] >> 24) & 0xff) & DCMD_TYPE_MMI) == DCMD_TYPE_MMI) && - virt_to_phys((void *)(insn + 12)) > virt_to_phys(high_memory))) { - size = 0; - sprintf (buf, "%s%p: address out of range\n", - prefix, insn); - } else { -/* - * FIXME : (void *) cast in virt_to_bus should be unnecessary, because - * it should take const void * as argument. - */ -#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000) - sprintf(buf, "%s0x%lx (virt 0x%p) : 0x%08x 0x%08x (virt 0x%p)", - (prefix ? prefix : ""), virt_to_bus((void *) insn), insn, - insn[0], insn[1], bus_to_virt (insn[1])); -#else - /* Remove virtual addresses to reduce output, as they are the same */ - sprintf(buf, "%s0x%x (+%x) : 0x%08x 0x%08x", - (prefix ? 
prefix : ""), (u32)insn, ((u32)insn - - (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4, - insn[0], insn[1]); -#endif - tmp = buf + strlen(buf); - if ((dcmd & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) { -#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000) - sprintf (tmp, " 0x%08x (virt 0x%p)\n", insn[2], - bus_to_virt(insn[2])); -#else - /* Remove virtual addr to reduce output, as it is the same */ - sprintf (tmp, " 0x%08x\n", insn[2]); -#endif - size = 3; - } else { - sprintf (tmp, "\n"); - size = 2; - } - } - - if (kernel) - printk ("%s", buf); -#ifdef NCR_DEBUG - else { - size_t len = strlen(buf); - debugger_kernel_write(host, buf, len); - } -#endif - return size; -} - -/* - * Function : int NCR53c7xx_abort (Scsi_Cmnd *cmd) - * - * Purpose : Abort an errant SCSI command, doing all necessary - * cleanup of the issue_queue, running_list, shared Linux/NCR - * dsa issue and reconnect queues. - * - * Inputs : cmd - command to abort, code - entire result field - * - * Returns : 0 on success, -1 on failure. - */ - -int -NCR53c7xx_abort (Scsi_Cmnd *cmd) { - NCR53c7x0_local_declare(); - struct Scsi_Host *host = cmd->device->host; - struct NCR53c7x0_hostdata *hostdata = host ? (struct NCR53c7x0_hostdata *) - host->hostdata[0] : NULL; - unsigned long flags; - struct NCR53c7x0_cmd *curr, **prev; - Scsi_Cmnd *me, **last; -#if 0 - static long cache_pid = -1; -#endif - - - if (!host) { - printk ("Bogus SCSI command pid %ld; no host structure\n", - cmd->pid); - return SCSI_ABORT_ERROR; - } else if (!hostdata) { - printk ("Bogus SCSI host %d; no hostdata\n", host->host_no); - return SCSI_ABORT_ERROR; - } - NCR53c7x0_local_setup(host); - -/* - * CHECK : I don't think that reading ISTAT will unstack any interrupts, - * since we need to write the INTF bit to clear it, and SCSI/DMA - * interrupts don't clear until we read SSTAT/SIST and DSTAT registers. - * - * See that this is the case. Appears to be correct on the 710, at least. 
- * - * I suspect that several of our failures may be coming from a new fatal - * interrupt (possibly due to a phase mismatch) happening after we've left - * the interrupt handler, but before the PIC has had the interrupt condition - * cleared. - */ - - if (NCR53c7x0_read8(hostdata->istat) & (ISTAT_DIP|ISTAT_SIP)) { - printk ("scsi%d : dropped interrupt for command %ld\n", host->host_no, - cmd->pid); - NCR53c7x0_intr (host->irq, NULL, NULL); - return SCSI_ABORT_BUSY; - } - - local_irq_save(flags); -#if 0 - if (cache_pid == cmd->pid) - panic ("scsi%d : bloody fetus %d\n", host->host_no, cmd->pid); - else - cache_pid = cmd->pid; -#endif - - -/* - * The command could be hiding in the issue_queue. This would be very - * nice, as commands can't be moved from the high level driver's issue queue - * into the shared queue until an interrupt routine is serviced, and this - * moving is atomic. - * - * If this is the case, we don't have to worry about anything - we simply - * pull the command out of the old queue, and call it aborted. - */ - - for (me = (Scsi_Cmnd *) hostdata->issue_queue, - last = (Scsi_Cmnd **) &(hostdata->issue_queue); - me && me != cmd; last = (Scsi_Cmnd **)&(me->SCp.ptr), - me = (Scsi_Cmnd *)me->SCp.ptr); - - if (me) { - *last = (Scsi_Cmnd *) me->SCp.ptr; - if (me->host_scribble) { - ((struct NCR53c7x0_cmd *)me->host_scribble)->next = hostdata->free; - hostdata->free = (struct NCR53c7x0_cmd *) me->host_scribble; - me->host_scribble = NULL; - } - cmd->result = DID_ABORT << 16; - cmd->scsi_done(cmd); - printk ("scsi%d : found command %ld in Linux issue queue\n", - host->host_no, me->pid); - local_irq_restore(flags); - run_process_issue_queue(); - return SCSI_ABORT_SUCCESS; - } - -/* - * That failing, the command could be in our list of already executing - * commands. If this is the case, drastic measures are called for. 
- */ - - for (curr = (struct NCR53c7x0_cmd *) hostdata->running_list, - prev = (struct NCR53c7x0_cmd **) &(hostdata->running_list); - curr && curr->cmd != cmd; prev = (struct NCR53c7x0_cmd **) - &(curr->next), curr = (struct NCR53c7x0_cmd *) curr->next); - - if (curr) { - if ((curr->result & 0xff) != 0xff && (curr->result & 0xff00) != 0xff00) { - cmd->result = curr->result; - if (prev) - *prev = (struct NCR53c7x0_cmd *) curr->next; - curr->next = (struct NCR53c7x0_cmd *) hostdata->free; - cmd->host_scribble = NULL; - hostdata->free = curr; - cmd->scsi_done(cmd); - printk ("scsi%d : found finished command %ld in running list\n", - host->host_no, cmd->pid); - local_irq_restore(flags); - return SCSI_ABORT_NOT_RUNNING; - } else { - printk ("scsi%d : DANGER : command running, can not abort.\n", - cmd->device->host->host_no); - local_irq_restore(flags); - return SCSI_ABORT_BUSY; - } - } - -/* - * And if we couldn't find it in any of our queues, it must have been - * a dropped interrupt. - */ - - curr = (struct NCR53c7x0_cmd *) cmd->host_scribble; - if (curr) { - curr->next = hostdata->free; - hostdata->free = curr; - cmd->host_scribble = NULL; - } - - if (curr == NULL || ((curr->result & 0xff00) == 0xff00) || - ((curr->result & 0xff) == 0xff)) { - printk ("scsi%d : did this command ever run?\n", host->host_no); - cmd->result = DID_ABORT << 16; - } else { - printk ("scsi%d : probably lost INTFLY, normal completion\n", - host->host_no); - cmd->result = curr->result; -/* - * FIXME : We need to add an additional flag which indicates if a - * command was ever counted as BUSY, so if we end up here we can - * decrement the busy count if and only if it is necessary. - */ - --hostdata->busy[cmd->device->id][cmd->device->lun]; - } - local_irq_restore(flags); - cmd->scsi_done(cmd); - -/* - * We need to run process_issue_queue since termination of this command - * may allow another queued command to execute first? 
- */ - return SCSI_ABORT_NOT_RUNNING; -} - -/* - * Function : int NCR53c7xx_reset (Scsi_Cmnd *cmd) - * - * Purpose : perform a hard reset of the SCSI bus and NCR - * chip. - * - * Inputs : cmd - command which caused the SCSI RESET - * - * Returns : 0 on success. - */ - -int -NCR53c7xx_reset (Scsi_Cmnd *cmd, unsigned int reset_flags) { - NCR53c7x0_local_declare(); - unsigned long flags; - int found = 0; - struct NCR53c7x0_cmd * c; - Scsi_Cmnd *tmp; - /* - * When we call scsi_done(), it's going to wake up anything sleeping on the - * resources which were in use by the aborted commands, and we'll start to - * get new commands. - * - * We can't let this happen until after we've re-initialized the driver - * structures, and can't reinitialize those structures until after we've - * dealt with their contents. - * - * So, we need to find all of the commands which were running, stick - * them on a linked list of completed commands (we'll use the host_scribble - * pointer), do our reinitialization, and then call the done function for - * each command. - */ - Scsi_Cmnd *nuke_list = NULL; - struct Scsi_Host *host = cmd->device->host; - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) host->hostdata[0]; - - NCR53c7x0_local_setup(host); - local_irq_save(flags); - ncr_halt (host); - print_lots (host); - dump_events (host, 30); - ncr_scsi_reset (host); - for (tmp = nuke_list = return_outstanding_commands (host, 1 /* free */, - 0 /* issue */ ); tmp; tmp = (Scsi_Cmnd *) tmp->SCp.buffer) - if (tmp == cmd) { - found = 1; - break; - } - - /* - * If we didn't find the command which caused this reset in our running - * list, then we've lost it. See that it terminates normally anyway. 
- */ - if (!found) { - c = (struct NCR53c7x0_cmd *) cmd->host_scribble; - if (c) { - cmd->host_scribble = NULL; - c->next = hostdata->free; - hostdata->free = c; - } else - printk ("scsi%d: lost command %ld\n", host->host_no, cmd->pid); - cmd->SCp.buffer = (struct scatterlist *) nuke_list; - nuke_list = cmd; - } - - NCR53c7x0_driver_init (host); - hostdata->soft_reset (host); - if (hostdata->resets == 0) - disable(host); - else if (hostdata->resets != -1) - --hostdata->resets; - local_irq_restore(flags); - for (; nuke_list; nuke_list = tmp) { - tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer; - nuke_list->result = DID_RESET << 16; - nuke_list->scsi_done (nuke_list); - } - local_irq_restore(flags); - return SCSI_RESET_SUCCESS; -} - -/* - * The NCR SDMS bios follows Annex A of the SCSI-CAM draft, and - * therefore shares the scsicam_bios_param function. - */ - -/* - * Function : int insn_to_offset (Scsi_Cmnd *cmd, u32 *insn) - * - * Purpose : convert instructions stored at NCR pointer into data - * pointer offset. - * - * Inputs : cmd - SCSI command; insn - pointer to instruction. Either current - * DSP, or saved data pointer. - * - * Returns : offset on success, -1 on failure. - */ - - -static int -insn_to_offset (Scsi_Cmnd *cmd, u32 *insn) { - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) cmd->device->host->hostdata[0]; - struct NCR53c7x0_cmd *ncmd = - (struct NCR53c7x0_cmd *) cmd->host_scribble; - int offset = 0, buffers; - struct scatterlist *segment; - char *ptr; - int found = 0; - -/* - * With the current code implementation, if the insn is inside dynamically - * generated code, the data pointer will be the instruction preceding - * the next transfer segment. 
- */ - - if (!check_address ((unsigned long) ncmd, sizeof (struct NCR53c7x0_cmd)) && - ((insn >= ncmd->data_transfer_start && - insn < ncmd->data_transfer_end) || - (insn >= ncmd->residual && - insn < (ncmd->residual + - sizeof(ncmd->residual))))) { - ptr = bus_to_virt(insn[3]); - - if ((buffers = cmd->use_sg)) { - for (offset = 0, - segment = (struct scatterlist *) cmd->request_buffer; - buffers && !((found = ((ptr >= (char *)page_address(segment->page)+segment->offset) && - (ptr < ((char *)page_address(segment->page)+segment->offset+segment->length))))); - --buffers, offset += segment->length, ++segment) -#if 0 - printk("scsi%d: comparing 0x%p to 0x%p\n", - cmd->device->host->host_no, saved, page_address(segment->page+segment->offset)); -#else - ; -#endif - offset += ptr - ((char *)page_address(segment->page)+segment->offset); - } else { - found = 1; - offset = ptr - (char *) (cmd->request_buffer); - } - } else if ((insn >= hostdata->script + - hostdata->E_data_transfer / sizeof(u32)) && - (insn <= hostdata->script + - hostdata->E_end_data_transfer / sizeof(u32))) { - found = 1; - offset = 0; - } - return found ? 
offset : -1; -} - - - -/* - * Function : void print_progress (Scsi_Cmnd *cmd) - * - * Purpose : print the current location of the saved data pointer - * - * Inputs : cmd - command we are interested in - * - */ - -static void -print_progress (Scsi_Cmnd *cmd) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_cmd *ncmd = - (struct NCR53c7x0_cmd *) cmd->host_scribble; - int offset, i; - char *where; - u32 *ptr; - NCR53c7x0_local_setup (cmd->device->host); - - if (check_address ((unsigned long) ncmd,sizeof (struct NCR53c7x0_cmd)) == 0) - { - printk("\nNCR53c7x0_cmd fields:\n"); - printk(" bounce.len=0x%x, addr=0x%0x, buf[]=0x%02x %02x %02x %02x\n", - ncmd->bounce.len, ncmd->bounce.addr, ncmd->bounce.buf[0], - ncmd->bounce.buf[1], ncmd->bounce.buf[2], ncmd->bounce.buf[3]); - printk(" result=%04x, cdb[0]=0x%02x\n", ncmd->result, ncmd->cmnd[0]); - } - - for (i = 0; i < 2; ++i) { - if (check_address ((unsigned long) ncmd, - sizeof (struct NCR53c7x0_cmd)) == -1) - continue; - if (!i) { - where = "saved"; - ptr = bus_to_virt(ncmd->saved_data_pointer); - } else { - where = "active"; - ptr = bus_to_virt (NCR53c7x0_read32 (DSP_REG) - - NCR53c7x0_insn_size (NCR53c7x0_read8 (DCMD_REG)) * - sizeof(u32)); - } - offset = insn_to_offset (cmd, ptr); - - if (offset != -1) - printk ("scsi%d : %s data pointer at offset %d\n", - cmd->device->host->host_no, where, offset); - else { - int size; - printk ("scsi%d : can't determine %s data pointer offset\n", - cmd->device->host->host_no, where); - if (ncmd) { - size = print_insn (cmd->device->host, - bus_to_virt(ncmd->saved_data_pointer), "", 1); - print_insn (cmd->device->host, - bus_to_virt(ncmd->saved_data_pointer) + size * sizeof(u32), - "", 1); - } - } - } -} - - -static void -print_dsa (struct Scsi_Host *host, u32 *dsa, const char *prefix) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - int i, len; - char *ptr; - Scsi_Cmnd *cmd; - - if (check_address ((unsigned long) dsa, hostdata->dsa_end - 
- hostdata->dsa_start) == -1) { - printk("scsi%d : bad dsa virt 0x%p\n", host->host_no, dsa); - return; - } - printk("%sscsi%d : dsa at phys 0x%lx (virt 0x%p)\n" - " + %d : dsa_msgout length = %u, data = 0x%x (virt 0x%p)\n" , - prefix ? prefix : "", - host->host_no, virt_to_bus (dsa), dsa, hostdata->dsa_msgout, - dsa[hostdata->dsa_msgout / sizeof(u32)], - dsa[hostdata->dsa_msgout / sizeof(u32) + 1], - bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1])); - - /* - * Only print messages if they're sane in length so we don't - * blow the kernel printk buffer on something which won't buy us - * anything. - */ - - if (dsa[hostdata->dsa_msgout / sizeof(u32)] < - sizeof (hostdata->free->select)) - for (i = dsa[hostdata->dsa_msgout / sizeof(u32)], - ptr = bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1]); - i > 0 && !check_address ((unsigned long) ptr, 1); - ptr += len, i -= len) { - printk(" "); - len = spi_print_msg(ptr); - printk("\n"); - if (!len) - break; - } - - printk(" + %d : select_indirect = 0x%x\n", - hostdata->dsa_select, dsa[hostdata->dsa_select / sizeof(u32)]); - cmd = (Scsi_Cmnd *) bus_to_virt(dsa[hostdata->dsa_cmnd / sizeof(u32)]); - printk(" + %d : dsa_cmnd = 0x%x ", hostdata->dsa_cmnd, - (u32) virt_to_bus(cmd)); - /* XXX Maybe we should access cmd->host_scribble->result here. 
RGH */ - if (cmd) { - printk(" result = 0x%x, target = %d, lun = %d, cmd = ", - cmd->result, cmd->device->id, cmd->device->lun); - __scsi_print_command(cmd->cmnd); - } else - printk("\n"); - printk(" + %d : dsa_next = 0x%x\n", hostdata->dsa_next, - dsa[hostdata->dsa_next / sizeof(u32)]); - if (cmd) { - printk("scsi%d target %d : sxfer_sanity = 0x%x, scntl3_sanity = 0x%x\n" - " script : ", - host->host_no, cmd->device->id, - hostdata->sync[cmd->device->id].sxfer_sanity, - hostdata->sync[cmd->device->id].scntl3_sanity); - for (i = 0; i < (sizeof(hostdata->sync[cmd->device->id].script) / 4); ++i) - printk ("0x%x ", hostdata->sync[cmd->device->id].script[i]); - printk ("\n"); - print_progress (cmd); - } -} -/* - * Function : void print_queues (Scsi_Host *host) - * - * Purpose : print the contents of the NCR issue and reconnect queues - * - * Inputs : host - SCSI host we are interested in - * - */ - -static void -print_queues (struct Scsi_Host *host) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - u32 *dsa, *next_dsa; - volatile u32 *ncrcurrent; - int left; - Scsi_Cmnd *cmd, *next_cmd; - unsigned long flags; - - printk ("scsi%d : issue queue\n", host->host_no); - - for (left = host->can_queue, cmd = (Scsi_Cmnd *) hostdata->issue_queue; - left >= 0 && cmd; - cmd = next_cmd) { - next_cmd = (Scsi_Cmnd *) cmd->SCp.ptr; - local_irq_save(flags); - if (cmd->host_scribble) { - if (check_address ((unsigned long) (cmd->host_scribble), - sizeof (cmd->host_scribble)) == -1) - printk ("scsi%d: scsi pid %ld bad pointer to NCR53c7x0_cmd\n", - host->host_no, cmd->pid); - /* print_dsa does sanity check on address, no need to check */ - else - print_dsa (host, ((struct NCR53c7x0_cmd *) cmd->host_scribble) - -> dsa, ""); - } else - printk ("scsi%d : scsi pid %ld for target %d lun %d has no NCR53c7x0_cmd\n", - host->host_no, cmd->pid, cmd->device->id, cmd->device->lun); - local_irq_restore(flags); - } - - if (left <= 0) { - printk ("scsi%d : 
loop detected in issue queue\n", - host->host_no); - } - - /* - * Traverse the NCR reconnect and start DSA structures, printing out - * each element until we hit the end or detect a loop. Currently, - * the reconnect structure is a linked list; and the start structure - * is an array. Eventually, the reconnect structure will become a - * list as well, since this simplifies the code. - */ - - printk ("scsi%d : schedule dsa array :\n", host->host_no); - for (left = host->can_queue, ncrcurrent = hostdata->schedule; - left > 0; ncrcurrent += 2, --left) - if (ncrcurrent[0] != hostdata->NOP_insn) -/* FIXME : convert pointer to dsa_begin to pointer to dsa. */ - print_dsa (host, bus_to_virt (ncrcurrent[1] - - (hostdata->E_dsa_code_begin - - hostdata->E_dsa_code_template)), ""); - printk ("scsi%d : end schedule dsa array\n", host->host_no); - - printk ("scsi%d : reconnect_dsa_head :\n", host->host_no); - - for (left = host->can_queue, - dsa = bus_to_virt (hostdata->reconnect_dsa_head); - left >= 0 && dsa; - dsa = next_dsa) { - local_irq_save(flags); - if (check_address ((unsigned long) dsa, sizeof(dsa)) == -1) { - printk ("scsi%d: bad DSA pointer 0x%p", host->host_no, - dsa); - next_dsa = NULL; - } - else - { - next_dsa = bus_to_virt(dsa[hostdata->dsa_next / sizeof(u32)]); - print_dsa (host, dsa, ""); - } - local_irq_restore(flags); - } - printk ("scsi%d : end reconnect_dsa_head\n", host->host_no); - if (left < 0) - printk("scsi%d: possible loop in ncr reconnect list\n", - host->host_no); -} - -static void -print_lots (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) host->hostdata[0]; - u32 *dsp_next, *dsp, *dsa, dbc_dcmd; - unsigned char dcmd, sbcl; - int i, size; - NCR53c7x0_local_setup(host); - - if ((dsp_next = bus_to_virt(NCR53c7x0_read32 (DSP_REG)))) { - dbc_dcmd = NCR53c7x0_read32(DBC_REG); - dcmd = (dbc_dcmd & 0xff000000) >> 24; - dsp = dsp_next - NCR53c7x0_insn_size(dcmd); - dsa = 
bus_to_virt(NCR53c7x0_read32(DSA_REG)); - sbcl = NCR53c7x0_read8 (SBCL_REG); - - /* - * For the 53c710, the following will report value 0 for SCNTL3 - * and STEST0 - we don't have these registers. - */ - printk ("scsi%d : DCMD|DBC=0x%x, DNAD=0x%x (virt 0x%p)\n" - " DSA=0x%lx (virt 0x%p)\n" - " DSPS=0x%x, TEMP=0x%x (virt 0x%p), DMODE=0x%x\n" - " SXFER=0x%x, SCNTL3=0x%x\n" - " %s%s%sphase=%s, %d bytes in SCSI FIFO\n" - " SCRATCH=0x%x, saved2_dsa=0x%0lx\n", - host->host_no, dbc_dcmd, NCR53c7x0_read32(DNAD_REG), - bus_to_virt(NCR53c7x0_read32(DNAD_REG)), - virt_to_bus(dsa), dsa, - NCR53c7x0_read32(DSPS_REG), NCR53c7x0_read32(TEMP_REG), - bus_to_virt (NCR53c7x0_read32(TEMP_REG)), - (int) NCR53c7x0_read8(hostdata->dmode), - (int) NCR53c7x0_read8(SXFER_REG), - ((hostdata->chip / 100) == 8) ? - (int) NCR53c7x0_read8(SCNTL3_REG_800) : 0, - (sbcl & SBCL_BSY) ? "BSY " : "", - (sbcl & SBCL_SEL) ? "SEL " : "", - (sbcl & SBCL_REQ) ? "REQ " : "", - sstat2_to_phase(NCR53c7x0_read8 (((hostdata->chip / 100) == 8) ? - SSTAT1_REG : SSTAT2_REG)), - (NCR53c7x0_read8 ((hostdata->chip / 100) == 8 ? - SSTAT1_REG : SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT, - ((hostdata->chip / 100) == 8) ? 
NCR53c7x0_read8 (STEST0_REG_800) : - NCR53c7x0_read32(SCRATCHA_REG_800), - hostdata->saved2_dsa); - printk ("scsi%d : DSP 0x%lx (virt 0x%p) ->\n", host->host_no, - virt_to_bus(dsp), dsp); - for (i = 6; i > 0; --i, dsp += size) - size = print_insn (host, dsp, "", 1); - if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) { - if ((hostdata->chip / 100) == 8) - printk ("scsi%d : connected (SDID=0x%x, SSID=0x%x)\n", - host->host_no, NCR53c7x0_read8 (SDID_REG_800), - NCR53c7x0_read8 (SSID_REG_800)); - else - printk ("scsi%d : connected (SDID=0x%x)\n", - host->host_no, NCR53c7x0_read8 (SDID_REG_700)); - print_dsa (host, dsa, ""); - } - -#if 1 - print_queues (host); -#endif - } -} - -/* - * Function : static int shutdown (struct Scsi_Host *host) - * - * Purpose : does a clean (we hope) shutdown of the NCR SCSI - * chip. Use prior to dumping core, unloading the NCR driver, - * - * Returns : 0 on success - */ -static int -shutdown (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - unsigned long flags; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - NCR53c7x0_local_setup(host); - local_irq_save(flags); -/* Get in a state where we can reset the SCSI bus */ - ncr_halt (host); - ncr_scsi_reset (host); - hostdata->soft_reset(host); - - disable (host); - local_irq_restore(flags); - return 0; -} - -/* - * Function : void ncr_scsi_reset (struct Scsi_Host *host) - * - * Purpose : reset the SCSI bus. 
- */ - -static void -ncr_scsi_reset (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - unsigned long flags; - NCR53c7x0_local_setup(host); - local_irq_save(flags); - NCR53c7x0_write8(SCNTL1_REG, SCNTL1_RST); - udelay(25); /* Minimum amount of time to assert RST */ - NCR53c7x0_write8(SCNTL1_REG, 0); - local_irq_restore(flags); -} - -/* - * Function : void hard_reset (struct Scsi_Host *host) - * - */ - -static void -hard_reset (struct Scsi_Host *host) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned long flags; - local_irq_save(flags); - ncr_scsi_reset(host); - NCR53c7x0_driver_init (host); - if (hostdata->soft_reset) - hostdata->soft_reset (host); - local_irq_restore(flags); -} - - -/* - * Function : Scsi_Cmnd *return_outstanding_commands (struct Scsi_Host *host, - * int free, int issue) - * - * Purpose : return a linked list (using the SCp.buffer field as next, - * so we don't perturb hostdata. We don't use a field of the - * NCR53c7x0_cmd structure since we may not have allocated one - * for the command causing the reset.) of Scsi_Cmnd structures that - * had propagated below the Linux issue queue level. If free is set, - * free the NCR53c7x0_cmd structures which are associated with - * the Scsi_Cmnd structures, and clean up any internal - * NCR lists that the commands were on. If issue is set, - * also return commands in the issue queue. - * - * Returns : linked list of commands - * - * NOTE : the caller should insure that the NCR chip is halted - * if the free flag is set. 
- */ - -static Scsi_Cmnd * -return_outstanding_commands (struct Scsi_Host *host, int free, int issue) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - struct NCR53c7x0_cmd *c; - int i; - u32 *ncrcurrent; - Scsi_Cmnd *list = NULL, *tmp; - for (c = (struct NCR53c7x0_cmd *) hostdata->running_list; c; - c = (struct NCR53c7x0_cmd *) c->next) { - if (c->cmd->SCp.buffer) { - printk ("scsi%d : loop detected in running list!\n", host->host_no); - break; - } else { - printk ("Duh? Bad things happening in the NCR driver\n"); - break; - } - - c->cmd->SCp.buffer = (struct scatterlist *) list; - list = c->cmd; - if (free) { - c->next = hostdata->free; - hostdata->free = c; - } - } - - if (free) { - for (i = 0, ncrcurrent = (u32 *) hostdata->schedule; - i < host->can_queue; ++i, ncrcurrent += 2) { - ncrcurrent[0] = hostdata->NOP_insn; - ncrcurrent[1] = 0xdeadbeef; - } - hostdata->ncrcurrent = NULL; - } - - if (issue) { - for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp; tmp = tmp->next) { - if (tmp->SCp.buffer) { - printk ("scsi%d : loop detected in issue queue!\n", - host->host_no); - break; - } - tmp->SCp.buffer = (struct scatterlist *) list; - list = tmp; - } - if (free) - hostdata->issue_queue = NULL; - - } - return list; -} - -/* - * Function : static int disable (struct Scsi_Host *host) - * - * Purpose : disables the given NCR host, causing all commands - * to return a driver error. Call this so we can unload the - * module during development and try again. Eventually, - * we should be able to find clean workarounds for these - * problems. - * - * Inputs : host - hostadapter to twiddle - * - * Returns : 0 on success. 
- */ - -static int -disable (struct Scsi_Host *host) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - unsigned long flags; - Scsi_Cmnd *nuke_list, *tmp; - local_irq_save(flags); - if (hostdata->state != STATE_HALTED) - ncr_halt (host); - nuke_list = return_outstanding_commands (host, 1 /* free */, 1 /* issue */); - hard_reset (host); - hostdata->state = STATE_DISABLED; - local_irq_restore(flags); - printk ("scsi%d : nuking commands\n", host->host_no); - for (; nuke_list; nuke_list = tmp) { - tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer; - nuke_list->result = DID_ERROR << 16; - nuke_list->scsi_done(nuke_list); - } - printk ("scsi%d : done. \n", host->host_no); - printk (KERN_ALERT "scsi%d : disabled. Unload and reload\n", - host->host_no); - return 0; -} - -/* - * Function : static int ncr_halt (struct Scsi_Host *host) - * - * Purpose : halts the SCSI SCRIPTS(tm) processor on the NCR chip - * - * Inputs : host - SCSI chip to halt - * - * Returns : 0 on success - */ - -static int -ncr_halt (struct Scsi_Host *host) { - NCR53c7x0_local_declare(); - unsigned long flags; - unsigned char istat, tmp; - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - int stage; - NCR53c7x0_local_setup(host); - - local_irq_save(flags); - /* Stage 0 : eat all interrupts - Stage 1 : set ABORT - Stage 2 : eat all but abort interrupts - Stage 3 : eat all interrupts - */ - for (stage = 0;;) { - if (stage == 1) { - NCR53c7x0_write8(hostdata->istat, ISTAT_ABRT); - ++stage; - } - istat = NCR53c7x0_read8 (hostdata->istat); - if (istat & ISTAT_SIP) { - tmp = NCR53c7x0_read8(SSTAT0_REG); - } else if (istat & ISTAT_DIP) { - tmp = NCR53c7x0_read8(DSTAT_REG); - if (stage == 2) { - if (tmp & DSTAT_ABRT) { - NCR53c7x0_write8(hostdata->istat, 0); - ++stage; - } else { - printk(KERN_ALERT "scsi%d : could not halt NCR chip\n", - host->host_no); - disable (host); - } - } - } - if (!(istat & (ISTAT_SIP|ISTAT_DIP))) { - if 
(stage == 0) - ++stage; - else if (stage == 3) - break; - } - } - hostdata->state = STATE_HALTED; - local_irq_restore(flags); -#if 0 - print_lots (host); -#endif - return 0; -} - -/* - * Function: event_name (int event) - * - * Purpose: map event enum into user-readable strings. - */ - -static const char * -event_name (int event) { - switch (event) { - case EVENT_NONE: return "none"; - case EVENT_ISSUE_QUEUE: return "to issue queue"; - case EVENT_START_QUEUE: return "to start queue"; - case EVENT_SELECT: return "selected"; - case EVENT_DISCONNECT: return "disconnected"; - case EVENT_RESELECT: return "reselected"; - case EVENT_COMPLETE: return "completed"; - case EVENT_IDLE: return "idle"; - case EVENT_SELECT_FAILED: return "select failed"; - case EVENT_BEFORE_SELECT: return "before select"; - case EVENT_RESELECT_FAILED: return "reselect failed"; - default: return "unknown"; - } -} - -/* - * Function : void dump_events (struct Scsi_Host *host, count) - * - * Purpose : print last count events which have occurred. - */ -static void -dump_events (struct Scsi_Host *host, int count) { - struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) - host->hostdata[0]; - struct NCR53c7x0_event event; - int i; - unsigned long flags; - if (hostdata->events) { - if (count > hostdata->event_size) - count = hostdata->event_size; - for (i = hostdata->event_index; count > 0; - i = (i ? i - 1 : hostdata->event_size -1), --count) { -/* - * By copying the event we're currently examining with interrupts - * disabled, we can do multiple printk(), etc. operations and - * still be guaranteed that they're happening on the same - * event structure. 
- */ - local_irq_save(flags); -#if 0 - event = hostdata->events[i]; -#else - memcpy ((void *) &event, (void *) &(hostdata->events[i]), - sizeof(event)); -#endif - - local_irq_restore(flags); - printk ("scsi%d : %s event %d at %ld secs %ld usecs target %d lun %d\n", - host->host_no, event_name (event.event), count, - (long) event.time.tv_sec, (long) event.time.tv_usec, - event.target, event.lun); - if (event.dsa) - printk (" event for dsa 0x%lx (virt 0x%p)\n", - virt_to_bus(event.dsa), event.dsa); - if (event.pid != -1) { - printk (" event for pid %ld ", event.pid); - __scsi_print_command (event.cmnd); - } - } - } -} - -/* - * Function: check_address - * - * Purpose: Check to see if a possibly corrupt pointer will fault the - * kernel. - * - * Inputs: addr - address; size - size of area - * - * Returns: 0 if area is OK, -1 on error. - * - * NOTES: should be implemented in terms of vverify on kernels - * that have it. - */ - -static int -check_address (unsigned long addr, int size) { - return (virt_to_phys((void *)addr) < PAGE_SIZE || virt_to_phys((void *)(addr + size)) > virt_to_phys(high_memory) ? -1 : 0); -} - -#ifdef MODULE -int -NCR53c7x0_release(struct Scsi_Host *host) { - struct NCR53c7x0_hostdata *hostdata = - (struct NCR53c7x0_hostdata *) host->hostdata[0]; - struct NCR53c7x0_cmd *cmd, *tmp; - shutdown (host); - if (host->irq != SCSI_IRQ_NONE) - { - int irq_count; - struct Scsi_Host *tmp; - for (irq_count = 0, tmp = first_host; tmp; tmp = tmp->next) - if (tmp->hostt == the_template && tmp->irq == host->irq) - ++irq_count; - if (irq_count == 1) - free_irq(host->irq, NULL); - } - if (host->dma_channel != DMA_NONE) - free_dma(host->dma_channel); - if (host->io_port) - release_region(host->io_port, host->n_io_port); - - for (cmd = (struct NCR53c7x0_cmd *) hostdata->free; cmd; cmd = tmp, - --hostdata->num_cmds) { - tmp = (struct NCR53c7x0_cmd *) cmd->next; - /* - * If we're going to loop, try to stop it to get a more accurate - * count of the leaked commands. 
- */ - cmd->next = NULL; - if (cmd->free) - cmd->free ((void *) cmd->real, cmd->size); - } - if (hostdata->num_cmds) - printk ("scsi%d : leaked %d NCR53c7x0_cmd structures\n", - host->host_no, hostdata->num_cmds); - - vfree(hostdata->events); - - /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which - * XXX may be invalid (CONFIG_060_WRITETHROUGH) - */ - kernel_set_cachemode((void *)hostdata, 8192, IOMAP_FULL_CACHING); - free_pages ((u32)hostdata, 1); - return 1; -} -#endif /* def MODULE */ diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.h linux-2.6.22-591/drivers/scsi/53c7xx.h --- linux-2.6.22-570/drivers/scsi/53c7xx.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c7xx.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,1608 +0,0 @@ -/* - * 53c710 driver. Modified from Drew Eckhardts driver - * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] - * - * I have left the code for the 53c8xx family in here, because it didn't - * seem worth removing it. The possibility of IO_MAPPED chips rather - * than MEMORY_MAPPED remains, in case someone wants to add support for - * 53c710 chips on Intel PCs (some older machines have them on the - * motherboard). - * - * NOTE THERE MAY BE PROBLEMS WITH CASTS IN read8 AND Co. - */ - -/* - * NCR 53c{7,8}0x0 driver, header file - * - * Sponsored by - * iX Multiuser Multitasking Magazine - * Hannover, Germany - * hm@ix.de - * - * Copyright 1993, 1994, 1995 Drew Eckhardt - * Visionary Computing - * (Unix and Linux consulting and custom programming) - * drew@PoohSticks.ORG - * +1 (303) 786-7975 - * - * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. 
- * - * PRE-ALPHA - * - * For more information, please consult - * - * NCR 53C700/53C700-66 - * SCSI I/O Processor - * Data Manual - * - * NCR 53C810 - * PCI-SCSI I/O Processor - * Data Manual - * - * NCR Microelectronics - * 1635 Aeroplaza Drive - * Colorado Springs, CO 80916 - * +1 (719) 578-3400 - * - * Toll free literature number - * +1 (800) 334-5454 - * - */ - -#ifndef NCR53c710_H -#define NCR53c710_H - -#ifndef HOSTS_C - -/* SCSI control 0 rw, default = 0xc0 */ -#define SCNTL0_REG 0x00 -#define SCNTL0_ARB1 0x80 /* 0 0 = simple arbitration */ -#define SCNTL0_ARB2 0x40 /* 1 1 = full arbitration */ -#define SCNTL0_STRT 0x20 /* Start Sequence */ -#define SCNTL0_WATN 0x10 /* Select with ATN */ -#define SCNTL0_EPC 0x08 /* Enable parity checking */ -/* Bit 2 is reserved on 800 series chips */ -#define SCNTL0_EPG_700 0x04 /* Enable parity generation */ -#define SCNTL0_AAP 0x02 /* ATN/ on parity error */ -#define SCNTL0_TRG 0x01 /* Target mode */ - -/* SCSI control 1 rw, default = 0x00 */ - -#define SCNTL1_REG 0x01 -#define SCNTL1_EXC 0x80 /* Extra Clock Cycle of Data setup */ -#define SCNTL1_ADB 0x40 /* contents of SODL on bus */ -#define SCNTL1_ESR_700 0x20 /* Enable SIOP response to selection - and reselection */ -#define SCNTL1_DHP_800 0x20 /* Disable halt on parity error or ATN - target mode only */ -#define SCNTL1_CON 0x10 /* Connected */ -#define SCNTL1_RST 0x08 /* SCSI RST/ */ -#define SCNTL1_AESP 0x04 /* Force bad parity */ -#define SCNTL1_SND_700 0x02 /* Start SCSI send */ -#define SCNTL1_IARB_800 0x02 /* Immediate Arbitration, start - arbitration immediately after - busfree is detected */ -#define SCNTL1_RCV_700 0x01 /* Start SCSI receive */ -#define SCNTL1_SST_800 0x01 /* Start SCSI transfer */ - -/* SCSI control 2 rw, */ - -#define SCNTL2_REG_800 0x02 -#define SCNTL2_800_SDU 0x80 /* SCSI disconnect unexpected */ - -/* SCSI control 3 rw */ - -#define SCNTL3_REG_800 0x03 -#define SCNTL3_800_SCF_SHIFT 4 -#define SCNTL3_800_SCF_MASK 0x70 -#define 
SCNTL3_800_SCF2 0x40 /* Synchronous divisor */ -#define SCNTL3_800_SCF1 0x20 /* 0x00 = SCLK/3 */ -#define SCNTL3_800_SCF0 0x10 /* 0x10 = SCLK/1 */ - /* 0x20 = SCLK/1.5 - 0x30 = SCLK/2 - 0x40 = SCLK/3 */ - -#define SCNTL3_800_CCF_SHIFT 0 -#define SCNTL3_800_CCF_MASK 0x07 -#define SCNTL3_800_CCF2 0x04 /* 0x00 50.01 to 66 */ -#define SCNTL3_800_CCF1 0x02 /* 0x01 16.67 to 25 */ -#define SCNTL3_800_CCF0 0x01 /* 0x02 25.01 - 37.5 - 0x03 37.51 - 50 - 0x04 50.01 - 66 */ - -/* - * SCSI destination ID rw - the appropriate bit is set for the selected - * target ID. This is written by the SCSI SCRIPTS processor. - * default = 0x00 - */ -#define SDID_REG_700 0x02 -#define SDID_REG_800 0x06 - -#define GP_REG_800 0x07 /* General purpose IO */ -#define GP_800_IO1 0x02 -#define GP_800_IO2 0x01 - -/* SCSI interrupt enable rw, default = 0x00 */ -#define SIEN_REG_700 0x03 -#define SIEN0_REG_800 0x40 -#define SIEN_MA 0x80 /* Phase mismatch (ini) or ATN (tgt) */ -#define SIEN_FC 0x40 /* Function complete */ -#define SIEN_700_STO 0x20 /* Selection or reselection timeout */ -#define SIEN_800_SEL 0x20 /* Selected */ -#define SIEN_700_SEL 0x10 /* Selected or reselected */ -#define SIEN_800_RESEL 0x10 /* Reselected */ -#define SIEN_SGE 0x08 /* SCSI gross error */ -#define SIEN_UDC 0x04 /* Unexpected disconnect */ -#define SIEN_RST 0x02 /* SCSI RST/ received */ -#define SIEN_PAR 0x01 /* Parity error */ - -/* - * SCSI chip ID rw - * NCR53c700 : - * When arbitrating, the highest bit is used, when reselection or selection - * occurs, the chip responds to all IDs for which a bit is set. 
- * default = 0x00 - * NCR53c810 : - * Uses bit mapping - */ -#define SCID_REG 0x04 -/* Bit 7 is reserved on 800 series chips */ -#define SCID_800_RRE 0x40 /* Enable response to reselection */ -#define SCID_800_SRE 0x20 /* Enable response to selection */ -/* Bits four and three are reserved on 800 series chips */ -#define SCID_800_ENC_MASK 0x07 /* Encoded SCSI ID */ - -/* SCSI transfer rw, default = 0x00 */ -#define SXFER_REG 0x05 -#define SXFER_DHP 0x80 /* Disable halt on parity */ - -#define SXFER_TP2 0x40 /* Transfer period msb */ -#define SXFER_TP1 0x20 -#define SXFER_TP0 0x10 /* lsb */ -#define SXFER_TP_MASK 0x70 -/* FIXME : SXFER_TP_SHIFT == 5 is right for '8xx chips */ -#define SXFER_TP_SHIFT 5 -#define SXFER_TP_4 0x00 /* Divisors */ -#define SXFER_TP_5 0x10<<1 -#define SXFER_TP_6 0x20<<1 -#define SXFER_TP_7 0x30<<1 -#define SXFER_TP_8 0x40<<1 -#define SXFER_TP_9 0x50<<1 -#define SXFER_TP_10 0x60<<1 -#define SXFER_TP_11 0x70<<1 - -#define SXFER_MO3 0x08 /* Max offset msb */ -#define SXFER_MO2 0x04 -#define SXFER_MO1 0x02 -#define SXFER_MO0 0x01 /* lsb */ -#define SXFER_MO_MASK 0x0f -#define SXFER_MO_SHIFT 0 - -/* - * SCSI output data latch rw - * The contents of this register are driven onto the SCSI bus when - * the Assert Data Bus bit of the SCNTL1 register is set and - * the CD, IO, and MSG bits of the SOCL register match the SCSI phase - */ -#define SODL_REG_700 0x06 -#define SODL_REG_800 0x54 - - -/* - * SCSI output control latch rw, default = 0 - * Note that when the chip is being manually programmed as an initiator, - * the MSG, CD, and IO bits must be set correctly for the phase the target - * is driving the bus in. Otherwise no data transfer will occur due to - * phase mismatch. 
- */ - -#define SOCL_REG 0x07 -#define SOCL_REQ 0x80 /* REQ */ -#define SOCL_ACK 0x40 /* ACK */ -#define SOCL_BSY 0x20 /* BSY */ -#define SOCL_SEL 0x10 /* SEL */ -#define SOCL_ATN 0x08 /* ATN */ -#define SOCL_MSG 0x04 /* MSG */ -#define SOCL_CD 0x02 /* C/D */ -#define SOCL_IO 0x01 /* I/O */ - -/* - * SCSI first byte received latch ro - * This register contains the first byte received during a block MOVE - * SCSI SCRIPTS instruction, including - * - * Initiator mode Target mode - * Message in Command - * Status Message out - * Data in Data out - * - * It also contains the selecting or reselecting device's ID and our - * ID. - * - * Note that this is the register the various IF conditionals can - * operate on. - */ -#define SFBR_REG 0x08 - -/* - * SCSI input data latch ro - * In initiator mode, data is latched into this register on the rising - * edge of REQ/. In target mode, data is latched on the rising edge of - * ACK/ - */ -#define SIDL_REG_700 0x09 -#define SIDL_REG_800 0x50 - -/* - * SCSI bus data lines ro - * This register reflects the instantaneous status of the SCSI data - * lines. Note that SCNTL0 must be set to disable parity checking, - * otherwise reading this register will latch new parity. 
- */ -#define SBDL_REG_700 0x0a -#define SBDL_REG_800 0x58 - -#define SSID_REG_800 0x0a -#define SSID_800_VAL 0x80 /* Exactly two bits asserted at sel */ -#define SSID_800_ENCID_MASK 0x07 /* Device which performed operation */ - - -/* - * SCSI bus control lines rw, - * instantaneous readout of control lines - */ -#define SBCL_REG 0x0b -#define SBCL_REQ 0x80 /* REQ ro */ -#define SBCL_ACK 0x40 /* ACK ro */ -#define SBCL_BSY 0x20 /* BSY ro */ -#define SBCL_SEL 0x10 /* SEL ro */ -#define SBCL_ATN 0x08 /* ATN ro */ -#define SBCL_MSG 0x04 /* MSG ro */ -#define SBCL_CD 0x02 /* C/D ro */ -#define SBCL_IO 0x01 /* I/O ro */ -#define SBCL_PHASE_CMDOUT SBCL_CD -#define SBCL_PHASE_DATAIN SBCL_IO -#define SBCL_PHASE_DATAOUT 0 -#define SBCL_PHASE_MSGIN (SBCL_CD|SBCL_IO|SBCL_MSG) -#define SBCL_PHASE_MSGOUT (SBCL_CD|SBCL_MSG) -#define SBCL_PHASE_STATIN (SBCL_CD|SBCL_IO) -#define SBCL_PHASE_MASK (SBCL_CD|SBCL_IO|SBCL_MSG) -/* - * Synchronous SCSI Clock Control bits - * 0 - set by DCNTL - * 1 - SCLK / 1.0 - * 2 - SCLK / 1.5 - * 3 - SCLK / 2.0 - */ -#define SBCL_SSCF1 0x02 /* wo, -66 only */ -#define SBCL_SSCF0 0x01 /* wo, -66 only */ -#define SBCL_SSCF_MASK 0x03 - -/* - * XXX note : when reading the DSTAT and STAT registers to clear interrupts, - * insure that 10 clocks elapse between the two - */ -/* DMA status ro */ -#define DSTAT_REG 0x0c -#define DSTAT_DFE 0x80 /* DMA FIFO empty */ -#define DSTAT_800_MDPE 0x40 /* Master Data Parity Error */ -#define DSTAT_800_BF 0x20 /* Bus Fault */ -#define DSTAT_ABRT 0x10 /* Aborted - set on error */ -#define DSTAT_SSI 0x08 /* SCRIPTS single step interrupt */ -#define DSTAT_SIR 0x04 /* SCRIPTS interrupt received - - set when INT instruction is - executed */ -#define DSTAT_WTD 0x02 /* Watchdog timeout detected */ -#define DSTAT_OPC 0x01 /* Illegal instruction */ -#define DSTAT_800_IID 0x01 /* Same thing, different name */ - - -/* NCR53c800 moves this stuff into SIST0 */ -#define SSTAT0_REG 0x0d /* SCSI status 0 ro */ -#define SIST0_REG_800 0x42 
-#define SSTAT0_MA 0x80 /* ini : phase mismatch, - * tgt : ATN/ asserted - */ -#define SSTAT0_CMP 0x40 /* function complete */ -#define SSTAT0_700_STO 0x20 /* Selection or reselection timeout */ -#define SIST0_800_SEL 0x20 /* Selected */ -#define SSTAT0_700_SEL 0x10 /* Selected or reselected */ -#define SIST0_800_RSL 0x10 /* Reselected */ -#define SSTAT0_SGE 0x08 /* SCSI gross error */ -#define SSTAT0_UDC 0x04 /* Unexpected disconnect */ -#define SSTAT0_RST 0x02 /* SCSI RST/ received */ -#define SSTAT0_PAR 0x01 /* Parity error */ - -/* And uses SSTAT0 for what was SSTAT1 */ - -#define SSTAT1_REG 0x0e /* SCSI status 1 ro */ -#define SSTAT1_ILF 0x80 /* SIDL full */ -#define SSTAT1_ORF 0x40 /* SODR full */ -#define SSTAT1_OLF 0x20 /* SODL full */ -#define SSTAT1_AIP 0x10 /* Arbitration in progress */ -#define SSTAT1_LOA 0x08 /* Lost arbitration */ -#define SSTAT1_WOA 0x04 /* Won arbitration */ -#define SSTAT1_RST 0x02 /* Instant readout of RST/ */ -#define SSTAT1_SDP 0x01 /* Instant readout of SDP/ */ - -#define SSTAT2_REG 0x0f /* SCSI status 2 ro */ -#define SSTAT2_FF3 0x80 /* number of bytes in synchronous */ -#define SSTAT2_FF2 0x40 /* data FIFO */ -#define SSTAT2_FF1 0x20 -#define SSTAT2_FF0 0x10 -#define SSTAT2_FF_MASK 0xf0 -#define SSTAT2_FF_SHIFT 4 - -/* - * Latched signals, latched on the leading edge of REQ/ for initiators, - * ACK/ for targets. 
- */ -#define SSTAT2_SDP 0x08 /* SDP */ -#define SSTAT2_MSG 0x04 /* MSG */ -#define SSTAT2_CD 0x02 /* C/D */ -#define SSTAT2_IO 0x01 /* I/O */ -#define SSTAT2_PHASE_CMDOUT SSTAT2_CD -#define SSTAT2_PHASE_DATAIN SSTAT2_IO -#define SSTAT2_PHASE_DATAOUT 0 -#define SSTAT2_PHASE_MSGIN (SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG) -#define SSTAT2_PHASE_MSGOUT (SSTAT2_CD|SSTAT2_MSG) -#define SSTAT2_PHASE_STATIN (SSTAT2_CD|SSTAT2_IO) -#define SSTAT2_PHASE_MASK (SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG) - - -/* NCR53c700-66 only */ -#define SCRATCHA_REG_00 0x10 /* through 0x13 Scratch A rw */ -/* NCR53c710 and higher */ -#define DSA_REG 0x10 /* DATA structure address */ - -#define CTEST0_REG_700 0x14 /* Chip test 0 ro */ -#define CTEST0_REG_800 0x18 /* Chip test 0 rw, general purpose */ -/* 0x80 - 0x04 are reserved */ -#define CTEST0_700_RTRG 0x02 /* Real target mode */ -#define CTEST0_700_DDIR 0x01 /* Data direction, 1 = - * SCSI bus to host, 0 = - * host to SCSI. - */ - -#define CTEST1_REG_700 0x15 /* Chip test 1 ro */ -#define CTEST1_REG_800 0x19 /* Chip test 1 ro */ -#define CTEST1_FMT3 0x80 /* Identify which byte lanes are empty */ -#define CTEST1_FMT2 0x40 /* in the DMA FIFO */ -#define CTEST1_FMT1 0x20 -#define CTEST1_FMT0 0x10 - -#define CTEST1_FFL3 0x08 /* Identify which bytes lanes are full */ -#define CTEST1_FFL2 0x04 /* in the DMA FIFO */ -#define CTEST1_FFL1 0x02 -#define CTEST1_FFL0 0x01 - -#define CTEST2_REG_700 0x16 /* Chip test 2 ro */ -#define CTEST2_REG_800 0x1a /* Chip test 2 ro */ - -#define CTEST2_800_DDIR 0x80 /* 1 = SCSI->host */ -#define CTEST2_800_SIGP 0x40 /* A copy of SIGP in ISTAT. - Reading this register clears */ -#define CTEST2_800_CIO 0x20 /* Configured as IO */. 
-#define CTEST2_800_CM 0x10 /* Configured as memory */ - -/* 0x80 - 0x40 are reserved on 700 series chips */ -#define CTEST2_700_SOFF 0x20 /* SCSI Offset Compare, - * As an initiator, this bit is - * one when the synchronous offset - * is zero, as a target this bit - * is one when the synchronous - * offset is at the maximum - * defined in SXFER - */ -#define CTEST2_700_SFP 0x10 /* SCSI FIFO parity bit, - * reading CTEST3 unloads a byte - * from the FIFO and sets this - */ -#define CTEST2_700_DFP 0x08 /* DMA FIFO parity bit, - * reading CTEST6 unloads a byte - * from the FIFO and sets this - */ -#define CTEST2_TEOP 0x04 /* SCSI true end of process, - * indicates a totally finished - * transfer - */ -#define CTEST2_DREQ 0x02 /* Data request signal */ -/* 0x01 is reserved on 700 series chips */ -#define CTEST2_800_DACK 0x01 - -/* - * Chip test 3 ro - * Unloads the bottom byte of the eight deep SCSI synchronous FIFO, - * check SSTAT2 FIFO full bits to determine size. Note that a GROSS - * error results if a read is attempted on this register. Also note - * that 16 and 32 bit reads of this register will cause corruption. 
- */ -#define CTEST3_REG_700 0x17 -/* Chip test 3 rw */ -#define CTEST3_REG_800 0x1b -#define CTEST3_800_V3 0x80 /* Chip revision */ -#define CTEST3_800_V2 0x40 -#define CTEST3_800_V1 0x20 -#define CTEST3_800_V0 0x10 -#define CTEST3_800_FLF 0x08 /* Flush DMA FIFO */ -#define CTEST3_800_CLF 0x04 /* Clear DMA FIFO */ -#define CTEST3_800_FM 0x02 /* Fetch mode pin */ -/* bit 0 is reserved on 800 series chips */ - -#define CTEST4_REG_700 0x18 /* Chip test 4 rw */ -#define CTEST4_REG_800 0x21 /* Chip test 4 rw */ -/* 0x80 is reserved on 700 series chips */ -#define CTEST4_800_BDIS 0x80 /* Burst mode disable */ -#define CTEST4_ZMOD 0x40 /* High impedance mode */ -#define CTEST4_SZM 0x20 /* SCSI bus high impedance */ -#define CTEST4_700_SLBE 0x10 /* SCSI loopback enabled */ -#define CTEST4_800_SRTM 0x10 /* Shadow Register Test Mode */ -#define CTEST4_700_SFWR 0x08 /* SCSI FIFO write enable, - * redirects writes from SODL - * to the SCSI FIFO. - */ -#define CTEST4_800_MPEE 0x08 /* Enable parity checking - during master cycles on PCI - bus */ - -/* - * These bits send the contents of the CTEST6 register to the appropriate - * byte lane of the 32 bit DMA FIFO. Normal operation is zero, otherwise - * the high bit means the low two bits select the byte lane. - */ -#define CTEST4_FBL2 0x04 -#define CTEST4_FBL1 0x02 -#define CTEST4_FBL0 0x01 -#define CTEST4_FBL_MASK 0x07 -#define CTEST4_FBL_0 0x04 /* Select DMA FIFO byte lane 0 */ -#define CTEST4_FBL_1 0x05 /* Select DMA FIFO byte lane 1 */ -#define CTEST4_FBL_2 0x06 /* Select DMA FIFO byte lane 2 */ -#define CTEST4_FBL_3 0x07 /* Select DMA FIFO byte lane 3 */ -#define CTEST4_800_SAVE (CTEST4_800_BDIS) - - -#define CTEST5_REG_700 0x19 /* Chip test 5 rw */ -#define CTEST5_REG_800 0x22 /* Chip test 5 rw */ -/* - * Clock Address Incrementor. When set, it increments the - * DNAD register to the next bus size boundary. It automatically - * resets itself when the operation is complete. 
- */ -#define CTEST5_ADCK 0x80 -/* - * Clock Byte Counter. When set, it decrements the DBC register to - * the next bus size boundary. - */ -#define CTEST5_BBCK 0x40 -/* - * Reset SCSI Offset. Setting this bit to 1 clears the current offset - * pointer in the SCSI synchronous offset counter (SSTAT). This bit - * is set to 1 if a SCSI Gross Error Condition occurs. The offset should - * be cleared when a synchronous transfer fails. When written, it is - * automatically cleared after the SCSI synchronous offset counter is - * reset. - */ -/* Bit 5 is reserved on 800 series chips */ -#define CTEST5_700_ROFF 0x20 -/* - * Master Control for Set or Reset pulses. When 1, causes the low - * four bits of register to set when set, 0 causes the low bits to - * clear when set. - */ -#define CTEST5_MASR 0x10 -#define CTEST5_DDIR 0x08 /* DMA direction */ -/* - * Bits 2-0 are reserved on 800 series chips - */ -#define CTEST5_700_EOP 0x04 /* End of process */ -#define CTEST5_700_DREQ 0x02 /* Data request */ -#define CTEST5_700_DACK 0x01 /* Data acknowledge */ - -/* - * Chip test 6 rw - writing to this register writes to the byte - * lane in the DMA FIFO as determined by the FBL bits in the CTEST4 - * register. 
- */ -#define CTEST6_REG_700 0x1a -#define CTEST6_REG_800 0x23 - -#define CTEST7_REG 0x1b /* Chip test 7 rw */ -/* 0x80 - 0x40 are reserved on NCR53c700 and NCR53c700-66 chips */ -#define CTEST7_10_CDIS 0x80 /* Cache burst disable */ -#define CTEST7_10_SC1 0x40 /* Snoop control bits */ -#define CTEST7_10_SC0 0x20 -#define CTEST7_10_SC_MASK 0x60 -/* 0x20 is reserved on the NCR53c700 */ -#define CTEST7_0060_FM 0x20 /* Fetch mode */ -#define CTEST7_STD 0x10 /* Selection timeout disable */ -#define CTEST7_DFP 0x08 /* DMA FIFO parity bit for CTEST6 */ -#define CTEST7_EVP 0x04 /* 1 = host bus even parity, 0 = odd */ -#define CTEST7_10_TT1 0x02 /* Transfer type */ -#define CTEST7_00_DC 0x02 /* Set to drive DC low during instruction - fetch */ -#define CTEST7_DIFF 0x01 /* Differential mode */ - -#define CTEST7_SAVE ( CTEST7_EVP | CTEST7_DIFF ) - - -#define TEMP_REG 0x1c /* through 0x1f Temporary stack rw */ - -#define DFIFO_REG 0x20 /* DMA FIFO rw */ -/* - * 0x80 is reserved on the NCR53c710, the CLF and FLF bits have been - * moved into the CTEST8 register. - */ -#define DFIFO_00_FLF 0x80 /* Flush DMA FIFO to memory */ -#define DFIFO_00_CLF 0x40 /* Clear DMA and SCSI FIFOs */ -#define DFIFO_BO6 0x40 -#define DFIFO_BO5 0x20 -#define DFIFO_BO4 0x10 -#define DFIFO_BO3 0x08 -#define DFIFO_BO2 0x04 -#define DFIFO_BO1 0x02 -#define DFIFO_BO0 0x01 -#define DFIFO_10_BO_MASK 0x7f /* 7 bit counter */ -#define DFIFO_00_BO_MASK 0x3f /* 6 bit counter */ - -/* - * Interrupt status rw - * Note that this is the only register which can be read while SCSI - * SCRIPTS are being executed. - */ -#define ISTAT_REG_700 0x21 -#define ISTAT_REG_800 0x14 -#define ISTAT_ABRT 0x80 /* Software abort, write - *1 to abort, wait for interrupt. 
*/ -/* 0x40 and 0x20 are reserved on NCR53c700 and NCR53c700-66 chips */ -#define ISTAT_10_SRST 0x40 /* software reset */ -#define ISTAT_10_SIGP 0x20 /* signal script */ -/* 0x10 is reserved on NCR53c700 series chips */ -#define ISTAT_800_SEM 0x10 /* semaphore */ -#define ISTAT_CON 0x08 /* 1 when connected */ -#define ISTAT_800_INTF 0x04 /* Interrupt on the fly */ -#define ISTAT_700_PRE 0x04 /* Pointer register empty. - * Set to 1 when DSPS and DSP - * registers are empty in pipeline - * mode, always set otherwise. - */ -#define ISTAT_SIP 0x02 /* SCSI interrupt pending from - * SCSI portion of SIOP see - * SSTAT0 - */ -#define ISTAT_DIP 0x01 /* DMA interrupt pending - * see DSTAT - */ - -/* NCR53c700-66 and NCR53c710 only */ -#define CTEST8_REG 0x22 /* Chip test 8 rw */ -#define CTEST8_0066_EAS 0x80 /* Enable alternate SCSI clock, - * ie read from SCLK/ rather than CLK/ - */ -#define CTEST8_0066_EFM 0x40 /* Enable fetch and master outputs */ -#define CTEST8_0066_GRP 0x20 /* Generate Receive Parity for - * pass through. This insures that - * bad parity won't reach the host - * bus. - */ -#define CTEST8_0066_TE 0x10 /* TolerANT enable. Enable - * active negation, should only - * be used for slow SCSI - * non-differential. - */ -#define CTEST8_0066_HSC 0x08 /* Halt SCSI clock */ -#define CTEST8_0066_SRA 0x04 /* Shorten REQ/ACK filtering, - * must be set for fast SCSI-II - * speeds. - */ -#define CTEST8_0066_DAS 0x02 /* Disable automatic target/initiator - * switching. - */ -#define CTEST8_0066_LDE 0x01 /* Last disconnect enable. - * The status of pending - * disconnect is maintained by - * the core, eliminating - * the possibility of missing a - * selection or reselection - * while waiting to fetch a - * WAIT DISCONNECT opcode. 
- */ - -#define CTEST8_10_V3 0x80 /* Chip revision */ -#define CTEST8_10_V2 0x40 -#define CTEST8_10_V1 0x20 -#define CTEST8_10_V0 0x10 -#define CTEST8_10_V_MASK 0xf0 -#define CTEST8_10_FLF 0x08 /* Flush FIFOs */ -#define CTEST8_10_CLF 0x04 /* Clear FIFOs */ -#define CTEST8_10_FM 0x02 /* Fetch pin mode */ -#define CTEST8_10_SM 0x01 /* Snoop pin mode */ - - -/* - * The CTEST9 register may be used to differentiate between a - * NCR53c700 and a NCR53c710. - * - * Write 0xff to this register. - * Read it. - * If the contents are 0xff, it is a NCR53c700 - * If the contents are 0x00, it is a NCR53c700-66 first revision - * If the contents are some other value, it is some other NCR53c700-66 - */ -#define CTEST9_REG_00 0x23 /* Chip test 9 ro */ -#define LCRC_REG_10 0x23 - -/* - * 0x24 through 0x27 are the DMA byte counter register. Instructions - * write their high 8 bits into the DCMD register, the low 24 bits into - * the DBC register. - * - * Function is dependent on the command type being executed. - */ - - -#define DBC_REG 0x24 -/* - * For Block Move Instructions, DBC is a 24 bit quantity representing - * the number of bytes to transfer. - * For Transfer Control Instructions, DBC is bit fielded as follows : - */ -/* Bits 20 - 23 should be clear */ -#define DBC_TCI_TRUE (1 << 19) /* Jump when true */ -#define DBC_TCI_COMPARE_DATA (1 << 18) /* Compare data */ -#define DBC_TCI_COMPARE_PHASE (1 << 17) /* Compare phase with DCMD field */ -#define DBC_TCI_WAIT_FOR_VALID (1 << 16) /* Wait for REQ */ -/* Bits 8 - 15 are reserved on some implementations ? 
*/ -#define DBC_TCI_MASK_MASK 0xff00 /* Mask for data compare */ -#define DBC_TCI_MASK_SHIFT 8 -#define DBC_TCI_DATA_MASK 0xff /* Data to be compared */ -#define DBC_TCI_DATA_SHIFT 0 - -#define DBC_RWRI_IMMEDIATE_MASK 0xff00 /* Immediate data */ -#define DBC_RWRI_IMMEDIATE_SHIFT 8 /* Amount to shift */ -#define DBC_RWRI_ADDRESS_MASK 0x3f0000 /* Register address */ -#define DBC_RWRI_ADDRESS_SHIFT 16 - - -/* - * DMA command r/w - */ -#define DCMD_REG 0x27 -#define DCMD_TYPE_MASK 0xc0 /* Masks off type */ -#define DCMD_TYPE_BMI 0x00 /* Indicates a Block Move instruction */ -#define DCMD_BMI_IO 0x01 /* I/O, CD, and MSG bits selecting */ -#define DCMD_BMI_CD 0x02 /* the phase for the block MOVE */ -#define DCMD_BMI_MSG 0x04 /* instruction */ - -#define DCMD_BMI_OP_MASK 0x18 /* mask for opcode */ -#define DCMD_BMI_OP_MOVE_T 0x00 /* MOVE */ -#define DCMD_BMI_OP_MOVE_I 0x08 /* MOVE Initiator */ - -#define DCMD_BMI_INDIRECT 0x20 /* Indirect addressing */ - -#define DCMD_TYPE_TCI 0x80 /* Indicates a Transfer Control - instruction */ -#define DCMD_TCI_IO 0x01 /* I/O, CD, and MSG bits selecting */ -#define DCMD_TCI_CD 0x02 /* the phase for the block MOVE */ -#define DCMD_TCI_MSG 0x04 /* instruction */ -#define DCMD_TCI_OP_MASK 0x38 /* mask for opcode */ -#define DCMD_TCI_OP_JUMP 0x00 /* JUMP */ -#define DCMD_TCI_OP_CALL 0x08 /* CALL */ -#define DCMD_TCI_OP_RETURN 0x10 /* RETURN */ -#define DCMD_TCI_OP_INT 0x18 /* INT */ - -#define DCMD_TYPE_RWRI 0x40 /* Indicates I/O or register Read/Write - instruction */ -#define DCMD_RWRI_OPC_MASK 0x38 /* Opcode mask */ -#define DCMD_RWRI_OPC_WRITE 0x28 /* Write SFBR to register */ -#define DCMD_RWRI_OPC_READ 0x30 /* Read register to SFBR */ -#define DCMD_RWRI_OPC_MODIFY 0x38 /* Modify in place */ - -#define DCMD_RWRI_OP_MASK 0x07 -#define DCMD_RWRI_OP_MOVE 0x00 -#define DCMD_RWRI_OP_SHL 0x01 -#define DCMD_RWRI_OP_OR 0x02 -#define DCMD_RWRI_OP_XOR 0x03 -#define DCMD_RWRI_OP_AND 0x04 -#define DCMD_RWRI_OP_SHR 0x05 -#define DCMD_RWRI_OP_ADD 
0x06 -#define DCMD_RWRI_OP_ADDC 0x07 - -#define DCMD_TYPE_MMI 0xc0 /* Indicates a Memory Move instruction - (three words) */ - - -#define DNAD_REG 0x28 /* through 0x2b DMA next address for - data */ -#define DSP_REG 0x2c /* through 0x2f DMA SCRIPTS pointer rw */ -#define DSPS_REG 0x30 /* through 0x33 DMA SCRIPTS pointer - save rw */ -#define DMODE_REG_00 0x34 /* DMA mode rw */ -#define DMODE_00_BL1 0x80 /* Burst length bits */ -#define DMODE_00_BL0 0x40 -#define DMODE_BL_MASK 0xc0 -/* Burst lengths (800) */ -#define DMODE_BL_2 0x00 /* 2 transfer */ -#define DMODE_BL_4 0x40 /* 4 transfers */ -#define DMODE_BL_8 0x80 /* 8 transfers */ -#define DMODE_BL_16 0xc0 /* 16 transfers */ - -#define DMODE_10_BL_1 0x00 /* 1 transfer */ -#define DMODE_10_BL_2 0x40 /* 2 transfers */ -#define DMODE_10_BL_4 0x80 /* 4 transfers */ -#define DMODE_10_BL_8 0xc0 /* 8 transfers */ -#define DMODE_10_FC2 0x20 /* Driven to FC2 pin */ -#define DMODE_10_FC1 0x10 /* Driven to FC1 pin */ -#define DMODE_710_PD 0x08 /* Program/data on FC0 pin */ -#define DMODE_710_UO 0x02 /* User prog. 
output */ - -#define DMODE_700_BW16 0x20 /* Host buswidth = 16 */ -#define DMODE_700_286 0x10 /* 286 mode */ -#define DMODE_700_IOM 0x08 /* Transfer to IO port */ -#define DMODE_700_FAM 0x04 /* Fixed address mode */ -#define DMODE_700_PIPE 0x02 /* Pipeline mode disables - * automatic fetch / exec - */ -#define DMODE_MAN 0x01 /* Manual start mode, - * requires a 1 to be written - * to the start DMA bit in the DCNTL - * register to run scripts - */ - -#define DMODE_700_SAVE ( DMODE_00_BL_MASK | DMODE_00_BW16 | DMODE_00_286 ) - -/* NCR53c800 series only */ -#define SCRATCHA_REG_800 0x34 /* through 0x37 Scratch A rw */ -/* NCR53c710 only */ -#define SCRATCHB_REG_10 0x34 /* through 0x37 scratch B rw */ - -#define DMODE_REG_10 0x38 /* DMA mode rw, NCR53c710 and newer */ -#define DMODE_800_SIOM 0x20 /* Source IO = 1 */ -#define DMODE_800_DIOM 0x10 /* Destination IO = 1 */ -#define DMODE_800_ERL 0x08 /* Enable Read Line */ - -/* 35-38 are reserved on 700 and 700-66 series chips */ -#define DIEN_REG 0x39 /* DMA interrupt enable rw */ -/* 0x80, 0x40, and 0x20 are reserved on 700-series chips */ -#define DIEN_800_MDPE 0x40 /* Master data parity error */ -#define DIEN_800_BF 0x20 /* BUS fault */ -#define DIEN_700_BF 0x20 /* BUS fault */ -#define DIEN_ABRT 0x10 /* Enable aborted interrupt */ -#define DIEN_SSI 0x08 /* Enable single step interrupt */ -#define DIEN_SIR 0x04 /* Enable SCRIPTS INT command - * interrupt - */ -/* 0x02 is reserved on 800 series chips */ -#define DIEN_700_WTD 0x02 /* Enable watchdog timeout interrupt */ -#define DIEN_700_OPC 0x01 /* Enable illegal instruction - * interrupt - */ -#define DIEN_800_IID 0x01 /* Same meaning, different name */ - -/* - * DMA watchdog timer rw - * set in 16 CLK input periods. 
- */ -#define DWT_REG 0x3a - -/* DMA control rw */ -#define DCNTL_REG 0x3b -#define DCNTL_700_CF1 0x80 /* Clock divisor bits */ -#define DCNTL_700_CF0 0x40 -#define DCNTL_700_CF_MASK 0xc0 -/* Clock divisors Divisor SCLK range (MHZ) */ -#define DCNTL_700_CF_2 0x00 /* 2.0 37.51-50.00 */ -#define DCNTL_700_CF_1_5 0x40 /* 1.5 25.01-37.50 */ -#define DCNTL_700_CF_1 0x80 /* 1.0 16.67-25.00 */ -#define DCNTL_700_CF_3 0xc0 /* 3.0 50.01-66.67 (53c700-66) */ - -#define DCNTL_700_S16 0x20 /* Load scripts 16 bits at a time */ -#define DCNTL_SSM 0x10 /* Single step mode */ -#define DCNTL_700_LLM 0x08 /* Low level mode, can only be set - * after selection */ -#define DCNTL_800_IRQM 0x08 /* Totem pole IRQ pin */ -#define DCNTL_STD 0x04 /* Start DMA / SCRIPTS */ -/* 0x02 is reserved */ -#define DCNTL_00_RST 0x01 /* Software reset, resets everything - * but 286 mode bit in DMODE. On the - * NCR53c710, this bit moved to CTEST8 - */ -#define DCNTL_10_COM 0x01 /* 700 software compatibility mode */ -#define DCNTL_10_EA 0x20 /* Enable Ack - needed for MVME16x */ - -#define DCNTL_700_SAVE ( DCNTL_CF_MASK | DCNTL_S16) - - -/* NCR53c700-66 only */ -#define SCRATCHB_REG_00 0x3c /* through 0x3f scratch b rw */ -#define SCRATCHB_REG_800 0x5c /* through 0x5f scratch b rw */ -/* NCR53c710 only */ -#define ADDER_REG_10 0x3c /* Adder, NCR53c710 only */ - -#define SIEN1_REG_800 0x41 -#define SIEN1_800_STO 0x04 /* selection/reselection timeout */ -#define SIEN1_800_GEN 0x02 /* general purpose timer */ -#define SIEN1_800_HTH 0x01 /* handshake to handshake */ - -#define SIST1_REG_800 0x43 -#define SIST1_800_STO 0x04 /* selection/reselection timeout */ -#define SIST1_800_GEN 0x02 /* general purpose timer */ -#define SIST1_800_HTH 0x01 /* handshake to handshake */ - -#define SLPAR_REG_800 0x44 /* Parity */ - -#define MACNTL_REG_800 0x46 /* Memory access control */ -#define MACNTL_800_TYP3 0x80 -#define MACNTL_800_TYP2 0x40 -#define MACNTL_800_TYP1 0x20 -#define MACNTL_800_TYP0 0x10 -#define 
MACNTL_800_DWR 0x08 -#define MACNTL_800_DRD 0x04 -#define MACNTL_800_PSCPT 0x02 -#define MACNTL_800_SCPTS 0x01 - -#define GPCNTL_REG_800 0x47 /* General Purpose Pin Control */ - -/* Timeouts are expressed such that 0=off, 1=100us, doubling after that */ -#define STIME0_REG_800 0x48 /* SCSI Timer Register 0 */ -#define STIME0_800_HTH_MASK 0xf0 /* Handshake to Handshake timeout */ -#define STIME0_800_HTH_SHIFT 4 -#define STIME0_800_SEL_MASK 0x0f /* Selection timeout */ -#define STIME0_800_SEL_SHIFT 0 - -#define STIME1_REG_800 0x49 -#define STIME1_800_GEN_MASK 0x0f /* General purpose timer */ - -#define RESPID_REG_800 0x4a /* Response ID, bit fielded. 8 - bits on narrow chips, 16 on WIDE */ - -#define STEST0_REG_800 0x4c -#define STEST0_800_SLT 0x08 /* Selection response logic test */ -#define STEST0_800_ART 0x04 /* Arbitration priority encoder test */ -#define STEST0_800_SOZ 0x02 /* Synchronous offset zero */ -#define STEST0_800_SOM 0x01 /* Synchronous offset maximum */ - -#define STEST1_REG_800 0x4d -#define STEST1_800_SCLK 0x80 /* Disable SCSI clock */ - -#define STEST2_REG_800 0x4e -#define STEST2_800_SCE 0x80 /* Enable SOCL/SODL */ -#define STEST2_800_ROF 0x40 /* Reset SCSI sync offset */ -#define STEST2_800_SLB 0x10 /* Enable SCSI loopback mode */ -#define STEST2_800_SZM 0x08 /* SCSI high impedance mode */ -#define STEST2_800_EXT 0x02 /* Extend REQ/ACK filter 30 to 60ns */ -#define STEST2_800_LOW 0x01 /* SCSI low level mode */ - -#define STEST3_REG_800 0x4f -#define STEST3_800_TE 0x80 /* Enable active negation */ -#define STEST3_800_STR 0x40 /* SCSI FIFO test read */ -#define STEST3_800_HSC 0x20 /* Halt SCSI clock */ -#define STEST3_800_DSI 0x10 /* Disable single initiator response */ -#define STEST3_800_TTM 0x04 /* Time test mode */ -#define STEST3_800_CSF 0x02 /* Clear SCSI FIFO */ -#define STEST3_800_STW 0x01 /* SCSI FIFO test write */ - -#define OPTION_PARITY 0x1 /* Enable parity checking */ -#define OPTION_TAGGED_QUEUE 0x2 /* Enable SCSI-II tagged queuing 
*/ -#define OPTION_700 0x8 /* Always run NCR53c700 scripts */ -#define OPTION_INTFLY 0x10 /* Use INTFLY interrupts */ -#define OPTION_DEBUG_INTR 0x20 /* Debug interrupts */ -#define OPTION_DEBUG_INIT_ONLY 0x40 /* Run initialization code and - simple test code, return - DID_NO_CONNECT if any SCSI - commands are attempted. */ -#define OPTION_DEBUG_READ_ONLY 0x80 /* Return DID_ERROR if any - SCSI write is attempted */ -#define OPTION_DEBUG_TRACE 0x100 /* Animated trace mode, print - each address and instruction - executed to debug buffer. */ -#define OPTION_DEBUG_SINGLE 0x200 /* stop after executing one - instruction */ -#define OPTION_SYNCHRONOUS 0x400 /* Enable sync SCSI. */ -#define OPTION_MEMORY_MAPPED 0x800 /* NCR registers have valid - memory mapping */ -#define OPTION_IO_MAPPED 0x1000 /* NCR registers have valid - I/O mapping */ -#define OPTION_DEBUG_PROBE_ONLY 0x2000 /* Probe only, don't even init */ -#define OPTION_DEBUG_TESTS_ONLY 0x4000 /* Probe, init, run selected tests */ -#define OPTION_DEBUG_TEST0 0x08000 /* Run test 0 */ -#define OPTION_DEBUG_TEST1 0x10000 /* Run test 1 */ -#define OPTION_DEBUG_TEST2 0x20000 /* Run test 2 */ -#define OPTION_DEBUG_DUMP 0x40000 /* Dump commands */ -#define OPTION_DEBUG_TARGET_LIMIT 0x80000 /* Only talk to target+luns specified */ -#define OPTION_DEBUG_NCOMMANDS_LIMIT 0x100000 /* Limit the number of commands */ -#define OPTION_DEBUG_SCRIPT 0x200000 /* Print when checkpoints are passed */ -#define OPTION_DEBUG_FIXUP 0x400000 /* print fixup values */ -#define OPTION_DEBUG_DSA 0x800000 -#define OPTION_DEBUG_CORRUPTION 0x1000000 /* Detect script corruption */ -#define OPTION_DEBUG_SDTR 0x2000000 /* Debug SDTR problem */ -#define OPTION_DEBUG_MISMATCH 0x4000000 /* Debug phase mismatches */ -#define OPTION_DISCONNECT 0x8000000 /* Allow disconnect */ -#define OPTION_DEBUG_DISCONNECT 0x10000000 -#define OPTION_ALWAYS_SYNCHRONOUS 0x20000000 /* Negotiate sync. 
transfers - on power up */ -#define OPTION_DEBUG_QUEUES 0x80000000 -#define OPTION_DEBUG_ALLOCATION 0x100000000LL -#define OPTION_DEBUG_SYNCHRONOUS 0x200000000LL /* Sanity check SXFER and - SCNTL3 registers */ -#define OPTION_NO_ASYNC 0x400000000LL /* Don't automagically send - SDTR for async transfers when - we haven't been told to do - a synchronous transfer. */ -#define OPTION_NO_PRINT_RACE 0x800000000LL /* Don't print message when - the reselect/WAIT DISCONNECT - race condition hits */ -#if !defined(PERM_OPTIONS) -#define PERM_OPTIONS 0 -#endif - -/* - * Some data which is accessed by the NCR chip must be 4-byte aligned. - * For some hosts the default is less than that (eg. 68K uses 2-byte). - * Alignment has only been forced where it is important; also if one - * 32 bit structure field is aligned then it is assumed that following - * 32 bit fields are also aligned. Take care when adding fields - * which are other than 32 bit. - */ - -struct NCR53c7x0_synchronous { - u32 select_indirect /* Value used for indirect selection */ - __attribute__ ((aligned (4))); - u32 sscf_710; /* Used to set SSCF bits for 710 */ - u32 script[8]; /* Size ?? Script used when target is - reselected */ - unsigned char synchronous_want[5]; /* Per target desired SDTR */ -/* - * Set_synchronous programs these, select_indirect and current settings after - * int_debug_should show a match. 
- */ - unsigned char sxfer_sanity, scntl3_sanity; -}; - -#define CMD_FLAG_SDTR 1 /* Initiating synchronous - transfer negotiation */ -#define CMD_FLAG_WDTR 2 /* Initiating wide transfer - negotiation */ -#define CMD_FLAG_DID_SDTR 4 /* did SDTR */ -#define CMD_FLAG_DID_WDTR 8 /* did WDTR */ - -struct NCR53c7x0_table_indirect { - u32 count; - void *address; -}; - -enum ncr_event { - EVENT_NONE = 0, -/* - * Order is IMPORTANT, since these must correspond to the event interrupts - * in 53c7,8xx.scr - */ - - EVENT_ISSUE_QUEUE = 0x5000000, /* 0 Command was added to issue queue */ - EVENT_START_QUEUE, /* 1 Command moved to start queue */ - EVENT_SELECT, /* 2 Command completed selection */ - EVENT_DISCONNECT, /* 3 Command disconnected */ - EVENT_RESELECT, /* 4 Command reselected */ - EVENT_COMPLETE, /* 5 Command completed */ - EVENT_IDLE, /* 6 */ - EVENT_SELECT_FAILED, /* 7 */ - EVENT_BEFORE_SELECT, /* 8 */ - EVENT_RESELECT_FAILED /* 9 */ -}; - -struct NCR53c7x0_event { - enum ncr_event event; /* What type of event */ - unsigned char target; - unsigned char lun; - struct timeval time; - u32 *dsa; /* What's in the DSA register now (virt) */ -/* - * A few things from that SCSI pid so we know what happened after - * the Scsi_Cmnd structure in question may have disappeared. - */ - unsigned long pid; /* The SCSI PID which caused this - event */ - unsigned char cmnd[12]; -}; - -/* - * Things in the NCR53c7x0_cmd structure are split into two parts : - * - * 1. A fixed portion, for things which are not accessed directly by static NCR - * code (ie, are referenced only by the Linux side of the driver, - * or only by dynamically generated code). - * - * 2. The DSA portion, for things which are accessed directly by static NCR - * code. - * - * This is a little ugly, but it - * 1. Avoids conflicts between the NCR code's picture of the structure, and - * Linux code's idea of what it looks like. - * - * 2. 
Minimizes the pain in the Linux side of the code needed - * to calculate real dsa locations for things, etc. - * - */ - -struct NCR53c7x0_cmd { - void *real; /* Real, unaligned address for - free function */ - void (* free)(void *, int); /* Command to deallocate; NULL - for structures allocated with - scsi_register, etc. */ - Scsi_Cmnd *cmd; /* Associated Scsi_Cmnd - structure, Scsi_Cmnd points - at NCR53c7x0_cmd using - host_scribble structure */ - - int size; /* scsi_malloc'd size of this - structure */ - - int flags; /* CMD_* flags */ - - unsigned char cmnd[12]; /* CDB, copied from Scsi_Cmnd */ - int result; /* Copy to Scsi_Cmnd when done */ - - struct { /* Private non-cached bounce buffer */ - unsigned char buf[256]; - u32 addr; - u32 len; - } bounce; - -/* - * SDTR and WIDE messages are an either/or affair - * in this message, since we will go into message out and send - * _the whole mess_ without dropping out of message out to - * let the target go into message in after sending the first - * message. - */ - - unsigned char select[11]; /* Select message, includes - IDENTIFY - (optional) QUEUE TAG - (optional) SDTR or WDTR - */ - - - volatile struct NCR53c7x0_cmd *next; /* Linux maintained lists (free, - running, eventually finished */ - - - u32 *data_transfer_start; /* Start of data transfer routines */ - u32 *data_transfer_end; /* Address after end of data transfer o - routines */ -/* - * The following three fields were moved from the DSA proper to here - * since only dynamically generated NCR code refers to them, meaning - * we don't need dsa_* absolutes, and it is simpler to let the - * host code refer to them directly. - */ - -/* - * HARD CODED : residual and saved_residual need to agree with the sizes - * used in NCR53c7,8xx.scr. - * - * FIXME: we want to consider the case where we have odd-length - * scatter/gather buffers and a WIDE transfer, in which case - * we'll need to use the CHAIN MOVE instruction. Ick. 
- */ - u32 residual[6] __attribute__ ((aligned (4))); - /* Residual data transfer which - allows pointer code to work - right. - - [0-1] : Conditional call to - appropriate other transfer - routine. - [2-3] : Residual block transfer - instruction. - [4-5] : Jump to instruction - after splice. - */ - u32 saved_residual[6]; /* Copy of old residual, so we - can get another partial - transfer and still recover - */ - - u32 saved_data_pointer; /* Saved data pointer */ - - u32 dsa_next_addr; /* _Address_ of dsa_next field - in this dsa for RISCy - style constant. */ - - u32 dsa_addr; /* Address of dsa; RISCy style - constant */ - - u32 dsa[0]; /* Variable length (depending - on host type, number of scatter / - gather buffers, etc). */ -}; - -struct NCR53c7x0_break { - u32 *address, old_instruction[2]; - struct NCR53c7x0_break *next; - unsigned char old_size; /* Size of old instruction */ -}; - -/* Indicates that the NCR is not executing code */ -#define STATE_HALTED 0 -/* - * Indicates that the NCR is executing the wait for select / reselect - * script. Only used when running NCR53c700 compatible scripts, only - * state during which an ABORT is _not_ considered an error condition. - */ -#define STATE_WAITING 1 -/* Indicates that the NCR is executing other code. */ -#define STATE_RUNNING 2 -/* - * Indicates that the NCR was being aborted. - */ -#define STATE_ABORTING 3 -/* Indicates that the NCR was successfully aborted. */ -#define STATE_ABORTED 4 -/* Indicates that the NCR has been disabled due to a fatal error */ -#define STATE_DISABLED 5 - -/* - * Where knowledge of SCSI SCRIPT(tm) specified values are needed - * in an interrupt handler, an interrupt handler exists for each - * different SCSI script so we don't have name space problems. 
- * - * Return values of these handlers are as follows : - */ -#define SPECIFIC_INT_NOTHING 0 /* don't even restart */ -#define SPECIFIC_INT_RESTART 1 /* restart at the next instruction */ -#define SPECIFIC_INT_ABORT 2 /* recoverable error, abort cmd */ -#define SPECIFIC_INT_PANIC 3 /* unrecoverable error, panic */ -#define SPECIFIC_INT_DONE 4 /* normal command completion */ -#define SPECIFIC_INT_BREAK 5 /* break point encountered */ - -struct NCR53c7x0_hostdata { - int size; /* Size of entire Scsi_Host - structure */ - int board; /* set to board type, useful if - we have host specific things, - ie, a general purpose I/O - bit is being used to enable - termination, etc. */ - - int chip; /* set to chip type; 700-66 is - 700-66, rest are last three - digits of part number */ - - char valid_ids[8]; /* Valid SCSI ID's for adapter */ - - u32 *dsp; /* dsp to restart with after - all stacked interrupts are - handled. */ - - unsigned dsp_changed:1; /* Has dsp changed within this - set of stacked interrupts ? */ - - unsigned char dstat; /* Most recent value of dstat */ - unsigned dstat_valid:1; - - unsigned expecting_iid:1; /* Expect IID interrupt */ - unsigned expecting_sto:1; /* Expect STO interrupt */ - - /* - * The code stays cleaner if we use variables with function - * pointers and offsets that are unique for the different - * scripts rather than having a slew of switch(hostdata->chip) - * statements. - * - * It also means that the #defines from the SCSI SCRIPTS(tm) - * don't have to be visible outside of the script-specific - * instructions, preventing name space pollution. - */ - - void (* init_fixup)(struct Scsi_Host *host); - void (* init_save_regs)(struct Scsi_Host *host); - void (* dsa_fixup)(struct NCR53c7x0_cmd *cmd); - void (* soft_reset)(struct Scsi_Host *host); - int (* run_tests)(struct Scsi_Host *host); - - /* - * Called when DSTAT_SIR is set, indicating an interrupt generated - * by the INT instruction, where values are unique for each SCSI - * script. 
Should return one of the SPEC_* values. - */ - - int (* dstat_sir_intr)(struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd); - - int dsa_len; /* Size of DSA structure */ - - /* - * Location of DSA fields for the SCSI SCRIPT corresponding to this - * chip. - */ - - s32 dsa_start; - s32 dsa_end; - s32 dsa_next; - s32 dsa_prev; - s32 dsa_cmnd; - s32 dsa_select; - s32 dsa_msgout; - s32 dsa_cmdout; - s32 dsa_dataout; - s32 dsa_datain; - s32 dsa_msgin; - s32 dsa_msgout_other; - s32 dsa_write_sync; - s32 dsa_write_resume; - s32 dsa_check_reselect; - s32 dsa_status; - s32 dsa_saved_pointer; - s32 dsa_jump_dest; - - /* - * Important entry points that generic fixup code needs - * to know about, fixed up. - */ - - s32 E_accept_message; - s32 E_command_complete; - s32 E_data_transfer; - s32 E_dsa_code_template; - s32 E_dsa_code_template_end; - s32 E_end_data_transfer; - s32 E_msg_in; - s32 E_initiator_abort; - s32 E_other_transfer; - s32 E_other_in; - s32 E_other_out; - s32 E_target_abort; - s32 E_debug_break; - s32 E_reject_message; - s32 E_respond_message; - s32 E_select; - s32 E_select_msgout; - s32 E_test_0; - s32 E_test_1; - s32 E_test_2; - s32 E_test_3; - s32 E_dsa_zero; - s32 E_cmdout_cmdout; - s32 E_wait_reselect; - s32 E_dsa_code_begin; - - long long options; /* Bitfielded set of options enabled */ - volatile u32 test_completed; /* Test completed */ - int test_running; /* Test currently running */ - s32 test_source - __attribute__ ((aligned (4))); - volatile s32 test_dest; - - volatile int state; /* state of driver, only used for - OPTION_700 */ - - unsigned char dmode; /* - * set to the address of the DMODE - * register for this chip. - */ - unsigned char istat; /* - * set to the address of the ISTAT - * register for this chip. - */ - - int scsi_clock; /* - * SCSI clock in HZ. 0 may be used - * for unknown, although this will - * disable synchronous negotiation. 
- */ - - volatile int intrs; /* Number of interrupts */ - volatile int resets; /* Number of SCSI resets */ - unsigned char saved_dmode; - unsigned char saved_ctest4; - unsigned char saved_ctest7; - unsigned char saved_dcntl; - unsigned char saved_scntl3; - - unsigned char this_id_mask; - - /* Debugger information */ - struct NCR53c7x0_break *breakpoints, /* Linked list of all break points */ - *breakpoint_current; /* Current breakpoint being stepped - through, NULL if we are running - normally. */ -#ifdef NCR_DEBUG - int debug_size; /* Size of debug buffer */ - volatile int debug_count; /* Current data count */ - volatile char *debug_buf; /* Output ring buffer */ - volatile char *debug_write; /* Current write pointer */ - volatile char *debug_read; /* Current read pointer */ -#endif /* def NCR_DEBUG */ - - /* XXX - primitive debugging junk, remove when working ? */ - int debug_print_limit; /* Number of commands to print - out exhaustive debugging - information for if - OPTION_DEBUG_DUMP is set */ - - unsigned char debug_lun_limit[16]; /* If OPTION_DEBUG_TARGET_LIMIT - set, puke if commands are sent - to other target/lun combinations */ - - int debug_count_limit; /* Number of commands to execute - before puking to limit debugging - output */ - - - volatile unsigned idle:1; /* set to 1 if idle */ - - /* - * Table of synchronous+wide transfer parameters set on a per-target - * basis. 
- */ - - volatile struct NCR53c7x0_synchronous sync[16] - __attribute__ ((aligned (4))); - - volatile Scsi_Cmnd *issue_queue - __attribute__ ((aligned (4))); - /* waiting to be issued by - Linux driver */ - volatile struct NCR53c7x0_cmd *running_list; - /* commands running, maintained - by Linux driver */ - - volatile struct NCR53c7x0_cmd *ncrcurrent; /* currently connected - nexus, ONLY valid for - NCR53c700/NCR53c700-66 - */ - - volatile struct NCR53c7x0_cmd *spare; /* pointer to spare, - allocated at probe time, - which we can use for - initialization */ - volatile struct NCR53c7x0_cmd *free; - int max_cmd_size; /* Maximum size of NCR53c7x0_cmd - based on number of - scatter/gather segments, etc. - */ - volatile int num_cmds; /* Number of commands - allocated */ - volatile int extra_allocate; - volatile unsigned char cmd_allocated[16]; /* Have we allocated commands - for this target yet? If not, - do so ASAP */ - volatile unsigned char busy[16][8]; /* number of commands - executing on each target - */ - /* - * Eventually, I'll switch to a coroutine for calling - * cmd->done(cmd), etc. so that we can overlap interrupt - * processing with this code for maximum performance. - */ - - volatile struct NCR53c7x0_cmd *finished_queue; - - /* Shared variables between SCRIPT and host driver */ - volatile u32 *schedule - __attribute__ ((aligned (4))); /* Array of JUMPs to dsa_begin - routines of various DSAs. - When not in use, replace - with jump to next slot */ - - - volatile unsigned char msg_buf[16]; /* buffer for messages - other than the command - complete message */ - - /* Per-target default synchronous and WIDE messages */ - volatile unsigned char synchronous_want[16][5]; - volatile unsigned char wide_want[16][4]; - - /* Bit fielded set of targets we want to speak synchronously with */ - volatile u16 initiate_sdtr; - /* Bit fielded set of targets we want to speak wide with */ - volatile u16 initiate_wdtr; - /* Bit fielded list of targets we've talked to. 
*/ - volatile u16 talked_to; - - /* Array of bit-fielded lun lists that we need to request_sense */ - volatile unsigned char request_sense[16]; - - u32 addr_reconnect_dsa_head - __attribute__ ((aligned (4))); /* RISCy style constant, - address of following */ - volatile u32 reconnect_dsa_head; - /* Data identifying nexus we are trying to match during reselection */ - volatile unsigned char reselected_identify; /* IDENTIFY message */ - volatile unsigned char reselected_tag; /* second byte of queue tag - message or 0 */ - - /* These were static variables before we moved them */ - - s32 NCR53c7xx_zero - __attribute__ ((aligned (4))); - s32 NCR53c7xx_sink; - u32 NOP_insn; - char NCR53c7xx_msg_reject; - char NCR53c7xx_msg_abort; - char NCR53c7xx_msg_nop; - - /* - * Following item introduced by RGH to support NCRc710, which is - * VERY brain-dead when it come to memory moves - */ - - /* DSA save area used only by the NCR chip */ - volatile unsigned long saved2_dsa - __attribute__ ((aligned (4))); - - volatile unsigned long emulated_intfly - __attribute__ ((aligned (4))); - - volatile int event_size, event_index; - volatile struct NCR53c7x0_event *events; - - /* If we need to generate code to kill off the currently connected - command, this is where we do it. Should have a BMI instruction - to source or sink the current data, followed by a JUMP - to abort_connected */ - - u32 *abort_script; - - int script_count; /* Size of script in words */ - u32 script[0]; /* Relocated SCSI script */ - -}; - -#define SCSI_IRQ_NONE 255 -#define DMA_NONE 255 -#define IRQ_AUTO 254 -#define DMA_AUTO 254 - -#define BOARD_GENERIC 0 - -#define NCR53c7x0_insn_size(insn) \ - (((insn) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI ? 
3 : 2) - - -#define NCR53c7x0_local_declare() \ - volatile unsigned char *NCR53c7x0_address_memory; \ - unsigned int NCR53c7x0_address_io; \ - int NCR53c7x0_memory_mapped - -#define NCR53c7x0_local_setup(host) \ - NCR53c7x0_address_memory = (void *) (host)->base; \ - NCR53c7x0_address_io = (unsigned int) (host)->io_port; \ - NCR53c7x0_memory_mapped = ((struct NCR53c7x0_hostdata *) \ - host->hostdata[0])-> options & OPTION_MEMORY_MAPPED - -#ifdef BIG_ENDIAN -/* These could be more efficient, given that we are always memory mapped, - * but they don't give the same problems as the write macros, so leave - * them. */ -#ifdef __mc68000__ -#define NCR53c7x0_read8(address) \ - ((unsigned int)raw_inb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) ) - -#define NCR53c7x0_read16(address) \ - ((unsigned int)raw_inw((u32)NCR53c7x0_address_memory + ((u32)(address)^2))) -#else -#define NCR53c7x0_read8(address) \ - (NCR53c7x0_memory_mapped ? \ - (unsigned int)readb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) : \ - inb(NCR53c7x0_address_io + (address))) - -#define NCR53c7x0_read16(address) \ - (NCR53c7x0_memory_mapped ? \ - (unsigned int)readw((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) : \ - inw(NCR53c7x0_address_io + (address))) -#endif /* mc68000 */ -#else -#define NCR53c7x0_read8(address) \ - (NCR53c7x0_memory_mapped ? \ - (unsigned int)readb((u32)NCR53c7x0_address_memory + (u32)(address)) : \ - inb(NCR53c7x0_address_io + (address))) - -#define NCR53c7x0_read16(address) \ - (NCR53c7x0_memory_mapped ? \ - (unsigned int)readw((u32)NCR53c7x0_address_memory + (u32)(address)) : \ - inw(NCR53c7x0_address_io + (address))) -#endif - -#ifdef __mc68000__ -#define NCR53c7x0_read32(address) \ - ((unsigned int) raw_inl((u32)NCR53c7x0_address_memory + (u32)(address))) -#else -#define NCR53c7x0_read32(address) \ - (NCR53c7x0_memory_mapped ? 
\ - (unsigned int) readl((u32)NCR53c7x0_address_memory + (u32)(address)) : \ - inl(NCR53c7x0_address_io + (address))) -#endif /* mc68000*/ - -#ifdef BIG_ENDIAN -/* If we are big-endian, then we are not Intel, so probably don't have - * an i/o map as well as a memory map. So, let's assume memory mapped. - * Also, I am having terrible problems trying to persuade the compiler - * not to lay down code which does a read after write for these macros. - * If you remove 'volatile' from writeb() and friends it is ok.... - */ - -#define NCR53c7x0_write8(address,value) \ - *(volatile unsigned char *) \ - ((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) = (value) - -#define NCR53c7x0_write16(address,value) \ - *(volatile unsigned short *) \ - ((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) = (value) - -#define NCR53c7x0_write32(address,value) \ - *(volatile unsigned long *) \ - ((u32)NCR53c7x0_address_memory + ((u32)(address))) = (value) - -#else - -#define NCR53c7x0_write8(address,value) \ - (NCR53c7x0_memory_mapped ? \ - ({writeb((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \ - outb((value), NCR53c7x0_address_io + (address))) - -#define NCR53c7x0_write16(address,value) \ - (NCR53c7x0_memory_mapped ? \ - ({writew((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \ - outw((value), NCR53c7x0_address_io + (address))) - -#define NCR53c7x0_write32(address,value) \ - (NCR53c7x0_memory_mapped ? 
\ - ({writel((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \ - outl((value), NCR53c7x0_address_io + (address))) - -#endif - -/* Patch arbitrary 32 bit words in the script */ -#define patch_abs_32(script, offset, symbol, value) \ - for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \ - (u32)); ++i) { \ - (script)[A_##symbol##_used[i] - (offset)] += (value); \ - if (hostdata->options & OPTION_DEBUG_FIXUP) \ - printk("scsi%d : %s reference %d at 0x%x in %s is now 0x%x\n",\ - host->host_no, #symbol, i, A_##symbol##_used[i] - \ - (int)(offset), #script, (script)[A_##symbol##_used[i] - \ - (offset)]); \ - } - -/* Patch read/write instruction immediate field */ -#define patch_abs_rwri_data(script, offset, symbol, value) \ - for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \ - (u32)); ++i) \ - (script)[A_##symbol##_used[i] - (offset)] = \ - ((script)[A_##symbol##_used[i] - (offset)] & \ - ~DBC_RWRI_IMMEDIATE_MASK) | \ - (((value) << DBC_RWRI_IMMEDIATE_SHIFT) & \ - DBC_RWRI_IMMEDIATE_MASK) - -/* Patch transfer control instruction data field */ -#define patch_abs_tci_data(script, offset, symbol, value) \ - for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \ - (u32)); ++i) \ - (script)[A_##symbol##_used[i] - (offset)] = \ - ((script)[A_##symbol##_used[i] - (offset)] & \ - ~DBC_TCI_DATA_MASK) | \ - (((value) << DBC_TCI_DATA_SHIFT) & \ - DBC_TCI_DATA_MASK) - -/* Patch field in dsa structure (assignment should be +=?) */ -#define patch_dsa_32(dsa, symbol, word, value) \ - { \ - (dsa)[(hostdata->##symbol - hostdata->dsa_start) / sizeof(u32) \ - + (word)] = (value); \ - if (hostdata->options & OPTION_DEBUG_DSA) \ - printk("scsi : dsa %s symbol %s(%d) word %d now 0x%x\n", \ - #dsa, #symbol, hostdata->##symbol, \ - (word), (u32) (value)); \ - } - -/* Paranoid people could use panic() here. 
*/ -#define FATAL(host) shutdown((host)); - -extern int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip, - unsigned long base, int io_port, int irq, int dma, - long long options, int clock); - -#endif /* NCR53c710_C */ -#endif /* NCR53c710_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.scr linux-2.6.22-591/drivers/scsi/53c7xx.scr --- linux-2.6.22-570/drivers/scsi/53c7xx.scr 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c7xx.scr 1969-12-31 19:00:00.000000000 -0500 @@ -1,1591 +0,0 @@ -#undef DEBUG -#undef EVENTS -#undef NO_SELECTION_TIMEOUT -#define BIG_ENDIAN - -; 53c710 driver. Modified from Drew Eckhardts driver -; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] -; -; I have left the script for the 53c8xx family in here, as it is likely -; to be useful to see what I changed when bug hunting. - -; NCR 53c810 driver, main script -; Sponsored by -; iX Multiuser Multitasking Magazine -; hm@ix.de -; -; Copyright 1993, 1994, 1995 Drew Eckhardt -; Visionary Computing -; (Unix and Linux consulting and custom programming) -; drew@PoohSticks.ORG -; +1 (303) 786-7975 -; -; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. -; -; PRE-ALPHA -; -; For more information, please consult -; -; NCR 53C810 -; PCI-SCSI I/O Processor -; Data Manual -; -; NCR 53C710 -; SCSI I/O Processor -; Programmers Guide -; -; NCR Microelectronics -; 1635 Aeroplaza Drive -; Colorado Springs, CO 80916 -; 1+ (719) 578-3400 -; -; Toll free literature number -; +1 (800) 334-5454 -; -; IMPORTANT : This code is self modifying due to the limitations of -; the NCR53c7,8xx series chips. Persons debugging this code with -; the remote debugger should take this into account, and NOT set -; breakpoints in modified instructions. -; -; Design: -; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard -; microcontroller using a simple instruction set. 
-; -; So, to minimize the effects of interrupt latency, and to maximize -; throughput, this driver offloads the practical maximum amount -; of processing to the SCSI chip while still maintaining a common -; structure. -; -; Where tradeoffs were needed between efficiency on the older -; chips and the newer NCR53c800 series, the NCR53c800 series -; was chosen. -; -; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully -; automate SCSI transfers without host processor intervention, this -; isn't the case with the NCR53c710 and newer chips which allow -; -; - reads and writes to the internal registers from within the SCSI -; scripts, allowing the SCSI SCRIPTS(tm) code to save processor -; state so that multiple threads of execution are possible, and also -; provide an ALU for loop control, etc. -; -; - table indirect addressing for some instructions. This allows -; pointers to be located relative to the DSA ((Data Structure -; Address) register. -; -; These features make it possible to implement a mailbox style interface, -; where the same piece of code is run to handle I/O for multiple threads -; at once minimizing our need to relocate code. Since the NCR53c700/ -; NCR53c800 series have a unique combination of features, making a -; a standard ingoing/outgoing mailbox system, costly, I've modified it. -; -; - Mailboxes are a mixture of code and data. This lets us greatly -; simplify the NCR53c810 code and do things that would otherwise -; not be possible. -; -; The saved data pointer is now implemented as follows : -; -; Control flow has been architected such that if control reaches -; munge_save_data_pointer, on a restore pointers message or -; reconnection, a jump to the address formerly in the TEMP register -; will allow the SCSI command to resume execution. 
-; - -; -; Note : the DSA structures must be aligned on 32 bit boundaries, -; since the source and destination of MOVE MEMORY instructions -; must share the same alignment and this is the alignment of the -; NCR registers. -; - -; For some systems (MVME166, for example) dmode is always the same, so don't -; waste time writing it - -#if 1 -#define DMODE_MEMORY_TO_NCR -#define DMODE_MEMORY_TO_MEMORY -#define DMODE_NCR_TO_MEMORY -#else -#define DMODE_MEMORY_TO_NCR MOVE dmode_memory_to_ncr TO DMODE -#define DMODE_MEMORY_TO_MEMORY MOVE dmode_memory_to_memory TO DMODE -#define DMODE_NCR_TO_MEMORY MOVE dmode_ncr_to_memory TO DMODE -#endif - -ABSOLUTE dsa_temp_lun = 0 ; Patch to lun for current dsa -ABSOLUTE dsa_temp_next = 0 ; Patch to dsa next for current dsa -ABSOLUTE dsa_temp_addr_next = 0 ; Patch to address of dsa next address - ; for current dsa -ABSOLUTE dsa_temp_sync = 0 ; Patch to address of per-target - ; sync routine -ABSOLUTE dsa_sscf_710 = 0 ; Patch to address of per-target - ; sscf value (53c710) -ABSOLUTE dsa_temp_target = 0 ; Patch to id for current dsa -ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command - ; saved data pointer -ABSOLUTE dsa_temp_addr_residual = 0 ; Patch to address of per-command - ; current residual code -ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command - ; saved residual code -ABSOLUTE dsa_temp_addr_new_value = 0 ; Address of value for JUMP operand -ABSOLUTE dsa_temp_addr_array_value = 0 ; Address to copy to -ABSOLUTE dsa_temp_addr_dsa_value = 0 ; Address of this DSA value - -; -; Once a device has initiated reselection, we need to compare it -; against the singly linked list of commands which have disconnected -; and are pending reselection. These commands are maintained in -; an unordered singly linked list of DSA structures, through the -; DSA pointers at their 'centers' headed by the reconnect_dsa_head -; pointer. 
-; -; To avoid complications in removing commands from the list, -; I minimize the amount of expensive (at eight operations per -; addition @ 500-600ns each) pointer operations which must -; be done in the NCR driver by precomputing them on the -; host processor during dsa structure generation. -; -; The fixed-up per DSA code knows how to recognize the nexus -; associated with the corresponding SCSI command, and modifies -; the source and destination pointers for the MOVE MEMORY -; instruction which is executed when reselected_ok is called -; to remove the command from the list. Similarly, DSA is -; loaded with the address of the next DSA structure and -; reselected_check_next is called if a failure occurs. -; -; Perhaps more concisely, the net effect of the mess is -; -; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head, -; src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) { -; src = &dsa->next; -; if (target_id == dsa->id && target_lun == dsa->lun) { -; *dest = *src; -; break; -; } -; } -; -; if (!dsa) -; error (int_err_unexpected_reselect); -; else -; longjmp (dsa->jump_resume, 0); -; -; - -#if (CHIP != 700) && (CHIP != 70066) -; Define DSA structure used for mailboxes -ENTRY dsa_code_template -dsa_code_template: -ENTRY dsa_code_begin -dsa_code_begin: -; RGH: Don't care about TEMP and DSA here - DMODE_MEMORY_TO_NCR - MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch - DMODE_MEMORY_TO_MEMORY -#if (CHIP == 710) - MOVE MEMORY 4, addr_scratch, saved_dsa - ; We are about to go and select the device, so must set SSCF bits - MOVE MEMORY 4, dsa_sscf_710, addr_scratch -#ifdef BIG_ENDIAN - MOVE SCRATCH3 TO SFBR -#else - MOVE SCRATCH0 TO SFBR -#endif - MOVE SFBR TO SBCL - MOVE MEMORY 4, saved_dsa, addr_dsa -#else - CALL scratch_to_dsa -#endif - CALL select -; Handle the phase mismatch which may have resulted from the -; MOVE FROM dsa_msgout if we returned here. 
The CLEAR ATN -; may or may not be necessary, and we should update script_asm.pl -; to handle multiple pieces. - CLEAR ATN - CLEAR ACK - -; Replace second operand with address of JUMP instruction dest operand -; in schedule table for this DSA. Becomes dsa_jump_dest in 53c7,8xx.c. -ENTRY dsa_code_fix_jump -dsa_code_fix_jump: - MOVE MEMORY 4, NOP_insn, 0 - JUMP select_done - -; wrong_dsa loads the DSA register with the value of the dsa_next -; field. -; -wrong_dsa: -#if (CHIP == 710) -; NOTE DSA is corrupt when we arrive here! -#endif -; Patch the MOVE MEMORY INSTRUCTION such that -; the destination address is the address of the OLD -; next pointer. -; - MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8 - DMODE_MEMORY_TO_NCR -; -; Move the _contents_ of the next pointer into the DSA register as -; the next I_T_L or I_T_L_Q tupple to check against the established -; nexus. -; - MOVE MEMORY 4, dsa_temp_next, addr_scratch - DMODE_MEMORY_TO_MEMORY -#if (CHIP == 710) - MOVE MEMORY 4, addr_scratch, saved_dsa - MOVE MEMORY 4, saved_dsa, addr_dsa -#else - CALL scratch_to_dsa -#endif - JUMP reselected_check_next - -ABSOLUTE dsa_save_data_pointer = 0 -ENTRY dsa_code_save_data_pointer -dsa_code_save_data_pointer: -#if (CHIP == 710) - ; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt - ; We MUST return with DSA correct - MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer -; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h - MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual - CLEAR ACK -#ifdef DEBUG - INT int_debug_saved -#endif - MOVE MEMORY 4, saved_dsa, addr_dsa - JUMP jump_temp -#else - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_temp, dsa_temp_addr_saved_pointer - DMODE_MEMORY_TO_MEMORY -; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h - MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual - CLEAR ACK -#ifdef DEBUG - INT int_debug_saved -#endif - RETURN -#endif -ABSOLUTE dsa_restore_pointers = 0 
-ENTRY dsa_code_restore_pointers -dsa_code_restore_pointers: -#if (CHIP == 710) - ; TEMP and DSA are corrupt when we get here, but who cares! - MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4 -; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h - MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual - CLEAR ACK - ; Restore DSA, note we don't care about TEMP - MOVE MEMORY 4, saved_dsa, addr_dsa -#ifdef DEBUG - INT int_debug_restored -#endif - JUMP jump_temp -#else - DMODE_MEMORY_TO_NCR - MOVE MEMORY 4, dsa_temp_addr_saved_pointer, addr_temp - DMODE_MEMORY_TO_MEMORY -; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h - MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual - CLEAR ACK -#ifdef DEBUG - INT int_debug_restored -#endif - RETURN -#endif - -ABSOLUTE dsa_check_reselect = 0 -; dsa_check_reselect determines whether or not the current target and -; lun match the current DSA -ENTRY dsa_code_check_reselect -dsa_code_check_reselect: -#if (CHIP == 710) - /* Arrives here with DSA correct */ - /* Assumes we are always ID 7 */ - MOVE LCRC TO SFBR ; LCRC has our ID and his ID bits set - JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80 -#else - MOVE SSID TO SFBR ; SSID contains 3 bit target ID -; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips - JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0xf8 -#endif -; -; Hack - move to scratch first, since SFBR is not writeable -; via the CPU and hence a MOVE MEMORY instruction. -; - DMODE_MEMORY_TO_NCR - MOVE MEMORY 1, reselected_identify, addr_scratch - DMODE_MEMORY_TO_MEMORY -#ifdef BIG_ENDIAN - ; BIG ENDIAN ON MVME16x - MOVE SCRATCH3 TO SFBR -#else - MOVE SCRATCH0 TO SFBR -#endif -; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips -; Are you sure about that? 
richard@sleepie.demon.co.uk - JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8 -; Patch the MOVE MEMORY INSTRUCTION such that -; the source address is the address of this dsa's -; next pointer. - MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4 - CALL reselected_ok -#if (CHIP == 710) -; Restore DSA following memory moves in reselected_ok -; dsa_temp_sync doesn't really care about DSA, but it has an -; optional debug INT so a valid DSA is a good idea. - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif - CALL dsa_temp_sync -; Release ACK on the IDENTIFY message _after_ we've set the synchronous -; transfer parameters! - CLEAR ACK -; Implicitly restore pointers on reselection, so a RETURN -; will transfer control back to the right spot. - CALL REL (dsa_code_restore_pointers) - RETURN -ENTRY dsa_zero -dsa_zero: -ENTRY dsa_code_template_end -dsa_code_template_end: - -; Perform sanity check for dsa_fields_start == dsa_code_template_end - -; dsa_zero, puke. - -ABSOLUTE dsa_fields_start = 0 ; Sanity marker - ; pad 48 bytes (fix this RSN) -ABSOLUTE dsa_next = 48 ; len 4 Next DSA - ; del 4 Previous DSA address -ABSOLUTE dsa_cmnd = 56 ; len 4 Scsi_Cmnd * for this thread. -ABSOLUTE dsa_select = 60 ; len 4 Device ID, Period, Offset for - ; table indirect select -ABSOLUTE dsa_msgout = 64 ; len 8 table indirect move parameter for - ; select message -ABSOLUTE dsa_cmdout = 72 ; len 8 table indirect move parameter for - ; command -ABSOLUTE dsa_dataout = 80 ; len 4 code pointer for dataout -ABSOLUTE dsa_datain = 84 ; len 4 code pointer for datain -ABSOLUTE dsa_msgin = 88 ; len 8 table indirect move for msgin -ABSOLUTE dsa_status = 96 ; len 8 table indirect move for status byte -ABSOLUTE dsa_msgout_other = 104 ; len 8 table indirect for normal message out - ; (Synchronous transfer negotiation, etc). 
-ABSOLUTE dsa_end = 112 - -ABSOLUTE schedule = 0 ; Array of JUMP dsa_begin or JUMP (next), - ; terminated by a call to JUMP wait_reselect - -; Linked lists of DSA structures -ABSOLUTE reconnect_dsa_head = 0 ; Link list of DSAs which can reconnect -ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing - ; address of reconnect_dsa_head - -; These select the source and destination of a MOVE MEMORY instruction -ABSOLUTE dmode_memory_to_memory = 0x0 -ABSOLUTE dmode_memory_to_ncr = 0x0 -ABSOLUTE dmode_ncr_to_memory = 0x0 - -ABSOLUTE addr_scratch = 0x0 -ABSOLUTE addr_temp = 0x0 -#if (CHIP == 710) -ABSOLUTE saved_dsa = 0x0 -ABSOLUTE emulfly = 0x0 -ABSOLUTE addr_dsa = 0x0 -#endif -#endif /* CHIP != 700 && CHIP != 70066 */ - -; Interrupts - -; MSB indicates type -; 0 handle error condition -; 1 handle message -; 2 handle normal condition -; 3 debugging interrupt -; 4 testing interrupt -; Next byte indicates specific error - -; XXX not yet implemented, I'm not sure if I want to - -; Next byte indicates the routine the error occurred in -; The LSB indicates the specific place the error occurred - -ABSOLUTE int_err_unexpected_phase = 0x00000000 ; Unexpected phase encountered -ABSOLUTE int_err_selected = 0x00010000 ; SELECTED (nee RESELECTED) -ABSOLUTE int_err_unexpected_reselect = 0x00020000 -ABSOLUTE int_err_check_condition = 0x00030000 -ABSOLUTE int_err_no_phase = 0x00040000 -ABSOLUTE int_msg_wdtr = 0x01000000 ; WDTR message received -ABSOLUTE int_msg_sdtr = 0x01010000 ; SDTR received -ABSOLUTE int_msg_1 = 0x01020000 ; single byte special message - ; received - -ABSOLUTE int_norm_select_complete = 0x02000000 ; Select complete, reprogram - ; registers. -ABSOLUTE int_norm_reselect_complete = 0x02010000 ; Nexus established -ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete -ABSOLUTE int_norm_disconnected = 0x02030000 ; Disconnected -ABSOLUTE int_norm_aborted =0x02040000 ; Aborted *dsa -ABSOLUTE int_norm_reset = 0x02050000 ; Generated BUS reset. 
-ABSOLUTE int_norm_emulateintfly = 0x02060000 ; 53C710 Emulated intfly -ABSOLUTE int_debug_break = 0x03000000 ; Break point -#ifdef DEBUG -ABSOLUTE int_debug_scheduled = 0x03010000 ; new I/O scheduled -ABSOLUTE int_debug_idle = 0x03020000 ; scheduler is idle -ABSOLUTE int_debug_dsa_loaded = 0x03030000 ; dsa reloaded -ABSOLUTE int_debug_reselected = 0x03040000 ; NCR reselected -ABSOLUTE int_debug_head = 0x03050000 ; issue head overwritten -ABSOLUTE int_debug_disconnected = 0x03060000 ; disconnected -ABSOLUTE int_debug_disconnect_msg = 0x03070000 ; got message to disconnect -ABSOLUTE int_debug_dsa_schedule = 0x03080000 ; in dsa_schedule -ABSOLUTE int_debug_reselect_check = 0x03090000 ; Check for reselection of DSA -ABSOLUTE int_debug_reselected_ok = 0x030a0000 ; Reselection accepted -#endif -ABSOLUTE int_debug_panic = 0x030b0000 ; Panic driver -#ifdef DEBUG -ABSOLUTE int_debug_saved = 0x030c0000 ; save/restore pointers -ABSOLUTE int_debug_restored = 0x030d0000 -ABSOLUTE int_debug_sync = 0x030e0000 ; Sanity check synchronous - ; parameters. -ABSOLUTE int_debug_datain = 0x030f0000 ; going into data in phase - ; now. -ABSOLUTE int_debug_check_dsa = 0x03100000 ; Sanity check DSA against - ; SDID. -#endif - -ABSOLUTE int_test_1 = 0x04000000 ; Test 1 complete -ABSOLUTE int_test_2 = 0x04010000 ; Test 2 complete -ABSOLUTE int_test_3 = 0x04020000 ; Test 3 complete - - -; These should start with 0x05000000, with low bits incrementing for -; each one. 
- -#ifdef EVENTS -ABSOLUTE int_EVENT_SELECT = 0 -ABSOLUTE int_EVENT_DISCONNECT = 0 -ABSOLUTE int_EVENT_RESELECT = 0 -ABSOLUTE int_EVENT_COMPLETE = 0 -ABSOLUTE int_EVENT_IDLE = 0 -ABSOLUTE int_EVENT_SELECT_FAILED = 0 -ABSOLUTE int_EVENT_BEFORE_SELECT = 0 -ABSOLUTE int_EVENT_RESELECT_FAILED = 0 -#endif - -ABSOLUTE NCR53c7xx_msg_abort = 0 ; Pointer to abort message -ABSOLUTE NCR53c7xx_msg_reject = 0 ; Pointer to reject message -ABSOLUTE NCR53c7xx_zero = 0 ; long with zero in it, use for source -ABSOLUTE NCR53c7xx_sink = 0 ; long to dump worthless data in -ABSOLUTE NOP_insn = 0 ; NOP instruction - -; Pointer to message, potentially multi-byte -ABSOLUTE msg_buf = 0 - -; Pointer to holding area for reselection information -ABSOLUTE reselected_identify = 0 -ABSOLUTE reselected_tag = 0 - -; Request sense command pointer, it's a 6 byte command, should -; be constant for all commands since we always want 16 bytes of -; sense and we don't need to change any fields as we did under -; SCSI-I when we actually cared about the LUN field. -;EXTERNAL NCR53c7xx_sense ; Request sense command - -#if (CHIP != 700) && (CHIP != 70066) -; dsa_schedule -; PURPOSE : after a DISCONNECT message has been received, and pointers -; saved, insert the current DSA structure at the head of the -; disconnected queue and fall through to the scheduler. 
-; -; CALLS : OK -; -; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list -; of disconnected commands -; -; MODIFIES : SCRATCH, reconnect_dsa_head -; -; EXITS : always passes control to schedule - -ENTRY dsa_schedule -dsa_schedule: -#ifdef DEBUG - INT int_debug_dsa_schedule -#endif - -; -; Calculate the address of the next pointer within the DSA -; structure of the command that is currently disconnecting -; -#if (CHIP == 710) - ; Read what should be the current DSA from memory - actual DSA - ; register is probably corrupt - MOVE MEMORY 4, saved_dsa, addr_scratch -#else - CALL dsa_to_scratch -#endif - MOVE SCRATCH0 + dsa_next TO SCRATCH0 - MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY - MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY - MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY - -; Point the next field of this DSA structure at the current disconnected -; list - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8 - DMODE_MEMORY_TO_MEMORY -dsa_schedule_insert: - MOVE MEMORY 4, reconnect_dsa_head, 0 - -; And update the head pointer. -#if (CHIP == 710) - ; Read what should be the current DSA from memory - actual DSA - ; register is probably corrupt - MOVE MEMORY 4, saved_dsa, addr_scratch -#else - CALL dsa_to_scratch -#endif - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, reconnect_dsa_head - DMODE_MEMORY_TO_MEMORY -/* Temporarily, see what happens. */ -#ifndef ORIGINAL -#if (CHIP != 710) - MOVE SCNTL2 & 0x7f TO SCNTL2 -#endif - CLEAR ACK -#endif -#if (CHIP == 710) - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif - WAIT DISCONNECT -#ifdef EVENTS - INT int_EVENT_DISCONNECT; -#endif -#ifdef DEBUG - INT int_debug_disconnected -#endif - JUMP schedule -#endif - -; -; select -; -; PURPOSE : establish a nexus for the SCSI command referenced by DSA. -; On success, the current DSA structure is removed from the issue -; queue. 
Usually, this is entered as a fall-through from schedule, -; although the contingent allegiance handling code will write -; the select entry address to the DSP to restart a command as a -; REQUEST SENSE. A message is sent (usually IDENTIFY, although -; additional SDTR or WDTR messages may be sent). COMMAND OUT -; is handled. -; -; INPUTS : DSA - SCSI command, issue_dsa_head -; -; CALLS : NOT OK -; -; MODIFIES : SCRATCH, issue_dsa_head -; -; EXITS : on reselection or selection, go to select_failed -; otherwise, RETURN so control is passed back to -; dsa_begin. -; - -ENTRY select -select: - -#ifdef EVENTS - INT int_EVENT_BEFORE_SELECT -#endif - -#ifdef DEBUG - INT int_debug_scheduled -#endif - CLEAR TARGET - -; XXX -; -; In effect, SELECTION operations are backgrounded, with execution -; continuing until code which waits for REQ or a fatal interrupt is -; encountered. -; -; So, for more performance, we could overlap the code which removes -; the command from the NCRs issue queue with the selection, but -; at this point I don't want to deal with the error recovery. -; - -#if (CHIP != 700) && (CHIP != 70066) -#if (CHIP == 710) - ; Enable selection timer -#ifdef NO_SELECTION_TIMEOUT - MOVE CTEST7 & 0xff TO CTEST7 -#else - MOVE CTEST7 & 0xef TO CTEST7 -#endif -#endif - SELECT ATN FROM dsa_select, select_failed - JUMP select_msgout, WHEN MSG_OUT -ENTRY select_msgout -select_msgout: -#if (CHIP == 710) - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 -#endif - MOVE FROM dsa_msgout, WHEN MSG_OUT -#else -ENTRY select_msgout - SELECT ATN 0, select_failed -select_msgout: - MOVE 0, 0, WHEN MSGOUT -#endif - -#ifdef EVENTS - INT int_EVENT_SELECT -#endif - RETURN - -; -; select_done -; -; PURPOSE: continue on to normal data transfer; called as the exit -; point from dsa_begin. -; -; INPUTS: dsa -; -; CALLS: OK -; -; - -select_done: -#if (CHIP == 710) -; NOTE DSA is corrupt when we arrive here! 
- MOVE MEMORY 4, saved_dsa, addr_dsa -#endif - -#ifdef DEBUG -ENTRY select_check_dsa -select_check_dsa: - INT int_debug_check_dsa -#endif - -; After a successful selection, we should get either a CMD phase or -; some transfer request negotiation message. - - JUMP cmdout, WHEN CMD - INT int_err_unexpected_phase, WHEN NOT MSG_IN - -select_msg_in: - CALL msg_in, WHEN MSG_IN - JUMP select_msg_in, WHEN MSG_IN - -cmdout: - INT int_err_unexpected_phase, WHEN NOT CMD -#if (CHIP == 700) - INT int_norm_selected -#endif -ENTRY cmdout_cmdout -cmdout_cmdout: -#if (CHIP != 700) && (CHIP != 70066) - MOVE FROM dsa_cmdout, WHEN CMD -#else - MOVE 0, 0, WHEN CMD -#endif /* (CHIP != 700) && (CHIP != 70066) */ - -; -; data_transfer -; other_out -; other_in -; other_transfer -; -; PURPOSE : handle the main data transfer for a SCSI command in -; several parts. In the first part, data_transfer, DATA_IN -; and DATA_OUT phases are allowed, with the user provided -; code (usually dynamically generated based on the scatter/gather -; list associated with a SCSI command) called to handle these -; phases. -; -; After control has passed to one of the user provided -; DATA_IN or DATA_OUT routines, back calls are made to -; other_transfer_in or other_transfer_out to handle non-DATA IN -; and DATA OUT phases respectively, with the state of the active -; data pointer being preserved in TEMP. -; -; On completion, the user code passes control to other_transfer -; which causes DATA_IN and DATA_OUT to result in unexpected_phase -; interrupts so that data overruns may be trapped. -; -; INPUTS : DSA - SCSI command -; -; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in -; other_transfer -; -; MODIFIES : SCRATCH -; -; EXITS : if STATUS IN is detected, signifying command completion, -; the NCR jumps to command_complete. If MSG IN occurs, a -; CALL is made to msg_in. Otherwise, other_transfer runs in -; an infinite loop. 
-; - -ENTRY data_transfer -data_transfer: - JUMP cmdout_cmdout, WHEN CMD - CALL msg_in, WHEN MSG_IN - INT int_err_unexpected_phase, WHEN MSG_OUT - JUMP do_dataout, WHEN DATA_OUT - JUMP do_datain, WHEN DATA_IN - JUMP command_complete, WHEN STATUS - JUMP data_transfer -ENTRY end_data_transfer -end_data_transfer: - -; -; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain -; should be fixed up whenever the nexus changes so it can point to the -; correct routine for that command. -; - -#if (CHIP != 700) && (CHIP != 70066) -; Nasty jump to dsa->dataout -do_dataout: -#if (CHIP == 710) - MOVE MEMORY 4, saved_dsa, addr_scratch -#else - CALL dsa_to_scratch -#endif - MOVE SCRATCH0 + dsa_dataout TO SCRATCH0 - MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY - MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY - MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4 - DMODE_MEMORY_TO_MEMORY -dataout_to_jump: - MOVE MEMORY 4, 0, dataout_jump + 4 -#if (CHIP == 710) - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif -dataout_jump: - JUMP 0 - -; Nasty jump to dsa->dsain -do_datain: -#if (CHIP == 710) - MOVE MEMORY 4, saved_dsa, addr_scratch -#else - CALL dsa_to_scratch -#endif - MOVE SCRATCH0 + dsa_datain TO SCRATCH0 - MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY - MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY - MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, datain_to_jump + 4 - DMODE_MEMORY_TO_MEMORY -ENTRY datain_to_jump -datain_to_jump: - MOVE MEMORY 4, 0, datain_jump + 4 -#if (CHIP == 710) - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif -#ifdef DEBUG - INT int_debug_datain -#endif -datain_jump: - JUMP 0 -#endif /* (CHIP != 700) && (CHIP != 70066) */ - - -; Note that other_out and other_in loop until a non-data phase -; is discovered, so we only execute return statements when we -; can go on to the next 
data phase block move statement. - -ENTRY other_out -other_out: -#if 0 - INT 0x03ffdead -#endif - INT int_err_unexpected_phase, WHEN CMD - JUMP msg_in_restart, WHEN MSG_IN - INT int_err_unexpected_phase, WHEN MSG_OUT - INT int_err_unexpected_phase, WHEN DATA_IN - JUMP command_complete, WHEN STATUS - JUMP other_out, WHEN NOT DATA_OUT -#if (CHIP == 710) -; TEMP should be OK, as we got here from a call in the user dataout code. -#endif - RETURN - -ENTRY other_in -other_in: -#if 0 - INT 0x03ffdead -#endif - INT int_err_unexpected_phase, WHEN CMD - JUMP msg_in_restart, WHEN MSG_IN - INT int_err_unexpected_phase, WHEN MSG_OUT - INT int_err_unexpected_phase, WHEN DATA_OUT - JUMP command_complete, WHEN STATUS - JUMP other_in, WHEN NOT DATA_IN -#if (CHIP == 710) -; TEMP should be OK, as we got here from a call in the user datain code. -#endif - RETURN - - -ENTRY other_transfer -other_transfer: - INT int_err_unexpected_phase, WHEN CMD - CALL msg_in, WHEN MSG_IN - INT int_err_unexpected_phase, WHEN MSG_OUT - INT int_err_unexpected_phase, WHEN DATA_OUT - INT int_err_unexpected_phase, WHEN DATA_IN - JUMP command_complete, WHEN STATUS - JUMP other_transfer - -; -; msg_in_restart -; msg_in -; munge_msg -; -; PURPOSE : process messages from a target. msg_in is called when the -; caller hasn't read the first byte of the message. munge_message -; is called when the caller has read the first byte of the message, -; and left it in SFBR. msg_in_restart is called when the caller -; hasn't read the first byte of the message, and wishes RETURN -; to transfer control back to the address of the conditional -; CALL instruction rather than to the instruction after it. -; -; Various int_* interrupts are generated when the host system -; needs to intervene, as is the case with SDTR, WDTR, and -; INITIATE RECOVERY messages. 
-; -; When the host system handles one of these interrupts, -; it can respond by reentering at reject_message, -; which rejects the message and returns control to -; the caller of msg_in or munge_msg, accept_message -; which clears ACK and returns control, or reply_message -; which sends the message pointed to by the DSA -; msgout_other table indirect field. -; -; DISCONNECT messages are handled by moving the command -; to the reconnect_dsa_queue. -#if (CHIP == 710) -; NOTE: DSA should be valid when we get here - we cannot save both it -; and TEMP in this routine. -#endif -; -; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg -; only) -; -; CALLS : NO. The TEMP register isn't backed up to allow nested calls. -; -; MODIFIES : SCRATCH, DSA on DISCONNECT -; -; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS, -; and normal return from message handlers running under -; Linux, control is returned to the caller. Receipt -; of DISCONNECT messages pass control to dsa_schedule. -; -ENTRY msg_in_restart -msg_in_restart: -; XXX - hackish -; -; Since it's easier to debug changes to the statically -; compiled code, rather than the dynamically generated -; stuff, such as -; -; MOVE x, y, WHEN data_phase -; CALL other_z, WHEN NOT data_phase -; MOVE x, y, WHEN data_phase -; -; I'd like to have certain routines (notably the message handler) -; restart on the conditional call rather than the next instruction. 
-; -; So, subtract 8 from the return address - - MOVE TEMP0 + 0xf8 TO TEMP0 - MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY - MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY - MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY - -ENTRY msg_in -msg_in: - MOVE 1, msg_buf, WHEN MSG_IN - -munge_msg: - JUMP munge_extended, IF 0x01 ; EXTENDED MESSAGE - JUMP munge_2, IF 0x20, AND MASK 0xdf ; two byte message -; -; XXX - I've seen a handful of broken SCSI devices which fail to issue -; a SAVE POINTERS message before disconnecting in the middle of -; a transfer, assuming that the DATA POINTER will be implicitly -; restored. -; -; Historically, I've often done an implicit save when the DISCONNECT -; message is processed. We may want to consider having the option of -; doing that here. -; - JUMP munge_save_data_pointer, IF 0x02 ; SAVE DATA POINTER - JUMP munge_restore_pointers, IF 0x03 ; RESTORE POINTERS - JUMP munge_disconnect, IF 0x04 ; DISCONNECT - INT int_msg_1, IF 0x07 ; MESSAGE REJECT - INT int_msg_1, IF 0x0f ; INITIATE RECOVERY -#ifdef EVENTS - INT int_EVENT_SELECT_FAILED -#endif - JUMP reject_message - -munge_2: - JUMP reject_message -; -; The SCSI standard allows targets to recover from transient -; error conditions by backing up the data pointer with a -; RESTORE POINTERS message. -; -; So, we must save and restore the _residual_ code as well as -; the current instruction pointer. Because of this messiness, -; it is simpler to put dynamic code in the dsa for this and to -; just do a simple jump down there. 
-; - -munge_save_data_pointer: -#if (CHIP == 710) - ; We have something in TEMP here, so first we must save that - MOVE TEMP0 TO SFBR - MOVE SFBR TO SCRATCH0 - MOVE TEMP1 TO SFBR - MOVE SFBR TO SCRATCH1 - MOVE TEMP2 TO SFBR - MOVE SFBR TO SCRATCH2 - MOVE TEMP3 TO SFBR - MOVE SFBR TO SCRATCH3 - MOVE MEMORY 4, addr_scratch, jump_temp + 4 - ; Now restore DSA - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif - MOVE DSA0 + dsa_save_data_pointer TO SFBR - MOVE SFBR TO SCRATCH0 - MOVE DSA1 + 0xff TO SFBR WITH CARRY - MOVE SFBR TO SCRATCH1 - MOVE DSA2 + 0xff TO SFBR WITH CARRY - MOVE SFBR TO SCRATCH2 - MOVE DSA3 + 0xff TO SFBR WITH CARRY - MOVE SFBR TO SCRATCH3 - - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4 - DMODE_MEMORY_TO_MEMORY -jump_dsa_save: - JUMP 0 - -munge_restore_pointers: -#if (CHIP == 710) - ; The code at dsa_restore_pointers will RETURN, but we don't care - ; about TEMP here, as it will overwrite it anyway. -#endif - MOVE DSA0 + dsa_restore_pointers TO SFBR - MOVE SFBR TO SCRATCH0 - MOVE DSA1 + 0xff TO SFBR WITH CARRY - MOVE SFBR TO SCRATCH1 - MOVE DSA2 + 0xff TO SFBR WITH CARRY - MOVE SFBR TO SCRATCH2 - MOVE DSA3 + 0xff TO SFBR WITH CARRY - MOVE SFBR TO SCRATCH3 - - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4 - DMODE_MEMORY_TO_MEMORY -jump_dsa_restore: - JUMP 0 - - -munge_disconnect: -#ifdef DEBUG - INT int_debug_disconnect_msg -#endif - -/* - * Before, we overlapped processing with waiting for disconnect, but - * debugging was beginning to appear messy. Temporarily move things - * to just before the WAIT DISCONNECT. - */ - -#ifdef ORIGINAL -#if (CHIP == 710) -; Following clears Unexpected Disconnect bit. What do we do? 
-#else - MOVE SCNTL2 & 0x7f TO SCNTL2 -#endif - CLEAR ACK -#endif - -#if (CHIP != 700) && (CHIP != 70066) - JUMP dsa_schedule -#else - WAIT DISCONNECT - INT int_norm_disconnected -#endif - -munge_extended: - CLEAR ACK - INT int_err_unexpected_phase, WHEN NOT MSG_IN - MOVE 1, msg_buf + 1, WHEN MSG_IN - JUMP munge_extended_2, IF 0x02 - JUMP munge_extended_3, IF 0x03 - JUMP reject_message - -munge_extended_2: - CLEAR ACK - MOVE 1, msg_buf + 2, WHEN MSG_IN - JUMP reject_message, IF NOT 0x02 ; Must be WDTR - CLEAR ACK - MOVE 1, msg_buf + 3, WHEN MSG_IN - INT int_msg_wdtr - -munge_extended_3: - CLEAR ACK - MOVE 1, msg_buf + 2, WHEN MSG_IN - JUMP reject_message, IF NOT 0x01 ; Must be SDTR - CLEAR ACK - MOVE 2, msg_buf + 3, WHEN MSG_IN - INT int_msg_sdtr - -ENTRY reject_message -reject_message: - SET ATN - CLEAR ACK - MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT - RETURN - -ENTRY accept_message -accept_message: - CLEAR ATN - CLEAR ACK - RETURN - -ENTRY respond_message -respond_message: - SET ATN - CLEAR ACK - MOVE FROM dsa_msgout_other, WHEN MSG_OUT - RETURN - -; -; command_complete -; -; PURPOSE : handle command termination when STATUS IN is detected by reading -; a status byte followed by a command termination message. -; -; Normal termination results in an INTFLY instruction, and -; the host system can pick out which command terminated by -; examining the MESSAGE and STATUS buffers of all currently -; executing commands; -; -; Abnormal (CHECK_CONDITION) termination results in an -; int_err_check_condition interrupt so that a REQUEST SENSE -; command can be issued out-of-order so that no other command -; clears the contingent allegiance condition. -; -; -; INPUTS : DSA - command -; -; CALLS : OK -; -; EXITS : On successful termination, control is passed to schedule. -; On abnormal termination, the user will usually modify the -; DSA fields and corresponding buffers and return control -; to select. 
-; - -ENTRY command_complete -command_complete: - MOVE FROM dsa_status, WHEN STATUS -#if (CHIP != 700) && (CHIP != 70066) - MOVE SFBR TO SCRATCH0 ; Save status -#endif /* (CHIP != 700) && (CHIP != 70066) */ -ENTRY command_complete_msgin -command_complete_msgin: - MOVE FROM dsa_msgin, WHEN MSG_IN -; Indicate that we should be expecting a disconnect -#if (CHIP != 710) - MOVE SCNTL2 & 0x7f TO SCNTL2 -#else - ; Above code cleared the Unexpected Disconnect bit, what do we do? -#endif - CLEAR ACK -#if (CHIP != 700) && (CHIP != 70066) - WAIT DISCONNECT - -; -; The SCSI specification states that when a UNIT ATTENTION condition -; is pending, as indicated by a CHECK CONDITION status message, -; the target shall revert to asynchronous transfers. Since -; synchronous transfers parameters are maintained on a per INITIATOR/TARGET -; basis, and returning control to our scheduler could work on a command -; running on another lun on that target using the old parameters, we must -; interrupt the host processor to get them changed, or change them ourselves. -; -; Once SCSI-II tagged queueing is implemented, things will be even more -; hairy, since contingent allegiance conditions exist on a per-target/lun -; basis, and issuing a new command with a different tag would clear it. -; In these cases, we must interrupt the host processor to get a request -; added to the HEAD of the queue with the request sense command, or we -; must automatically issue the request sense command. - -#if 0 - MOVE SCRATCH0 TO SFBR - JUMP command_failed, IF 0x02 -#endif -#if (CHIP == 710) -#if defined(MVME16x_INTFLY) -; For MVME16x (ie CHIP=710) we will force an INTFLY by triggering a software -; interrupt (SW7). We can use SCRATCH, as we are about to jump to -; schedule, which corrupts it anyway. Will probably remove this later, -; but want to check performance effects first. 
- -#define INTFLY_ADDR 0xfff40070 - - MOVE 0 TO SCRATCH0 - MOVE 0x80 TO SCRATCH1 - MOVE 0 TO SCRATCH2 - MOVE 0 TO SCRATCH3 - MOVE MEMORY 4, addr_scratch, INTFLY_ADDR -#else - INT int_norm_emulateintfly -#endif -#else - INTFLY -#endif -#endif /* (CHIP != 700) && (CHIP != 70066) */ -#if (CHIP == 710) - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif -#ifdef EVENTS - INT int_EVENT_COMPLETE -#endif -#if (CHIP != 700) && (CHIP != 70066) - JUMP schedule -command_failed: - INT int_err_check_condition -#else - INT int_norm_command_complete -#endif - -; -; wait_reselect -; -; PURPOSE : This is essentially the idle routine, where control lands -; when there are no new processes to schedule. wait_reselect -; waits for reselection, selection, and new commands. -; -; When a successful reselection occurs, with the aid -; of fixed up code in each DSA, wait_reselect walks the -; reconnect_dsa_queue, asking each dsa if the target ID -; and LUN match its. -; -; If a match is found, a call is made back to reselected_ok, -; which through the miracles of self modifying code, extracts -; the found DSA from the reconnect_dsa_queue and then -; returns control to the DSAs thread of execution. -; -; INPUTS : NONE -; -; CALLS : OK -; -; MODIFIES : DSA, -; -; EXITS : On successful reselection, control is returned to the -; DSA which called reselected_ok. If the WAIT RESELECT -; was interrupted by a new commands arrival signaled by -; SIG_P, control is passed to schedule. If the NCR is -; selected, the host system is interrupted with an -; int_err_selected which is usually responded to by -; setting DSP to the target_abort address. 
- -ENTRY wait_reselect -wait_reselect: -#ifdef EVENTS - int int_EVENT_IDLE -#endif -#ifdef DEBUG - int int_debug_idle -#endif - WAIT RESELECT wait_reselect_failed - -reselected: -#ifdef EVENTS - int int_EVENT_RESELECT -#endif - CLEAR TARGET - DMODE_MEMORY_TO_MEMORY - ; Read all data needed to reestablish the nexus - - MOVE 1, reselected_identify, WHEN MSG_IN - ; We used to CLEAR ACK here. -#if (CHIP != 700) && (CHIP != 70066) -#ifdef DEBUG - int int_debug_reselected -#endif - - ; Point DSA at the current head of the disconnected queue. - DMODE_MEMORY_TO_NCR - MOVE MEMORY 4, reconnect_dsa_head, addr_scratch - DMODE_MEMORY_TO_MEMORY -#if (CHIP == 710) - MOVE MEMORY 4, addr_scratch, saved_dsa -#else - CALL scratch_to_dsa -#endif - - ; Fix the update-next pointer so that the reconnect_dsa_head - ; pointer is the one that will be updated if this DSA is a hit - ; and we remove it from the queue. - - MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8 -#if (CHIP == 710) - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif - -ENTRY reselected_check_next -reselected_check_next: -#ifdef DEBUG - INT int_debug_reselect_check -#endif - ; Check for a NULL pointer. - MOVE DSA0 TO SFBR - JUMP reselected_not_end, IF NOT 0 - MOVE DSA1 TO SFBR - JUMP reselected_not_end, IF NOT 0 - MOVE DSA2 TO SFBR - JUMP reselected_not_end, IF NOT 0 - MOVE DSA3 TO SFBR - JUMP reselected_not_end, IF NOT 0 - INT int_err_unexpected_reselect - -reselected_not_end: - ; - ; XXX the ALU is only eight bits wide, and the assembler - ; wont do the dirt work for us. As long as dsa_check_reselect - ; is negative, we need to sign extend with 1 bits to the full - ; 32 bit width of the address. - ; - ; A potential work around would be to have a known alignment - ; of the DSA structure such that the base address plus - ; dsa_check_reselect doesn't require carrying from bytes - ; higher than the LSB. 
- ; - - MOVE DSA0 TO SFBR - MOVE SFBR + dsa_check_reselect TO SCRATCH0 - MOVE DSA1 TO SFBR - MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY - MOVE DSA2 TO SFBR - MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY - MOVE DSA3 TO SFBR - MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY - - DMODE_NCR_TO_MEMORY - MOVE MEMORY 4, addr_scratch, reselected_check + 4 - DMODE_MEMORY_TO_MEMORY -#if (CHIP == 710) - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa -#endif -reselected_check: - JUMP 0 - - -; -; -#if (CHIP == 710) -; We have problems here - the memory move corrupts TEMP and DSA. This -; routine is called from DSA code, and patched from many places. Scratch -; is probably free when it is called. -; We have to: -; copy temp to scratch, one byte at a time -; write scratch to patch a jump in place of the return -; do the move memory -; jump to the patched in return address -; DSA is corrupt when we get here, and can be left corrupt - -ENTRY reselected_ok -reselected_ok: - MOVE TEMP0 TO SFBR - MOVE SFBR TO SCRATCH0 - MOVE TEMP1 TO SFBR - MOVE SFBR TO SCRATCH1 - MOVE TEMP2 TO SFBR - MOVE SFBR TO SCRATCH2 - MOVE TEMP3 TO SFBR - MOVE SFBR TO SCRATCH3 - MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4 -reselected_ok_patch: - MOVE MEMORY 4, 0, 0 -reselected_ok_jump: - JUMP 0 -#else -ENTRY reselected_ok -reselected_ok: -reselected_ok_patch: - MOVE MEMORY 4, 0, 0 ; Patched : first word - ; is address of - ; successful dsa_next - ; Second word is last - ; unsuccessful dsa_next, - ; starting with - ; dsa_reconnect_head - ; We used to CLEAR ACK here. -#ifdef DEBUG - INT int_debug_reselected_ok -#endif -#ifdef DEBUG - INT int_debug_check_dsa -#endif - RETURN ; Return control to where -#endif -#else - INT int_norm_reselected -#endif /* (CHIP != 700) && (CHIP != 70066) */ - -selected: - INT int_err_selected; - -; -; A select or reselect failure can be caused by one of two conditions : -; 1. SIG_P was set. 
This will be the case if the user has written -; a new value to a previously NULL head of the issue queue. -; -; 2. The NCR53c810 was selected or reselected by another device. -; -; 3. The bus was already busy since we were selected or reselected -; before starting the command. - -wait_reselect_failed: -#ifdef EVENTS - INT int_EVENT_RESELECT_FAILED -#endif -; Check selected bit. -#if (CHIP == 710) - ; Must work out how to tell if we are selected.... -#else - MOVE SIST0 & 0x20 TO SFBR - JUMP selected, IF 0x20 -#endif -; Reading CTEST2 clears the SIG_P bit in the ISTAT register. - MOVE CTEST2 & 0x40 TO SFBR - JUMP schedule, IF 0x40 -; Check connected bit. -; FIXME: this needs to change if we support target mode - MOVE ISTAT & 0x08 TO SFBR - JUMP reselected, IF 0x08 -; FIXME : Something bogus happened, and we shouldn't fail silently. -#if 0 - JUMP schedule -#else - INT int_debug_panic -#endif - - -select_failed: -#if (CHIP == 710) - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 -#endif -#ifdef EVENTS - int int_EVENT_SELECT_FAILED -#endif -; Otherwise, mask the selected and reselected bits off SIST0 -#if (CHIP ==710) - ; Let's assume we don't get selected for now - MOVE SSTAT0 & 0x10 TO SFBR -#else - MOVE SIST0 & 0x30 TO SFBR - JUMP selected, IF 0x20 -#endif - JUMP reselected, IF 0x10 -; If SIGP is set, the user just gave us another command, and -; we should restart or return to the scheduler. -; Reading CTEST2 clears the SIG_P bit in the ISTAT register. - MOVE CTEST2 & 0x40 TO SFBR - JUMP select, IF 0x40 -; Check connected bit. -; FIXME: this needs to change if we support target mode -; FIXME: is this really necessary? - MOVE ISTAT & 0x08 TO SFBR - JUMP reselected, IF 0x08 -; FIXME : Something bogus happened, and we shouldn't fail silently. -#if 0 - JUMP schedule -#else - INT int_debug_panic -#endif - -; -; test_1 -; test_2 -; -; PURPOSE : run some verification tests on the NCR. 
test_1 -; copies test_src to test_dest and interrupts the host -; processor, testing for cache coherency and interrupt -; problems in the processes. -; -; test_2 runs a command with offsets relative to the -; DSA on entry, and is useful for miscellaneous experimentation. -; - -; Verify that interrupts are working correctly and that we don't -; have a cache invalidation problem. - -ABSOLUTE test_src = 0, test_dest = 0 -ENTRY test_1 -test_1: - MOVE MEMORY 4, test_src, test_dest - INT int_test_1 - -; -; Run arbitrary commands, with test code establishing a DSA -; - -ENTRY test_2 -test_2: - CLEAR TARGET -#if (CHIP == 710) - ; Enable selection timer -#ifdef NO_SELECTION_TIMEOUT - MOVE CTEST7 & 0xff TO CTEST7 -#else - MOVE CTEST7 & 0xef TO CTEST7 -#endif -#endif - SELECT ATN FROM 0, test_2_fail - JUMP test_2_msgout, WHEN MSG_OUT -ENTRY test_2_msgout -test_2_msgout: -#if (CHIP == 710) - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 -#endif - MOVE FROM 8, WHEN MSG_OUT - MOVE FROM 16, WHEN CMD - MOVE FROM 24, WHEN DATA_IN - MOVE FROM 32, WHEN STATUS - MOVE FROM 40, WHEN MSG_IN -#if (CHIP != 710) - MOVE SCNTL2 & 0x7f TO SCNTL2 -#endif - CLEAR ACK - WAIT DISCONNECT -test_2_fail: -#if (CHIP == 710) - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 -#endif - INT int_test_2 - -ENTRY debug_break -debug_break: - INT int_debug_break - -; -; initiator_abort -; target_abort -; -; PURPOSE : Abort the currently established nexus from with initiator -; or target mode. -; -; - -ENTRY target_abort -target_abort: - SET TARGET - DISCONNECT - CLEAR TARGET - JUMP schedule - -ENTRY initiator_abort -initiator_abort: - SET ATN -; -; The SCSI-I specification says that targets may go into MSG out at -; their leisure upon receipt of the ATN single. On all versions of the -; specification, we can't change phases until REQ transitions true->false, -; so we need to sink/source one byte of data to allow the transition. 
-; -; For the sake of safety, we'll only source one byte of data in all -; cases, but to accommodate the SCSI-I dain bramage, we'll sink an -; arbitrary number of bytes. - JUMP spew_cmd, WHEN CMD - JUMP eat_msgin, WHEN MSG_IN - JUMP eat_datain, WHEN DATA_IN - JUMP eat_status, WHEN STATUS - JUMP spew_dataout, WHEN DATA_OUT - JUMP sated -spew_cmd: - MOVE 1, NCR53c7xx_zero, WHEN CMD - JUMP sated -eat_msgin: - MOVE 1, NCR53c7xx_sink, WHEN MSG_IN - JUMP eat_msgin, WHEN MSG_IN - JUMP sated -eat_status: - MOVE 1, NCR53c7xx_sink, WHEN STATUS - JUMP eat_status, WHEN STATUS - JUMP sated -eat_datain: - MOVE 1, NCR53c7xx_sink, WHEN DATA_IN - JUMP eat_datain, WHEN DATA_IN - JUMP sated -spew_dataout: - MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT -sated: -#if (CHIP != 710) - MOVE SCNTL2 & 0x7f TO SCNTL2 -#endif - MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT - WAIT DISCONNECT - INT int_norm_aborted - -#if (CHIP != 710) -; -; dsa_to_scratch -; scratch_to_dsa -; -; PURPOSE : -; The NCR chips cannot do a move memory instruction with the DSA register -; as the source or destination. So, we provide a couple of subroutines -; that let us switch between the DSA register and scratch register. -; -; Memory moves to/from the DSPS register also don't work, but we -; don't use them. -; -; - - -dsa_to_scratch: - MOVE DSA0 TO SFBR - MOVE SFBR TO SCRATCH0 - MOVE DSA1 TO SFBR - MOVE SFBR TO SCRATCH1 - MOVE DSA2 TO SFBR - MOVE SFBR TO SCRATCH2 - MOVE DSA3 TO SFBR - MOVE SFBR TO SCRATCH3 - RETURN - -scratch_to_dsa: - MOVE SCRATCH0 TO SFBR - MOVE SFBR TO DSA0 - MOVE SCRATCH1 TO SFBR - MOVE SFBR TO DSA1 - MOVE SCRATCH2 TO SFBR - MOVE SFBR TO DSA2 - MOVE SCRATCH3 TO SFBR - MOVE SFBR TO DSA3 - RETURN -#endif - -#if (CHIP == 710) -; Little patched jump, used to overcome problems with TEMP getting -; corrupted on memory moves. 
- -jump_temp: - JUMP 0 -#endif diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped linux-2.6.22-591/drivers/scsi/53c7xx_d.h_shipped --- linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c7xx_d.h_shipped 1969-12-31 19:00:00.000000000 -0500 @@ -1,2874 +0,0 @@ -/* DO NOT EDIT - Generated automatically by script_asm.pl */ -static u32 SCRIPT[] = { -/* - - - - - -; 53c710 driver. Modified from Drew Eckhardts driver -; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] -; -; I have left the script for the 53c8xx family in here, as it is likely -; to be useful to see what I changed when bug hunting. - -; NCR 53c810 driver, main script -; Sponsored by -; iX Multiuser Multitasking Magazine -; hm@ix.de -; -; Copyright 1993, 1994, 1995 Drew Eckhardt -; Visionary Computing -; (Unix and Linux consulting and custom programming) -; drew@PoohSticks.ORG -; +1 (303) 786-7975 -; -; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. -; -; PRE-ALPHA -; -; For more information, please consult -; -; NCR 53C810 -; PCI-SCSI I/O Processor -; Data Manual -; -; NCR 53C710 -; SCSI I/O Processor -; Programmers Guide -; -; NCR Microelectronics -; 1635 Aeroplaza Drive -; Colorado Springs, CO 80916 -; 1+ (719) 578-3400 -; -; Toll free literature number -; +1 (800) 334-5454 -; -; IMPORTANT : This code is self modifying due to the limitations of -; the NCR53c7,8xx series chips. Persons debugging this code with -; the remote debugger should take this into account, and NOT set -; breakpoints in modified instructions. -; -; Design: -; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard -; microcontroller using a simple instruction set. -; -; So, to minimize the effects of interrupt latency, and to maximize -; throughput, this driver offloads the practical maximum amount -; of processing to the SCSI chip while still maintaining a common -; structure. 
-; -; Where tradeoffs were needed between efficiency on the older -; chips and the newer NCR53c800 series, the NCR53c800 series -; was chosen. -; -; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully -; automate SCSI transfers without host processor intervention, this -; isn't the case with the NCR53c710 and newer chips which allow -; -; - reads and writes to the internal registers from within the SCSI -; scripts, allowing the SCSI SCRIPTS(tm) code to save processor -; state so that multiple threads of execution are possible, and also -; provide an ALU for loop control, etc. -; -; - table indirect addressing for some instructions. This allows -; pointers to be located relative to the DSA ((Data Structure -; Address) register. -; -; These features make it possible to implement a mailbox style interface, -; where the same piece of code is run to handle I/O for multiple threads -; at once minimizing our need to relocate code. Since the NCR53c700/ -; NCR53c800 series have a unique combination of features, making a -; a standard ingoing/outgoing mailbox system, costly, I've modified it. -; -; - Mailboxes are a mixture of code and data. This lets us greatly -; simplify the NCR53c810 code and do things that would otherwise -; not be possible. -; -; The saved data pointer is now implemented as follows : -; -; Control flow has been architected such that if control reaches -; munge_save_data_pointer, on a restore pointers message or -; reconnection, a jump to the address formerly in the TEMP register -; will allow the SCSI command to resume execution. -; - -; -; Note : the DSA structures must be aligned on 32 bit boundaries, -; since the source and destination of MOVE MEMORY instructions -; must share the same alignment and this is the alignment of the -; NCR registers. 
-; - -; For some systems (MVME166, for example) dmode is always the same, so don't -; waste time writing it - - - - - - - - - - - -ABSOLUTE dsa_temp_lun = 0 ; Patch to lun for current dsa -ABSOLUTE dsa_temp_next = 0 ; Patch to dsa next for current dsa -ABSOLUTE dsa_temp_addr_next = 0 ; Patch to address of dsa next address - ; for current dsa -ABSOLUTE dsa_temp_sync = 0 ; Patch to address of per-target - ; sync routine -ABSOLUTE dsa_sscf_710 = 0 ; Patch to address of per-target - ; sscf value (53c710) -ABSOLUTE dsa_temp_target = 0 ; Patch to id for current dsa -ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command - ; saved data pointer -ABSOLUTE dsa_temp_addr_residual = 0 ; Patch to address of per-command - ; current residual code -ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command - ; saved residual code -ABSOLUTE dsa_temp_addr_new_value = 0 ; Address of value for JUMP operand -ABSOLUTE dsa_temp_addr_array_value = 0 ; Address to copy to -ABSOLUTE dsa_temp_addr_dsa_value = 0 ; Address of this DSA value - -; -; Once a device has initiated reselection, we need to compare it -; against the singly linked list of commands which have disconnected -; and are pending reselection. These commands are maintained in -; an unordered singly linked list of DSA structures, through the -; DSA pointers at their 'centers' headed by the reconnect_dsa_head -; pointer. -; -; To avoid complications in removing commands from the list, -; I minimize the amount of expensive (at eight operations per -; addition @ 500-600ns each) pointer operations which must -; be done in the NCR driver by precomputing them on the -; host processor during dsa structure generation. -; -; The fixed-up per DSA code knows how to recognize the nexus -; associated with the corresponding SCSI command, and modifies -; the source and destination pointers for the MOVE MEMORY -; instruction which is executed when reselected_ok is called -; to remove the command from the list. 
Similarly, DSA is -; loaded with the address of the next DSA structure and -; reselected_check_next is called if a failure occurs. -; -; Perhaps more concisely, the net effect of the mess is -; -; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head, -; src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) { -; src = &dsa->next; -; if (target_id == dsa->id && target_lun == dsa->lun) { -; *dest = *src; -; break; -; } -; } -; -; if (!dsa) -; error (int_err_unexpected_reselect); -; else -; longjmp (dsa->jump_resume, 0); -; -; - - -; Define DSA structure used for mailboxes -ENTRY dsa_code_template -dsa_code_template: -ENTRY dsa_code_begin -dsa_code_begin: -; RGH: Don't care about TEMP and DSA here - - MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch - -at 0x00000000 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - MOVE MEMORY 4, addr_scratch, saved_dsa - -at 0x00000003 : */ 0xc0000004,0x00000000,0x00000000, -/* - ; We are about to go and select the device, so must set SSCF bits - MOVE MEMORY 4, dsa_sscf_710, addr_scratch - -at 0x00000006 : */ 0xc0000004,0x00000000,0x00000000, -/* - - MOVE SCRATCH3 TO SFBR - -at 0x00000009 : */ 0x72370000,0x00000000, -/* - - - - MOVE SFBR TO SBCL - -at 0x0000000b : */ 0x6a0b0000,0x00000000, -/* - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x0000000d : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - CALL select - -at 0x00000010 : */ 0x88080000,0x000001f8, -/* -; Handle the phase mismatch which may have resulted from the -; MOVE FROM dsa_msgout if we returned here. The CLEAR ATN -; may or may not be necessary, and we should update script_asm.pl -; to handle multiple pieces. - CLEAR ATN - -at 0x00000012 : */ 0x60000008,0x00000000, -/* - CLEAR ACK - -at 0x00000014 : */ 0x60000040,0x00000000, -/* - -; Replace second operand with address of JUMP instruction dest operand -; in schedule table for this DSA. Becomes dsa_jump_dest in 53c7,8xx.c. 
-ENTRY dsa_code_fix_jump -dsa_code_fix_jump: - MOVE MEMORY 4, NOP_insn, 0 - -at 0x00000016 : */ 0xc0000004,0x00000000,0x00000000, -/* - JUMP select_done - -at 0x00000019 : */ 0x80080000,0x00000230, -/* - -; wrong_dsa loads the DSA register with the value of the dsa_next -; field. -; -wrong_dsa: - -; NOTE DSA is corrupt when we arrive here! - -; Patch the MOVE MEMORY INSTRUCTION such that -; the destination address is the address of the OLD -; next pointer. -; - MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8 - -at 0x0000001b : */ 0xc0000004,0x00000000,0x000007ec, -/* - -; -; Move the _contents_ of the next pointer into the DSA register as -; the next I_T_L or I_T_L_Q tupple to check against the established -; nexus. -; - MOVE MEMORY 4, dsa_temp_next, addr_scratch - -at 0x0000001e : */ 0xc0000004,0x00000000,0x00000000, -/* - - - MOVE MEMORY 4, addr_scratch, saved_dsa - -at 0x00000021 : */ 0xc0000004,0x00000000,0x00000000, -/* - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x00000024 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - JUMP reselected_check_next - -at 0x00000027 : */ 0x80080000,0x000006f0, -/* - -ABSOLUTE dsa_save_data_pointer = 0 -ENTRY dsa_code_save_data_pointer -dsa_code_save_data_pointer: - - ; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt - ; We MUST return with DSA correct - MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer - -at 0x00000029 : */ 0xc0000004,0x000009c8,0x00000000, -/* -; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h - MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual - -at 0x0000002c : */ 0xc0000018,0x00000000,0x00000000, -/* - CLEAR ACK - -at 0x0000002f : */ 0x60000040,0x00000000, -/* - - - - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x00000031 : */ 0xc0000004,0x00000000,0x00000000, -/* - JUMP jump_temp - -at 0x00000034 : */ 0x80080000,0x000009c4, -/* - -ABSOLUTE dsa_restore_pointers = 0 -ENTRY dsa_code_restore_pointers -dsa_code_restore_pointers: - - ; TEMP and DSA are 
corrupt when we get here, but who cares! - MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4 - -at 0x00000036 : */ 0xc0000004,0x00000000,0x000009c8, -/* -; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h - MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual - -at 0x00000039 : */ 0xc0000018,0x00000000,0x00000000, -/* - CLEAR ACK - -at 0x0000003c : */ 0x60000040,0x00000000, -/* - ; Restore DSA, note we don't care about TEMP - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x0000003e : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - JUMP jump_temp - -at 0x00000041 : */ 0x80080000,0x000009c4, -/* - - -ABSOLUTE dsa_check_reselect = 0 -; dsa_check_reselect determines whether or not the current target and -; lun match the current DSA -ENTRY dsa_code_check_reselect -dsa_code_check_reselect: - - - - MOVE LCRC TO SFBR ; LCRC has our ID and his ID bits set - -at 0x00000043 : */ 0x72230000,0x00000000, -/* - JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80 - -at 0x00000045 : */ 0x80848000,0x00ffff50, -/* - - - - - -; -; Hack - move to scratch first, since SFBR is not writeable -; via the CPU and hence a MOVE MEMORY instruction. -; - - MOVE MEMORY 1, reselected_identify, addr_scratch - -at 0x00000047 : */ 0xc0000001,0x00000000,0x00000000, -/* - - - ; BIG ENDIAN ON MVME16x - MOVE SCRATCH3 TO SFBR - -at 0x0000004a : */ 0x72370000,0x00000000, -/* - - - -; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips -; Are you sure about that? richard@sleepie.demon.co.uk - JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8 - -at 0x0000004c : */ 0x8084f800,0x00ffff34, -/* -; Patch the MOVE MEMORY INSTRUCTION such that -; the source address is the address of this dsa's -; next pointer. 
- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4 - -at 0x0000004e : */ 0xc0000004,0x00000000,0x000007e8, -/* - CALL reselected_ok - -at 0x00000051 : */ 0x88080000,0x00000798, -/* - -; Restore DSA following memory moves in reselected_ok -; dsa_temp_sync doesn't really care about DSA, but it has an -; optional debug INT so a valid DSA is a good idea. - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x00000053 : */ 0xc0000004,0x00000000,0x00000000, -/* - - CALL dsa_temp_sync - -at 0x00000056 : */ 0x88080000,0x00000000, -/* -; Release ACK on the IDENTIFY message _after_ we've set the synchronous -; transfer parameters! - CLEAR ACK - -at 0x00000058 : */ 0x60000040,0x00000000, -/* -; Implicitly restore pointers on reselection, so a RETURN -; will transfer control back to the right spot. - CALL REL (dsa_code_restore_pointers) - -at 0x0000005a : */ 0x88880000,0x00ffff68, -/* - RETURN - -at 0x0000005c : */ 0x90080000,0x00000000, -/* -ENTRY dsa_zero -dsa_zero: -ENTRY dsa_code_template_end -dsa_code_template_end: - -; Perform sanity check for dsa_fields_start == dsa_code_template_end - -; dsa_zero, puke. - -ABSOLUTE dsa_fields_start = 0 ; Sanity marker - ; pad 48 bytes (fix this RSN) -ABSOLUTE dsa_next = 48 ; len 4 Next DSA - ; del 4 Previous DSA address -ABSOLUTE dsa_cmnd = 56 ; len 4 Scsi_Cmnd * for this thread. -ABSOLUTE dsa_select = 60 ; len 4 Device ID, Period, Offset for - ; table indirect select -ABSOLUTE dsa_msgout = 64 ; len 8 table indirect move parameter for - ; select message -ABSOLUTE dsa_cmdout = 72 ; len 8 table indirect move parameter for - ; command -ABSOLUTE dsa_dataout = 80 ; len 4 code pointer for dataout -ABSOLUTE dsa_datain = 84 ; len 4 code pointer for datain -ABSOLUTE dsa_msgin = 88 ; len 8 table indirect move for msgin -ABSOLUTE dsa_status = 96 ; len 8 table indirect move for status byte -ABSOLUTE dsa_msgout_other = 104 ; len 8 table indirect for normal message out - ; (Synchronous transfer negotiation, etc). 
-ABSOLUTE dsa_end = 112 - -ABSOLUTE schedule = 0 ; Array of JUMP dsa_begin or JUMP (next), - ; terminated by a call to JUMP wait_reselect - -; Linked lists of DSA structures -ABSOLUTE reconnect_dsa_head = 0 ; Link list of DSAs which can reconnect -ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing - ; address of reconnect_dsa_head - -; These select the source and destination of a MOVE MEMORY instruction -ABSOLUTE dmode_memory_to_memory = 0x0 -ABSOLUTE dmode_memory_to_ncr = 0x0 -ABSOLUTE dmode_ncr_to_memory = 0x0 - -ABSOLUTE addr_scratch = 0x0 -ABSOLUTE addr_temp = 0x0 - -ABSOLUTE saved_dsa = 0x0 -ABSOLUTE emulfly = 0x0 -ABSOLUTE addr_dsa = 0x0 - - - -; Interrupts - -; MSB indicates type -; 0 handle error condition -; 1 handle message -; 2 handle normal condition -; 3 debugging interrupt -; 4 testing interrupt -; Next byte indicates specific error - -; XXX not yet implemented, I'm not sure if I want to - -; Next byte indicates the routine the error occurred in -; The LSB indicates the specific place the error occurred - -ABSOLUTE int_err_unexpected_phase = 0x00000000 ; Unexpected phase encountered -ABSOLUTE int_err_selected = 0x00010000 ; SELECTED (nee RESELECTED) -ABSOLUTE int_err_unexpected_reselect = 0x00020000 -ABSOLUTE int_err_check_condition = 0x00030000 -ABSOLUTE int_err_no_phase = 0x00040000 -ABSOLUTE int_msg_wdtr = 0x01000000 ; WDTR message received -ABSOLUTE int_msg_sdtr = 0x01010000 ; SDTR received -ABSOLUTE int_msg_1 = 0x01020000 ; single byte special message - ; received - -ABSOLUTE int_norm_select_complete = 0x02000000 ; Select complete, reprogram - ; registers. -ABSOLUTE int_norm_reselect_complete = 0x02010000 ; Nexus established -ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete -ABSOLUTE int_norm_disconnected = 0x02030000 ; Disconnected -ABSOLUTE int_norm_aborted =0x02040000 ; Aborted *dsa -ABSOLUTE int_norm_reset = 0x02050000 ; Generated BUS reset. 
-ABSOLUTE int_norm_emulateintfly = 0x02060000 ; 53C710 Emulated intfly -ABSOLUTE int_debug_break = 0x03000000 ; Break point - -ABSOLUTE int_debug_panic = 0x030b0000 ; Panic driver - - -ABSOLUTE int_test_1 = 0x04000000 ; Test 1 complete -ABSOLUTE int_test_2 = 0x04010000 ; Test 2 complete -ABSOLUTE int_test_3 = 0x04020000 ; Test 3 complete - - -; These should start with 0x05000000, with low bits incrementing for -; each one. - - - -ABSOLUTE NCR53c7xx_msg_abort = 0 ; Pointer to abort message -ABSOLUTE NCR53c7xx_msg_reject = 0 ; Pointer to reject message -ABSOLUTE NCR53c7xx_zero = 0 ; long with zero in it, use for source -ABSOLUTE NCR53c7xx_sink = 0 ; long to dump worthless data in -ABSOLUTE NOP_insn = 0 ; NOP instruction - -; Pointer to message, potentially multi-byte -ABSOLUTE msg_buf = 0 - -; Pointer to holding area for reselection information -ABSOLUTE reselected_identify = 0 -ABSOLUTE reselected_tag = 0 - -; Request sense command pointer, it's a 6 byte command, should -; be constant for all commands since we always want 16 bytes of -; sense and we don't need to change any fields as we did under -; SCSI-I when we actually cared about the LUN field. -;EXTERNAL NCR53c7xx_sense ; Request sense command - - -; dsa_schedule -; PURPOSE : after a DISCONNECT message has been received, and pointers -; saved, insert the current DSA structure at the head of the -; disconnected queue and fall through to the scheduler. 
-; -; CALLS : OK -; -; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list -; of disconnected commands -; -; MODIFIES : SCRATCH, reconnect_dsa_head -; -; EXITS : always passes control to schedule - -ENTRY dsa_schedule -dsa_schedule: - - - - -; -; Calculate the address of the next pointer within the DSA -; structure of the command that is currently disconnecting -; - - ; Read what should be the current DSA from memory - actual DSA - ; register is probably corrupt - MOVE MEMORY 4, saved_dsa, addr_scratch - -at 0x0000005e : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - MOVE SCRATCH0 + dsa_next TO SCRATCH0 - -at 0x00000061 : */ 0x7e343000,0x00000000, -/* - MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY - -at 0x00000063 : */ 0x7f350000,0x00000000, -/* - MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY - -at 0x00000065 : */ 0x7f360000,0x00000000, -/* - MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY - -at 0x00000067 : */ 0x7f370000,0x00000000, -/* - -; Point the next field of this DSA structure at the current disconnected -; list - - MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8 - -at 0x00000069 : */ 0xc0000004,0x00000000,0x000001b8, -/* - -dsa_schedule_insert: - MOVE MEMORY 4, reconnect_dsa_head, 0 - -at 0x0000006c : */ 0xc0000004,0x00000000,0x00000000, -/* - -; And update the head pointer. 
- - ; Read what should be the current DSA from memory - actual DSA - ; register is probably corrupt - MOVE MEMORY 4, saved_dsa, addr_scratch - -at 0x0000006f : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - - MOVE MEMORY 4, addr_scratch, reconnect_dsa_head - -at 0x00000072 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - - - - CLEAR ACK - -at 0x00000075 : */ 0x60000040,0x00000000, -/* - - - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x00000077 : */ 0xc0000004,0x00000000,0x00000000, -/* - - WAIT DISCONNECT - -at 0x0000007a : */ 0x48000000,0x00000000, -/* - - - - - - - JUMP schedule - -at 0x0000007c : */ 0x80080000,0x00000000, -/* - - -; -; select -; -; PURPOSE : establish a nexus for the SCSI command referenced by DSA. -; On success, the current DSA structure is removed from the issue -; queue. Usually, this is entered as a fall-through from schedule, -; although the contingent allegiance handling code will write -; the select entry address to the DSP to restart a command as a -; REQUEST SENSE. A message is sent (usually IDENTIFY, although -; additional SDTR or WDTR messages may be sent). COMMAND OUT -; is handled. -; -; INPUTS : DSA - SCSI command, issue_dsa_head -; -; CALLS : NOT OK -; -; MODIFIES : SCRATCH, issue_dsa_head -; -; EXITS : on reselection or selection, go to select_failed -; otherwise, RETURN so control is passed back to -; dsa_begin. -; - -ENTRY select -select: - - - - - - - - - CLEAR TARGET - -at 0x0000007e : */ 0x60000200,0x00000000, -/* - -; XXX -; -; In effect, SELECTION operations are backgrounded, with execution -; continuing until code which waits for REQ or a fatal interrupt is -; encountered. -; -; So, for more performance, we could overlap the code which removes -; the command from the NCRs issue queue with the selection, but -; at this point I don't want to deal with the error recovery. 
-; - - - - ; Enable selection timer - - - - MOVE CTEST7 & 0xef TO CTEST7 - -at 0x00000080 : */ 0x7c1bef00,0x00000000, -/* - - - SELECT ATN FROM dsa_select, select_failed - -at 0x00000082 : */ 0x4300003c,0x00000828, -/* - JUMP select_msgout, WHEN MSG_OUT - -at 0x00000084 : */ 0x860b0000,0x00000218, -/* -ENTRY select_msgout -select_msgout: - - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 - -at 0x00000086 : */ 0x7a1b1000,0x00000000, -/* - - MOVE FROM dsa_msgout, WHEN MSG_OUT - -at 0x00000088 : */ 0x1e000000,0x00000040, -/* - - - - - - - - - - - RETURN - -at 0x0000008a : */ 0x90080000,0x00000000, -/* - -; -; select_done -; -; PURPOSE: continue on to normal data transfer; called as the exit -; point from dsa_begin. -; -; INPUTS: dsa -; -; CALLS: OK -; -; - -select_done: - -; NOTE DSA is corrupt when we arrive here! - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x0000008c : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - - - - - -; After a successful selection, we should get either a CMD phase or -; some transfer request negotiation message. - - JUMP cmdout, WHEN CMD - -at 0x0000008f : */ 0x820b0000,0x0000025c, -/* - INT int_err_unexpected_phase, WHEN NOT MSG_IN - -at 0x00000091 : */ 0x9f030000,0x00000000, -/* - -select_msg_in: - CALL msg_in, WHEN MSG_IN - -at 0x00000093 : */ 0x8f0b0000,0x0000041c, -/* - JUMP select_msg_in, WHEN MSG_IN - -at 0x00000095 : */ 0x870b0000,0x0000024c, -/* - -cmdout: - INT int_err_unexpected_phase, WHEN NOT CMD - -at 0x00000097 : */ 0x9a030000,0x00000000, -/* - - - -ENTRY cmdout_cmdout -cmdout_cmdout: - - MOVE FROM dsa_cmdout, WHEN CMD - -at 0x00000099 : */ 0x1a000000,0x00000048, -/* - - - - -; -; data_transfer -; other_out -; other_in -; other_transfer -; -; PURPOSE : handle the main data transfer for a SCSI command in -; several parts. 
In the first part, data_transfer, DATA_IN -; and DATA_OUT phases are allowed, with the user provided -; code (usually dynamically generated based on the scatter/gather -; list associated with a SCSI command) called to handle these -; phases. -; -; After control has passed to one of the user provided -; DATA_IN or DATA_OUT routines, back calls are made to -; other_transfer_in or other_transfer_out to handle non-DATA IN -; and DATA OUT phases respectively, with the state of the active -; data pointer being preserved in TEMP. -; -; On completion, the user code passes control to other_transfer -; which causes DATA_IN and DATA_OUT to result in unexpected_phase -; interrupts so that data overruns may be trapped. -; -; INPUTS : DSA - SCSI command -; -; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in -; other_transfer -; -; MODIFIES : SCRATCH -; -; EXITS : if STATUS IN is detected, signifying command completion, -; the NCR jumps to command_complete. If MSG IN occurs, a -; CALL is made to msg_in. Otherwise, other_transfer runs in -; an infinite loop. -; - -ENTRY data_transfer -data_transfer: - JUMP cmdout_cmdout, WHEN CMD - -at 0x0000009b : */ 0x820b0000,0x00000264, -/* - CALL msg_in, WHEN MSG_IN - -at 0x0000009d : */ 0x8f0b0000,0x0000041c, -/* - INT int_err_unexpected_phase, WHEN MSG_OUT - -at 0x0000009f : */ 0x9e0b0000,0x00000000, -/* - JUMP do_dataout, WHEN DATA_OUT - -at 0x000000a1 : */ 0x800b0000,0x000002a4, -/* - JUMP do_datain, WHEN DATA_IN - -at 0x000000a3 : */ 0x810b0000,0x000002fc, -/* - JUMP command_complete, WHEN STATUS - -at 0x000000a5 : */ 0x830b0000,0x0000065c, -/* - JUMP data_transfer - -at 0x000000a7 : */ 0x80080000,0x0000026c, -/* -ENTRY end_data_transfer -end_data_transfer: - -; -; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain -; should be fixed up whenever the nexus changes so it can point to the -; correct routine for that command. 
-; - - -; Nasty jump to dsa->dataout -do_dataout: - - MOVE MEMORY 4, saved_dsa, addr_scratch - -at 0x000000a9 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - MOVE SCRATCH0 + dsa_dataout TO SCRATCH0 - -at 0x000000ac : */ 0x7e345000,0x00000000, -/* - MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY - -at 0x000000ae : */ 0x7f350000,0x00000000, -/* - MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY - -at 0x000000b0 : */ 0x7f360000,0x00000000, -/* - MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY - -at 0x000000b2 : */ 0x7f370000,0x00000000, -/* - - MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4 - -at 0x000000b4 : */ 0xc0000004,0x00000000,0x000002e0, -/* - -dataout_to_jump: - MOVE MEMORY 4, 0, dataout_jump + 4 - -at 0x000000b7 : */ 0xc0000004,0x00000000,0x000002f8, -/* - - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x000000ba : */ 0xc0000004,0x00000000,0x00000000, -/* - -dataout_jump: - JUMP 0 - -at 0x000000bd : */ 0x80080000,0x00000000, -/* - -; Nasty jump to dsa->dsain -do_datain: - - MOVE MEMORY 4, saved_dsa, addr_scratch - -at 0x000000bf : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - MOVE SCRATCH0 + dsa_datain TO SCRATCH0 - -at 0x000000c2 : */ 0x7e345400,0x00000000, -/* - MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY - -at 0x000000c4 : */ 0x7f350000,0x00000000, -/* - MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY - -at 0x000000c6 : */ 0x7f360000,0x00000000, -/* - MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY - -at 0x000000c8 : */ 0x7f370000,0x00000000, -/* - - MOVE MEMORY 4, addr_scratch, datain_to_jump + 4 - -at 0x000000ca : */ 0xc0000004,0x00000000,0x00000338, -/* - -ENTRY datain_to_jump -datain_to_jump: - MOVE MEMORY 4, 0, datain_jump + 4 - -at 0x000000cd : */ 0xc0000004,0x00000000,0x00000350, -/* - - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x000000d0 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - -datain_jump: - JUMP 0 - -at 0x000000d3 : */ 0x80080000,0x00000000, -/* - - - -; Note that other_out and 
other_in loop until a non-data phase -; is discovered, so we only execute return statements when we -; can go on to the next data phase block move statement. - -ENTRY other_out -other_out: - - - - INT int_err_unexpected_phase, WHEN CMD - -at 0x000000d5 : */ 0x9a0b0000,0x00000000, -/* - JUMP msg_in_restart, WHEN MSG_IN - -at 0x000000d7 : */ 0x870b0000,0x000003fc, -/* - INT int_err_unexpected_phase, WHEN MSG_OUT - -at 0x000000d9 : */ 0x9e0b0000,0x00000000, -/* - INT int_err_unexpected_phase, WHEN DATA_IN - -at 0x000000db : */ 0x990b0000,0x00000000, -/* - JUMP command_complete, WHEN STATUS - -at 0x000000dd : */ 0x830b0000,0x0000065c, -/* - JUMP other_out, WHEN NOT DATA_OUT - -at 0x000000df : */ 0x80030000,0x00000354, -/* - -; TEMP should be OK, as we got here from a call in the user dataout code. - - RETURN - -at 0x000000e1 : */ 0x90080000,0x00000000, -/* - -ENTRY other_in -other_in: - - - - INT int_err_unexpected_phase, WHEN CMD - -at 0x000000e3 : */ 0x9a0b0000,0x00000000, -/* - JUMP msg_in_restart, WHEN MSG_IN - -at 0x000000e5 : */ 0x870b0000,0x000003fc, -/* - INT int_err_unexpected_phase, WHEN MSG_OUT - -at 0x000000e7 : */ 0x9e0b0000,0x00000000, -/* - INT int_err_unexpected_phase, WHEN DATA_OUT - -at 0x000000e9 : */ 0x980b0000,0x00000000, -/* - JUMP command_complete, WHEN STATUS - -at 0x000000eb : */ 0x830b0000,0x0000065c, -/* - JUMP other_in, WHEN NOT DATA_IN - -at 0x000000ed : */ 0x81030000,0x0000038c, -/* - -; TEMP should be OK, as we got here from a call in the user datain code. 
- - RETURN - -at 0x000000ef : */ 0x90080000,0x00000000, -/* - - -ENTRY other_transfer -other_transfer: - INT int_err_unexpected_phase, WHEN CMD - -at 0x000000f1 : */ 0x9a0b0000,0x00000000, -/* - CALL msg_in, WHEN MSG_IN - -at 0x000000f3 : */ 0x8f0b0000,0x0000041c, -/* - INT int_err_unexpected_phase, WHEN MSG_OUT - -at 0x000000f5 : */ 0x9e0b0000,0x00000000, -/* - INT int_err_unexpected_phase, WHEN DATA_OUT - -at 0x000000f7 : */ 0x980b0000,0x00000000, -/* - INT int_err_unexpected_phase, WHEN DATA_IN - -at 0x000000f9 : */ 0x990b0000,0x00000000, -/* - JUMP command_complete, WHEN STATUS - -at 0x000000fb : */ 0x830b0000,0x0000065c, -/* - JUMP other_transfer - -at 0x000000fd : */ 0x80080000,0x000003c4, -/* - -; -; msg_in_restart -; msg_in -; munge_msg -; -; PURPOSE : process messages from a target. msg_in is called when the -; caller hasn't read the first byte of the message. munge_message -; is called when the caller has read the first byte of the message, -; and left it in SFBR. msg_in_restart is called when the caller -; hasn't read the first byte of the message, and wishes RETURN -; to transfer control back to the address of the conditional -; CALL instruction rather than to the instruction after it. -; -; Various int_* interrupts are generated when the host system -; needs to intervene, as is the case with SDTR, WDTR, and -; INITIATE RECOVERY messages. -; -; When the host system handles one of these interrupts, -; it can respond by reentering at reject_message, -; which rejects the message and returns control to -; the caller of msg_in or munge_msg, accept_message -; which clears ACK and returns control, or reply_message -; which sends the message pointed to by the DSA -; msgout_other table indirect field. -; -; DISCONNECT messages are handled by moving the command -; to the reconnect_dsa_queue. - -; NOTE: DSA should be valid when we get here - we cannot save both it -; and TEMP in this routine. 
- -; -; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg -; only) -; -; CALLS : NO. The TEMP register isn't backed up to allow nested calls. -; -; MODIFIES : SCRATCH, DSA on DISCONNECT -; -; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS, -; and normal return from message handlers running under -; Linux, control is returned to the caller. Receipt -; of DISCONNECT messages pass control to dsa_schedule. -; -ENTRY msg_in_restart -msg_in_restart: -; XXX - hackish -; -; Since it's easier to debug changes to the statically -; compiled code, rather than the dynamically generated -; stuff, such as -; -; MOVE x, y, WHEN data_phase -; CALL other_z, WHEN NOT data_phase -; MOVE x, y, WHEN data_phase -; -; I'd like to have certain routines (notably the message handler) -; restart on the conditional call rather than the next instruction. -; -; So, subtract 8 from the return address - - MOVE TEMP0 + 0xf8 TO TEMP0 - -at 0x000000ff : */ 0x7e1cf800,0x00000000, -/* - MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY - -at 0x00000101 : */ 0x7f1dff00,0x00000000, -/* - MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY - -at 0x00000103 : */ 0x7f1eff00,0x00000000, -/* - MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY - -at 0x00000105 : */ 0x7f1fff00,0x00000000, -/* - -ENTRY msg_in -msg_in: - MOVE 1, msg_buf, WHEN MSG_IN - -at 0x00000107 : */ 0x0f000001,0x00000000, -/* - -munge_msg: - JUMP munge_extended, IF 0x01 ; EXTENDED MESSAGE - -at 0x00000109 : */ 0x800c0001,0x00000574, -/* - JUMP munge_2, IF 0x20, AND MASK 0xdf ; two byte message - -at 0x0000010b : */ 0x800cdf20,0x00000464, -/* -; -; XXX - I've seen a handful of broken SCSI devices which fail to issue -; a SAVE POINTERS message before disconnecting in the middle of -; a transfer, assuming that the DATA POINTER will be implicitly -; restored. -; -; Historically, I've often done an implicit save when the DISCONNECT -; message is processed. We may want to consider having the option of -; doing that here. 
-; - JUMP munge_save_data_pointer, IF 0x02 ; SAVE DATA POINTER - -at 0x0000010d : */ 0x800c0002,0x0000046c, -/* - JUMP munge_restore_pointers, IF 0x03 ; RESTORE POINTERS - -at 0x0000010f : */ 0x800c0003,0x00000518, -/* - JUMP munge_disconnect, IF 0x04 ; DISCONNECT - -at 0x00000111 : */ 0x800c0004,0x0000056c, -/* - INT int_msg_1, IF 0x07 ; MESSAGE REJECT - -at 0x00000113 : */ 0x980c0007,0x01020000, -/* - INT int_msg_1, IF 0x0f ; INITIATE RECOVERY - -at 0x00000115 : */ 0x980c000f,0x01020000, -/* - - - - JUMP reject_message - -at 0x00000117 : */ 0x80080000,0x00000604, -/* - -munge_2: - JUMP reject_message - -at 0x00000119 : */ 0x80080000,0x00000604, -/* -; -; The SCSI standard allows targets to recover from transient -; error conditions by backing up the data pointer with a -; RESTORE POINTERS message. -; -; So, we must save and restore the _residual_ code as well as -; the current instruction pointer. Because of this messiness, -; it is simpler to put dynamic code in the dsa for this and to -; just do a simple jump down there. 
-; - -munge_save_data_pointer: - - ; We have something in TEMP here, so first we must save that - MOVE TEMP0 TO SFBR - -at 0x0000011b : */ 0x721c0000,0x00000000, -/* - MOVE SFBR TO SCRATCH0 - -at 0x0000011d : */ 0x6a340000,0x00000000, -/* - MOVE TEMP1 TO SFBR - -at 0x0000011f : */ 0x721d0000,0x00000000, -/* - MOVE SFBR TO SCRATCH1 - -at 0x00000121 : */ 0x6a350000,0x00000000, -/* - MOVE TEMP2 TO SFBR - -at 0x00000123 : */ 0x721e0000,0x00000000, -/* - MOVE SFBR TO SCRATCH2 - -at 0x00000125 : */ 0x6a360000,0x00000000, -/* - MOVE TEMP3 TO SFBR - -at 0x00000127 : */ 0x721f0000,0x00000000, -/* - MOVE SFBR TO SCRATCH3 - -at 0x00000129 : */ 0x6a370000,0x00000000, -/* - MOVE MEMORY 4, addr_scratch, jump_temp + 4 - -at 0x0000012b : */ 0xc0000004,0x00000000,0x000009c8, -/* - ; Now restore DSA - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x0000012e : */ 0xc0000004,0x00000000,0x00000000, -/* - - MOVE DSA0 + dsa_save_data_pointer TO SFBR - -at 0x00000131 : */ 0x76100000,0x00000000, -/* - MOVE SFBR TO SCRATCH0 - -at 0x00000133 : */ 0x6a340000,0x00000000, -/* - MOVE DSA1 + 0xff TO SFBR WITH CARRY - -at 0x00000135 : */ 0x7711ff00,0x00000000, -/* - MOVE SFBR TO SCRATCH1 - -at 0x00000137 : */ 0x6a350000,0x00000000, -/* - MOVE DSA2 + 0xff TO SFBR WITH CARRY - -at 0x00000139 : */ 0x7712ff00,0x00000000, -/* - MOVE SFBR TO SCRATCH2 - -at 0x0000013b : */ 0x6a360000,0x00000000, -/* - MOVE DSA3 + 0xff TO SFBR WITH CARRY - -at 0x0000013d : */ 0x7713ff00,0x00000000, -/* - MOVE SFBR TO SCRATCH3 - -at 0x0000013f : */ 0x6a370000,0x00000000, -/* - - - MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4 - -at 0x00000141 : */ 0xc0000004,0x00000000,0x00000514, -/* - -jump_dsa_save: - JUMP 0 - -at 0x00000144 : */ 0x80080000,0x00000000, -/* - -munge_restore_pointers: - - ; The code at dsa_restore_pointers will RETURN, but we don't care - ; about TEMP here, as it will overwrite it anyway. 
- - MOVE DSA0 + dsa_restore_pointers TO SFBR - -at 0x00000146 : */ 0x76100000,0x00000000, -/* - MOVE SFBR TO SCRATCH0 - -at 0x00000148 : */ 0x6a340000,0x00000000, -/* - MOVE DSA1 + 0xff TO SFBR WITH CARRY - -at 0x0000014a : */ 0x7711ff00,0x00000000, -/* - MOVE SFBR TO SCRATCH1 - -at 0x0000014c : */ 0x6a350000,0x00000000, -/* - MOVE DSA2 + 0xff TO SFBR WITH CARRY - -at 0x0000014e : */ 0x7712ff00,0x00000000, -/* - MOVE SFBR TO SCRATCH2 - -at 0x00000150 : */ 0x6a360000,0x00000000, -/* - MOVE DSA3 + 0xff TO SFBR WITH CARRY - -at 0x00000152 : */ 0x7713ff00,0x00000000, -/* - MOVE SFBR TO SCRATCH3 - -at 0x00000154 : */ 0x6a370000,0x00000000, -/* - - - MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4 - -at 0x00000156 : */ 0xc0000004,0x00000000,0x00000568, -/* - -jump_dsa_restore: - JUMP 0 - -at 0x00000159 : */ 0x80080000,0x00000000, -/* - - -munge_disconnect: - - - - - - - - - - - - - - - - - - - - - JUMP dsa_schedule - -at 0x0000015b : */ 0x80080000,0x00000178, -/* - - - - - -munge_extended: - CLEAR ACK - -at 0x0000015d : */ 0x60000040,0x00000000, -/* - INT int_err_unexpected_phase, WHEN NOT MSG_IN - -at 0x0000015f : */ 0x9f030000,0x00000000, -/* - MOVE 1, msg_buf + 1, WHEN MSG_IN - -at 0x00000161 : */ 0x0f000001,0x00000001, -/* - JUMP munge_extended_2, IF 0x02 - -at 0x00000163 : */ 0x800c0002,0x000005a4, -/* - JUMP munge_extended_3, IF 0x03 - -at 0x00000165 : */ 0x800c0003,0x000005d4, -/* - JUMP reject_message - -at 0x00000167 : */ 0x80080000,0x00000604, -/* - -munge_extended_2: - CLEAR ACK - -at 0x00000169 : */ 0x60000040,0x00000000, -/* - MOVE 1, msg_buf + 2, WHEN MSG_IN - -at 0x0000016b : */ 0x0f000001,0x00000002, -/* - JUMP reject_message, IF NOT 0x02 ; Must be WDTR - -at 0x0000016d : */ 0x80040002,0x00000604, -/* - CLEAR ACK - -at 0x0000016f : */ 0x60000040,0x00000000, -/* - MOVE 1, msg_buf + 3, WHEN MSG_IN - -at 0x00000171 : */ 0x0f000001,0x00000003, -/* - INT int_msg_wdtr - -at 0x00000173 : */ 0x98080000,0x01000000, -/* - -munge_extended_3: - CLEAR ACK - -at 
0x00000175 : */ 0x60000040,0x00000000, -/* - MOVE 1, msg_buf + 2, WHEN MSG_IN - -at 0x00000177 : */ 0x0f000001,0x00000002, -/* - JUMP reject_message, IF NOT 0x01 ; Must be SDTR - -at 0x00000179 : */ 0x80040001,0x00000604, -/* - CLEAR ACK - -at 0x0000017b : */ 0x60000040,0x00000000, -/* - MOVE 2, msg_buf + 3, WHEN MSG_IN - -at 0x0000017d : */ 0x0f000002,0x00000003, -/* - INT int_msg_sdtr - -at 0x0000017f : */ 0x98080000,0x01010000, -/* - -ENTRY reject_message -reject_message: - SET ATN - -at 0x00000181 : */ 0x58000008,0x00000000, -/* - CLEAR ACK - -at 0x00000183 : */ 0x60000040,0x00000000, -/* - MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT - -at 0x00000185 : */ 0x0e000001,0x00000000, -/* - RETURN - -at 0x00000187 : */ 0x90080000,0x00000000, -/* - -ENTRY accept_message -accept_message: - CLEAR ATN - -at 0x00000189 : */ 0x60000008,0x00000000, -/* - CLEAR ACK - -at 0x0000018b : */ 0x60000040,0x00000000, -/* - RETURN - -at 0x0000018d : */ 0x90080000,0x00000000, -/* - -ENTRY respond_message -respond_message: - SET ATN - -at 0x0000018f : */ 0x58000008,0x00000000, -/* - CLEAR ACK - -at 0x00000191 : */ 0x60000040,0x00000000, -/* - MOVE FROM dsa_msgout_other, WHEN MSG_OUT - -at 0x00000193 : */ 0x1e000000,0x00000068, -/* - RETURN - -at 0x00000195 : */ 0x90080000,0x00000000, -/* - -; -; command_complete -; -; PURPOSE : handle command termination when STATUS IN is detected by reading -; a status byte followed by a command termination message. -; -; Normal termination results in an INTFLY instruction, and -; the host system can pick out which command terminated by -; examining the MESSAGE and STATUS buffers of all currently -; executing commands; -; -; Abnormal (CHECK_CONDITION) termination results in an -; int_err_check_condition interrupt so that a REQUEST SENSE -; command can be issued out-of-order so that no other command -; clears the contingent allegiance condition. 
-; -; -; INPUTS : DSA - command -; -; CALLS : OK -; -; EXITS : On successful termination, control is passed to schedule. -; On abnormal termination, the user will usually modify the -; DSA fields and corresponding buffers and return control -; to select. -; - -ENTRY command_complete -command_complete: - MOVE FROM dsa_status, WHEN STATUS - -at 0x00000197 : */ 0x1b000000,0x00000060, -/* - - MOVE SFBR TO SCRATCH0 ; Save status - -at 0x00000199 : */ 0x6a340000,0x00000000, -/* - -ENTRY command_complete_msgin -command_complete_msgin: - MOVE FROM dsa_msgin, WHEN MSG_IN - -at 0x0000019b : */ 0x1f000000,0x00000058, -/* -; Indicate that we should be expecting a disconnect - - - - ; Above code cleared the Unexpected Disconnect bit, what do we do? - - CLEAR ACK - -at 0x0000019d : */ 0x60000040,0x00000000, -/* - - WAIT DISCONNECT - -at 0x0000019f : */ 0x48000000,0x00000000, -/* - -; -; The SCSI specification states that when a UNIT ATTENTION condition -; is pending, as indicated by a CHECK CONDITION status message, -; the target shall revert to asynchronous transfers. Since -; synchronous transfers parameters are maintained on a per INITIATOR/TARGET -; basis, and returning control to our scheduler could work on a command -; running on another lun on that target using the old parameters, we must -; interrupt the host processor to get them changed, or change them ourselves. -; -; Once SCSI-II tagged queueing is implemented, things will be even more -; hairy, since contingent allegiance conditions exist on a per-target/lun -; basis, and issuing a new command with a different tag would clear it. -; In these cases, we must interrupt the host processor to get a request -; added to the HEAD of the queue with the request sense command, or we -; must automatically issue the request sense command. 
- - - - - - - - INT int_norm_emulateintfly - -at 0x000001a1 : */ 0x98080000,0x02060000, -/* - - - - - - - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x000001a3 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - - - JUMP schedule - -at 0x000001a6 : */ 0x80080000,0x00000000, -/* -command_failed: - INT int_err_check_condition - -at 0x000001a8 : */ 0x98080000,0x00030000, -/* - - - - -; -; wait_reselect -; -; PURPOSE : This is essentially the idle routine, where control lands -; when there are no new processes to schedule. wait_reselect -; waits for reselection, selection, and new commands. -; -; When a successful reselection occurs, with the aid -; of fixed up code in each DSA, wait_reselect walks the -; reconnect_dsa_queue, asking each dsa if the target ID -; and LUN match its. -; -; If a match is found, a call is made back to reselected_ok, -; which through the miracles of self modifying code, extracts -; the found DSA from the reconnect_dsa_queue and then -; returns control to the DSAs thread of execution. -; -; INPUTS : NONE -; -; CALLS : OK -; -; MODIFIES : DSA, -; -; EXITS : On successful reselection, control is returned to the -; DSA which called reselected_ok. If the WAIT RESELECT -; was interrupted by a new commands arrival signaled by -; SIG_P, control is passed to schedule. If the NCR is -; selected, the host system is interrupted with an -; int_err_selected which is usually responded to by -; setting DSP to the target_abort address. - -ENTRY wait_reselect -wait_reselect: - - - - - - - WAIT RESELECT wait_reselect_failed - -at 0x000001aa : */ 0x50000000,0x00000800, -/* - -reselected: - - - - CLEAR TARGET - -at 0x000001ac : */ 0x60000200,0x00000000, -/* - - ; Read all data needed to reestablish the nexus - - MOVE 1, reselected_identify, WHEN MSG_IN - -at 0x000001ae : */ 0x0f000001,0x00000000, -/* - ; We used to CLEAR ACK here. - - - - - - ; Point DSA at the current head of the disconnected queue. 
- - MOVE MEMORY 4, reconnect_dsa_head, addr_scratch - -at 0x000001b0 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - MOVE MEMORY 4, addr_scratch, saved_dsa - -at 0x000001b3 : */ 0xc0000004,0x00000000,0x00000000, -/* - - - - - ; Fix the update-next pointer so that the reconnect_dsa_head - ; pointer is the one that will be updated if this DSA is a hit - ; and we remove it from the queue. - - MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8 - -at 0x000001b6 : */ 0xc0000004,0x00000000,0x000007ec, -/* - - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x000001b9 : */ 0xc0000004,0x00000000,0x00000000, -/* - - -ENTRY reselected_check_next -reselected_check_next: - - - - ; Check for a NULL pointer. - MOVE DSA0 TO SFBR - -at 0x000001bc : */ 0x72100000,0x00000000, -/* - JUMP reselected_not_end, IF NOT 0 - -at 0x000001be : */ 0x80040000,0x00000738, -/* - MOVE DSA1 TO SFBR - -at 0x000001c0 : */ 0x72110000,0x00000000, -/* - JUMP reselected_not_end, IF NOT 0 - -at 0x000001c2 : */ 0x80040000,0x00000738, -/* - MOVE DSA2 TO SFBR - -at 0x000001c4 : */ 0x72120000,0x00000000, -/* - JUMP reselected_not_end, IF NOT 0 - -at 0x000001c6 : */ 0x80040000,0x00000738, -/* - MOVE DSA3 TO SFBR - -at 0x000001c8 : */ 0x72130000,0x00000000, -/* - JUMP reselected_not_end, IF NOT 0 - -at 0x000001ca : */ 0x80040000,0x00000738, -/* - INT int_err_unexpected_reselect - -at 0x000001cc : */ 0x98080000,0x00020000, -/* - -reselected_not_end: - ; - ; XXX the ALU is only eight bits wide, and the assembler - ; wont do the dirt work for us. As long as dsa_check_reselect - ; is negative, we need to sign extend with 1 bits to the full - ; 32 bit width of the address. - ; - ; A potential work around would be to have a known alignment - ; of the DSA structure such that the base address plus - ; dsa_check_reselect doesn't require carrying from bytes - ; higher than the LSB. 
- ; - - MOVE DSA0 TO SFBR - -at 0x000001ce : */ 0x72100000,0x00000000, -/* - MOVE SFBR + dsa_check_reselect TO SCRATCH0 - -at 0x000001d0 : */ 0x6e340000,0x00000000, -/* - MOVE DSA1 TO SFBR - -at 0x000001d2 : */ 0x72110000,0x00000000, -/* - MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY - -at 0x000001d4 : */ 0x6f35ff00,0x00000000, -/* - MOVE DSA2 TO SFBR - -at 0x000001d6 : */ 0x72120000,0x00000000, -/* - MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY - -at 0x000001d8 : */ 0x6f36ff00,0x00000000, -/* - MOVE DSA3 TO SFBR - -at 0x000001da : */ 0x72130000,0x00000000, -/* - MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY - -at 0x000001dc : */ 0x6f37ff00,0x00000000, -/* - - - MOVE MEMORY 4, addr_scratch, reselected_check + 4 - -at 0x000001de : */ 0xc0000004,0x00000000,0x00000794, -/* - - - ; Time to correct DSA following memory move - MOVE MEMORY 4, saved_dsa, addr_dsa - -at 0x000001e1 : */ 0xc0000004,0x00000000,0x00000000, -/* - -reselected_check: - JUMP 0 - -at 0x000001e4 : */ 0x80080000,0x00000000, -/* - - -; -; - -; We have problems here - the memory move corrupts TEMP and DSA. This -; routine is called from DSA code, and patched from many places. Scratch -; is probably free when it is called. 
-; We have to: -; copy temp to scratch, one byte at a time -; write scratch to patch a jump in place of the return -; do the move memory -; jump to the patched in return address -; DSA is corrupt when we get here, and can be left corrupt - -ENTRY reselected_ok -reselected_ok: - MOVE TEMP0 TO SFBR - -at 0x000001e6 : */ 0x721c0000,0x00000000, -/* - MOVE SFBR TO SCRATCH0 - -at 0x000001e8 : */ 0x6a340000,0x00000000, -/* - MOVE TEMP1 TO SFBR - -at 0x000001ea : */ 0x721d0000,0x00000000, -/* - MOVE SFBR TO SCRATCH1 - -at 0x000001ec : */ 0x6a350000,0x00000000, -/* - MOVE TEMP2 TO SFBR - -at 0x000001ee : */ 0x721e0000,0x00000000, -/* - MOVE SFBR TO SCRATCH2 - -at 0x000001f0 : */ 0x6a360000,0x00000000, -/* - MOVE TEMP3 TO SFBR - -at 0x000001f2 : */ 0x721f0000,0x00000000, -/* - MOVE SFBR TO SCRATCH3 - -at 0x000001f4 : */ 0x6a370000,0x00000000, -/* - MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4 - -at 0x000001f6 : */ 0xc0000004,0x00000000,0x000007f4, -/* -reselected_ok_patch: - MOVE MEMORY 4, 0, 0 - -at 0x000001f9 : */ 0xc0000004,0x00000000,0x00000000, -/* -reselected_ok_jump: - JUMP 0 - -at 0x000001fc : */ 0x80080000,0x00000000, -/* - - - - - -selected: - INT int_err_selected; - -at 0x000001fe : */ 0x98080000,0x00010000, -/* - -; -; A select or reselect failure can be caused by one of two conditions : -; 1. SIG_P was set. This will be the case if the user has written -; a new value to a previously NULL head of the issue queue. -; -; 2. The NCR53c810 was selected or reselected by another device. -; -; 3. The bus was already busy since we were selected or reselected -; before starting the command. - -wait_reselect_failed: - - - -; Check selected bit. - - ; Must work out how to tell if we are selected.... - - - - -; Reading CTEST2 clears the SIG_P bit in the ISTAT register. - MOVE CTEST2 & 0x40 TO SFBR - -at 0x00000200 : */ 0x74164000,0x00000000, -/* - JUMP schedule, IF 0x40 - -at 0x00000202 : */ 0x800c0040,0x00000000, -/* -; Check connected bit. 
-; FIXME: this needs to change if we support target mode - MOVE ISTAT & 0x08 TO SFBR - -at 0x00000204 : */ 0x74210800,0x00000000, -/* - JUMP reselected, IF 0x08 - -at 0x00000206 : */ 0x800c0008,0x000006b0, -/* -; FIXME : Something bogus happened, and we shouldn't fail silently. - - - - INT int_debug_panic - -at 0x00000208 : */ 0x98080000,0x030b0000, -/* - - - -select_failed: - - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 - -at 0x0000020a : */ 0x7a1b1000,0x00000000, -/* - - - - -; Otherwise, mask the selected and reselected bits off SIST0 - - ; Let's assume we don't get selected for now - MOVE SSTAT0 & 0x10 TO SFBR - -at 0x0000020c : */ 0x740d1000,0x00000000, -/* - - - - - JUMP reselected, IF 0x10 - -at 0x0000020e : */ 0x800c0010,0x000006b0, -/* -; If SIGP is set, the user just gave us another command, and -; we should restart or return to the scheduler. -; Reading CTEST2 clears the SIG_P bit in the ISTAT register. - MOVE CTEST2 & 0x40 TO SFBR - -at 0x00000210 : */ 0x74164000,0x00000000, -/* - JUMP select, IF 0x40 - -at 0x00000212 : */ 0x800c0040,0x000001f8, -/* -; Check connected bit. -; FIXME: this needs to change if we support target mode -; FIXME: is this really necessary? - MOVE ISTAT & 0x08 TO SFBR - -at 0x00000214 : */ 0x74210800,0x00000000, -/* - JUMP reselected, IF 0x08 - -at 0x00000216 : */ 0x800c0008,0x000006b0, -/* -; FIXME : Something bogus happened, and we shouldn't fail silently. - - - - INT int_debug_panic - -at 0x00000218 : */ 0x98080000,0x030b0000, -/* - - -; -; test_1 -; test_2 -; -; PURPOSE : run some verification tests on the NCR. test_1 -; copies test_src to test_dest and interrupts the host -; processor, testing for cache coherency and interrupt -; problems in the processes. -; -; test_2 runs a command with offsets relative to the -; DSA on entry, and is useful for miscellaneous experimentation. -; - -; Verify that interrupts are working correctly and that we don't -; have a cache invalidation problem. 
- -ABSOLUTE test_src = 0, test_dest = 0 -ENTRY test_1 -test_1: - MOVE MEMORY 4, test_src, test_dest - -at 0x0000021a : */ 0xc0000004,0x00000000,0x00000000, -/* - INT int_test_1 - -at 0x0000021d : */ 0x98080000,0x04000000, -/* - -; -; Run arbitrary commands, with test code establishing a DSA -; - -ENTRY test_2 -test_2: - CLEAR TARGET - -at 0x0000021f : */ 0x60000200,0x00000000, -/* - - ; Enable selection timer - - - - MOVE CTEST7 & 0xef TO CTEST7 - -at 0x00000221 : */ 0x7c1bef00,0x00000000, -/* - - - SELECT ATN FROM 0, test_2_fail - -at 0x00000223 : */ 0x43000000,0x000008dc, -/* - JUMP test_2_msgout, WHEN MSG_OUT - -at 0x00000225 : */ 0x860b0000,0x0000089c, -/* -ENTRY test_2_msgout -test_2_msgout: - - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 - -at 0x00000227 : */ 0x7a1b1000,0x00000000, -/* - - MOVE FROM 8, WHEN MSG_OUT - -at 0x00000229 : */ 0x1e000000,0x00000008, -/* - MOVE FROM 16, WHEN CMD - -at 0x0000022b : */ 0x1a000000,0x00000010, -/* - MOVE FROM 24, WHEN DATA_IN - -at 0x0000022d : */ 0x19000000,0x00000018, -/* - MOVE FROM 32, WHEN STATUS - -at 0x0000022f : */ 0x1b000000,0x00000020, -/* - MOVE FROM 40, WHEN MSG_IN - -at 0x00000231 : */ 0x1f000000,0x00000028, -/* - - - - CLEAR ACK - -at 0x00000233 : */ 0x60000040,0x00000000, -/* - WAIT DISCONNECT - -at 0x00000235 : */ 0x48000000,0x00000000, -/* -test_2_fail: - - ; Disable selection timer - MOVE CTEST7 | 0x10 TO CTEST7 - -at 0x00000237 : */ 0x7a1b1000,0x00000000, -/* - - INT int_test_2 - -at 0x00000239 : */ 0x98080000,0x04010000, -/* - -ENTRY debug_break -debug_break: - INT int_debug_break - -at 0x0000023b : */ 0x98080000,0x03000000, -/* - -; -; initiator_abort -; target_abort -; -; PURPOSE : Abort the currently established nexus from with initiator -; or target mode. 
-; -; - -ENTRY target_abort -target_abort: - SET TARGET - -at 0x0000023d : */ 0x58000200,0x00000000, -/* - DISCONNECT - -at 0x0000023f : */ 0x48000000,0x00000000, -/* - CLEAR TARGET - -at 0x00000241 : */ 0x60000200,0x00000000, -/* - JUMP schedule - -at 0x00000243 : */ 0x80080000,0x00000000, -/* - -ENTRY initiator_abort -initiator_abort: - SET ATN - -at 0x00000245 : */ 0x58000008,0x00000000, -/* -; -; The SCSI-I specification says that targets may go into MSG out at -; their leisure upon receipt of the ATN single. On all versions of the -; specification, we can't change phases until REQ transitions true->false, -; so we need to sink/source one byte of data to allow the transition. -; -; For the sake of safety, we'll only source one byte of data in all -; cases, but to accommodate the SCSI-I dain bramage, we'll sink an -; arbitrary number of bytes. - JUMP spew_cmd, WHEN CMD - -at 0x00000247 : */ 0x820b0000,0x0000094c, -/* - JUMP eat_msgin, WHEN MSG_IN - -at 0x00000249 : */ 0x870b0000,0x0000095c, -/* - JUMP eat_datain, WHEN DATA_IN - -at 0x0000024b : */ 0x810b0000,0x0000098c, -/* - JUMP eat_status, WHEN STATUS - -at 0x0000024d : */ 0x830b0000,0x00000974, -/* - JUMP spew_dataout, WHEN DATA_OUT - -at 0x0000024f : */ 0x800b0000,0x000009a4, -/* - JUMP sated - -at 0x00000251 : */ 0x80080000,0x000009ac, -/* -spew_cmd: - MOVE 1, NCR53c7xx_zero, WHEN CMD - -at 0x00000253 : */ 0x0a000001,0x00000000, -/* - JUMP sated - -at 0x00000255 : */ 0x80080000,0x000009ac, -/* -eat_msgin: - MOVE 1, NCR53c7xx_sink, WHEN MSG_IN - -at 0x00000257 : */ 0x0f000001,0x00000000, -/* - JUMP eat_msgin, WHEN MSG_IN - -at 0x00000259 : */ 0x870b0000,0x0000095c, -/* - JUMP sated - -at 0x0000025b : */ 0x80080000,0x000009ac, -/* -eat_status: - MOVE 1, NCR53c7xx_sink, WHEN STATUS - -at 0x0000025d : */ 0x0b000001,0x00000000, -/* - JUMP eat_status, WHEN STATUS - -at 0x0000025f : */ 0x830b0000,0x00000974, -/* - JUMP sated - -at 0x00000261 : */ 0x80080000,0x000009ac, -/* -eat_datain: - MOVE 1, NCR53c7xx_sink, 
WHEN DATA_IN - -at 0x00000263 : */ 0x09000001,0x00000000, -/* - JUMP eat_datain, WHEN DATA_IN - -at 0x00000265 : */ 0x810b0000,0x0000098c, -/* - JUMP sated - -at 0x00000267 : */ 0x80080000,0x000009ac, -/* -spew_dataout: - MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT - -at 0x00000269 : */ 0x08000001,0x00000000, -/* -sated: - - - - MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT - -at 0x0000026b : */ 0x0e000001,0x00000000, -/* - WAIT DISCONNECT - -at 0x0000026d : */ 0x48000000,0x00000000, -/* - INT int_norm_aborted - -at 0x0000026f : */ 0x98080000,0x02040000, -/* - - - - -; Little patched jump, used to overcome problems with TEMP getting -; corrupted on memory moves. - -jump_temp: - JUMP 0 - -at 0x00000271 : */ 0x80080000,0x00000000, -}; - -#define A_NCR53c7xx_msg_abort 0x00000000 -static u32 A_NCR53c7xx_msg_abort_used[] __attribute((unused)) = { - 0x0000026c, -}; - -#define A_NCR53c7xx_msg_reject 0x00000000 -static u32 A_NCR53c7xx_msg_reject_used[] __attribute((unused)) = { - 0x00000186, -}; - -#define A_NCR53c7xx_sink 0x00000000 -static u32 A_NCR53c7xx_sink_used[] __attribute((unused)) = { - 0x00000258, - 0x0000025e, - 0x00000264, -}; - -#define A_NCR53c7xx_zero 0x00000000 -static u32 A_NCR53c7xx_zero_used[] __attribute((unused)) = { - 0x00000254, - 0x0000026a, -}; - -#define A_NOP_insn 0x00000000 -static u32 A_NOP_insn_used[] __attribute((unused)) = { - 0x00000017, -}; - -#define A_addr_dsa 0x00000000 -static u32 A_addr_dsa_used[] __attribute((unused)) = { - 0x0000000f, - 0x00000026, - 0x00000033, - 0x00000040, - 0x00000055, - 0x00000079, - 0x0000008e, - 0x000000bc, - 0x000000d2, - 0x00000130, - 0x000001a5, - 0x000001bb, - 0x000001e3, -}; - -#define A_addr_reconnect_dsa_head 0x00000000 -static u32 A_addr_reconnect_dsa_head_used[] __attribute((unused)) = { - 0x000001b7, -}; - -#define A_addr_scratch 0x00000000 -static u32 A_addr_scratch_used[] __attribute((unused)) = { - 0x00000002, - 0x00000004, - 0x00000008, - 0x00000020, - 0x00000022, - 0x00000049, - 0x00000060, - 
0x0000006a, - 0x00000071, - 0x00000073, - 0x000000ab, - 0x000000b5, - 0x000000c1, - 0x000000cb, - 0x0000012c, - 0x00000142, - 0x00000157, - 0x000001b2, - 0x000001b4, - 0x000001df, - 0x000001f7, -}; - -#define A_addr_temp 0x00000000 -static u32 A_addr_temp_used[] __attribute((unused)) = { -}; - -#define A_dmode_memory_to_memory 0x00000000 -static u32 A_dmode_memory_to_memory_used[] __attribute((unused)) = { -}; - -#define A_dmode_memory_to_ncr 0x00000000 -static u32 A_dmode_memory_to_ncr_used[] __attribute((unused)) = { -}; - -#define A_dmode_ncr_to_memory 0x00000000 -static u32 A_dmode_ncr_to_memory_used[] __attribute((unused)) = { -}; - -#define A_dsa_check_reselect 0x00000000 -static u32 A_dsa_check_reselect_used[] __attribute((unused)) = { - 0x000001d0, -}; - -#define A_dsa_cmdout 0x00000048 -static u32 A_dsa_cmdout_used[] __attribute((unused)) = { - 0x0000009a, -}; - -#define A_dsa_cmnd 0x00000038 -static u32 A_dsa_cmnd_used[] __attribute((unused)) = { -}; - -#define A_dsa_datain 0x00000054 -static u32 A_dsa_datain_used[] __attribute((unused)) = { - 0x000000c2, -}; - -#define A_dsa_dataout 0x00000050 -static u32 A_dsa_dataout_used[] __attribute((unused)) = { - 0x000000ac, -}; - -#define A_dsa_end 0x00000070 -static u32 A_dsa_end_used[] __attribute((unused)) = { -}; - -#define A_dsa_fields_start 0x00000000 -static u32 A_dsa_fields_start_used[] __attribute((unused)) = { -}; - -#define A_dsa_msgin 0x00000058 -static u32 A_dsa_msgin_used[] __attribute((unused)) = { - 0x0000019c, -}; - -#define A_dsa_msgout 0x00000040 -static u32 A_dsa_msgout_used[] __attribute((unused)) = { - 0x00000089, -}; - -#define A_dsa_msgout_other 0x00000068 -static u32 A_dsa_msgout_other_used[] __attribute((unused)) = { - 0x00000194, -}; - -#define A_dsa_next 0x00000030 -static u32 A_dsa_next_used[] __attribute((unused)) = { - 0x00000061, -}; - -#define A_dsa_restore_pointers 0x00000000 -static u32 A_dsa_restore_pointers_used[] __attribute((unused)) = { - 0x00000146, -}; - -#define 
A_dsa_save_data_pointer 0x00000000 -static u32 A_dsa_save_data_pointer_used[] __attribute((unused)) = { - 0x00000131, -}; - -#define A_dsa_select 0x0000003c -static u32 A_dsa_select_used[] __attribute((unused)) = { - 0x00000082, -}; - -#define A_dsa_sscf_710 0x00000000 -static u32 A_dsa_sscf_710_used[] __attribute((unused)) = { - 0x00000007, -}; - -#define A_dsa_status 0x00000060 -static u32 A_dsa_status_used[] __attribute((unused)) = { - 0x00000198, -}; - -#define A_dsa_temp_addr_array_value 0x00000000 -static u32 A_dsa_temp_addr_array_value_used[] __attribute((unused)) = { -}; - -#define A_dsa_temp_addr_dsa_value 0x00000000 -static u32 A_dsa_temp_addr_dsa_value_used[] __attribute((unused)) = { - 0x00000001, -}; - -#define A_dsa_temp_addr_new_value 0x00000000 -static u32 A_dsa_temp_addr_new_value_used[] __attribute((unused)) = { -}; - -#define A_dsa_temp_addr_next 0x00000000 -static u32 A_dsa_temp_addr_next_used[] __attribute((unused)) = { - 0x0000001c, - 0x0000004f, -}; - -#define A_dsa_temp_addr_residual 0x00000000 -static u32 A_dsa_temp_addr_residual_used[] __attribute((unused)) = { - 0x0000002d, - 0x0000003b, -}; - -#define A_dsa_temp_addr_saved_pointer 0x00000000 -static u32 A_dsa_temp_addr_saved_pointer_used[] __attribute((unused)) = { - 0x0000002b, - 0x00000037, -}; - -#define A_dsa_temp_addr_saved_residual 0x00000000 -static u32 A_dsa_temp_addr_saved_residual_used[] __attribute((unused)) = { - 0x0000002e, - 0x0000003a, -}; - -#define A_dsa_temp_lun 0x00000000 -static u32 A_dsa_temp_lun_used[] __attribute((unused)) = { - 0x0000004c, -}; - -#define A_dsa_temp_next 0x00000000 -static u32 A_dsa_temp_next_used[] __attribute((unused)) = { - 0x0000001f, -}; - -#define A_dsa_temp_sync 0x00000000 -static u32 A_dsa_temp_sync_used[] __attribute((unused)) = { - 0x00000057, -}; - -#define A_dsa_temp_target 0x00000000 -static u32 A_dsa_temp_target_used[] __attribute((unused)) = { - 0x00000045, -}; - -#define A_emulfly 0x00000000 -static u32 A_emulfly_used[] 
__attribute((unused)) = { -}; - -#define A_int_debug_break 0x03000000 -static u32 A_int_debug_break_used[] __attribute((unused)) = { - 0x0000023c, -}; - -#define A_int_debug_panic 0x030b0000 -static u32 A_int_debug_panic_used[] __attribute((unused)) = { - 0x00000209, - 0x00000219, -}; - -#define A_int_err_check_condition 0x00030000 -static u32 A_int_err_check_condition_used[] __attribute((unused)) = { - 0x000001a9, -}; - -#define A_int_err_no_phase 0x00040000 -static u32 A_int_err_no_phase_used[] __attribute((unused)) = { -}; - -#define A_int_err_selected 0x00010000 -static u32 A_int_err_selected_used[] __attribute((unused)) = { - 0x000001ff, -}; - -#define A_int_err_unexpected_phase 0x00000000 -static u32 A_int_err_unexpected_phase_used[] __attribute((unused)) = { - 0x00000092, - 0x00000098, - 0x000000a0, - 0x000000d6, - 0x000000da, - 0x000000dc, - 0x000000e4, - 0x000000e8, - 0x000000ea, - 0x000000f2, - 0x000000f6, - 0x000000f8, - 0x000000fa, - 0x00000160, -}; - -#define A_int_err_unexpected_reselect 0x00020000 -static u32 A_int_err_unexpected_reselect_used[] __attribute((unused)) = { - 0x000001cd, -}; - -#define A_int_msg_1 0x01020000 -static u32 A_int_msg_1_used[] __attribute((unused)) = { - 0x00000114, - 0x00000116, -}; - -#define A_int_msg_sdtr 0x01010000 -static u32 A_int_msg_sdtr_used[] __attribute((unused)) = { - 0x00000180, -}; - -#define A_int_msg_wdtr 0x01000000 -static u32 A_int_msg_wdtr_used[] __attribute((unused)) = { - 0x00000174, -}; - -#define A_int_norm_aborted 0x02040000 -static u32 A_int_norm_aborted_used[] __attribute((unused)) = { - 0x00000270, -}; - -#define A_int_norm_command_complete 0x02020000 -static u32 A_int_norm_command_complete_used[] __attribute((unused)) = { -}; - -#define A_int_norm_disconnected 0x02030000 -static u32 A_int_norm_disconnected_used[] __attribute((unused)) = { -}; - -#define A_int_norm_emulateintfly 0x02060000 -static u32 A_int_norm_emulateintfly_used[] __attribute((unused)) = { - 0x000001a2, -}; - -#define 
A_int_norm_reselect_complete 0x02010000 -static u32 A_int_norm_reselect_complete_used[] __attribute((unused)) = { -}; - -#define A_int_norm_reset 0x02050000 -static u32 A_int_norm_reset_used[] __attribute((unused)) = { -}; - -#define A_int_norm_select_complete 0x02000000 -static u32 A_int_norm_select_complete_used[] __attribute((unused)) = { -}; - -#define A_int_test_1 0x04000000 -static u32 A_int_test_1_used[] __attribute((unused)) = { - 0x0000021e, -}; - -#define A_int_test_2 0x04010000 -static u32 A_int_test_2_used[] __attribute((unused)) = { - 0x0000023a, -}; - -#define A_int_test_3 0x04020000 -static u32 A_int_test_3_used[] __attribute((unused)) = { -}; - -#define A_msg_buf 0x00000000 -static u32 A_msg_buf_used[] __attribute((unused)) = { - 0x00000108, - 0x00000162, - 0x0000016c, - 0x00000172, - 0x00000178, - 0x0000017e, -}; - -#define A_reconnect_dsa_head 0x00000000 -static u32 A_reconnect_dsa_head_used[] __attribute((unused)) = { - 0x0000006d, - 0x00000074, - 0x000001b1, -}; - -#define A_reselected_identify 0x00000000 -static u32 A_reselected_identify_used[] __attribute((unused)) = { - 0x00000048, - 0x000001af, -}; - -#define A_reselected_tag 0x00000000 -static u32 A_reselected_tag_used[] __attribute((unused)) = { -}; - -#define A_saved_dsa 0x00000000 -static u32 A_saved_dsa_used[] __attribute((unused)) = { - 0x00000005, - 0x0000000e, - 0x00000023, - 0x00000025, - 0x00000032, - 0x0000003f, - 0x00000054, - 0x0000005f, - 0x00000070, - 0x00000078, - 0x0000008d, - 0x000000aa, - 0x000000bb, - 0x000000c0, - 0x000000d1, - 0x0000012f, - 0x000001a4, - 0x000001b5, - 0x000001ba, - 0x000001e2, -}; - -#define A_schedule 0x00000000 -static u32 A_schedule_used[] __attribute((unused)) = { - 0x0000007d, - 0x000001a7, - 0x00000203, - 0x00000244, -}; - -#define A_test_dest 0x00000000 -static u32 A_test_dest_used[] __attribute((unused)) = { - 0x0000021c, -}; - -#define A_test_src 0x00000000 -static u32 A_test_src_used[] __attribute((unused)) = { - 0x0000021b, -}; - -#define 
Ent_accept_message 0x00000624 -#define Ent_cmdout_cmdout 0x00000264 -#define Ent_command_complete 0x0000065c -#define Ent_command_complete_msgin 0x0000066c -#define Ent_data_transfer 0x0000026c -#define Ent_datain_to_jump 0x00000334 -#define Ent_debug_break 0x000008ec -#define Ent_dsa_code_begin 0x00000000 -#define Ent_dsa_code_check_reselect 0x0000010c -#define Ent_dsa_code_fix_jump 0x00000058 -#define Ent_dsa_code_restore_pointers 0x000000d8 -#define Ent_dsa_code_save_data_pointer 0x000000a4 -#define Ent_dsa_code_template 0x00000000 -#define Ent_dsa_code_template_end 0x00000178 -#define Ent_dsa_schedule 0x00000178 -#define Ent_dsa_zero 0x00000178 -#define Ent_end_data_transfer 0x000002a4 -#define Ent_initiator_abort 0x00000914 -#define Ent_msg_in 0x0000041c -#define Ent_msg_in_restart 0x000003fc -#define Ent_other_in 0x0000038c -#define Ent_other_out 0x00000354 -#define Ent_other_transfer 0x000003c4 -#define Ent_reject_message 0x00000604 -#define Ent_reselected_check_next 0x000006f0 -#define Ent_reselected_ok 0x00000798 -#define Ent_respond_message 0x0000063c -#define Ent_select 0x000001f8 -#define Ent_select_msgout 0x00000218 -#define Ent_target_abort 0x000008f4 -#define Ent_test_1 0x00000868 -#define Ent_test_2 0x0000087c -#define Ent_test_2_msgout 0x0000089c -#define Ent_wait_reselect 0x000006a8 -static u32 LABELPATCHES[] __attribute((unused)) = { - 0x00000011, - 0x0000001a, - 0x0000001d, - 0x00000028, - 0x0000002a, - 0x00000035, - 0x00000038, - 0x00000042, - 0x00000050, - 0x00000052, - 0x0000006b, - 0x00000083, - 0x00000085, - 0x00000090, - 0x00000094, - 0x00000096, - 0x0000009c, - 0x0000009e, - 0x000000a2, - 0x000000a4, - 0x000000a6, - 0x000000a8, - 0x000000b6, - 0x000000b9, - 0x000000cc, - 0x000000cf, - 0x000000d8, - 0x000000de, - 0x000000e0, - 0x000000e6, - 0x000000ec, - 0x000000ee, - 0x000000f4, - 0x000000fc, - 0x000000fe, - 0x0000010a, - 0x0000010c, - 0x0000010e, - 0x00000110, - 0x00000112, - 0x00000118, - 0x0000011a, - 0x0000012d, - 0x00000143, - 
0x00000158, - 0x0000015c, - 0x00000164, - 0x00000166, - 0x00000168, - 0x0000016e, - 0x0000017a, - 0x000001ab, - 0x000001b8, - 0x000001bf, - 0x000001c3, - 0x000001c7, - 0x000001cb, - 0x000001e0, - 0x000001f8, - 0x00000207, - 0x0000020f, - 0x00000213, - 0x00000217, - 0x00000224, - 0x00000226, - 0x00000248, - 0x0000024a, - 0x0000024c, - 0x0000024e, - 0x00000250, - 0x00000252, - 0x00000256, - 0x0000025a, - 0x0000025c, - 0x00000260, - 0x00000262, - 0x00000266, - 0x00000268, -}; - -static struct { - u32 offset; - void *address; -} EXTERNAL_PATCHES[] __attribute((unused)) = { -}; - -static u32 INSTRUCTIONS __attribute((unused)) = 290; -static u32 PATCHES __attribute((unused)) = 78; -static u32 EXTERNAL_PATCHES_LEN __attribute((unused)) = 0; diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped linux-2.6.22-591/drivers/scsi/53c7xx_u.h_shipped --- linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/53c7xx_u.h_shipped 1969-12-31 19:00:00.000000000 -0500 @@ -1,102 +0,0 @@ -#undef A_NCR53c7xx_msg_abort -#undef A_NCR53c7xx_msg_reject -#undef A_NCR53c7xx_sink -#undef A_NCR53c7xx_zero -#undef A_NOP_insn -#undef A_addr_dsa -#undef A_addr_reconnect_dsa_head -#undef A_addr_scratch -#undef A_addr_temp -#undef A_dmode_memory_to_memory -#undef A_dmode_memory_to_ncr -#undef A_dmode_ncr_to_memory -#undef A_dsa_check_reselect -#undef A_dsa_cmdout -#undef A_dsa_cmnd -#undef A_dsa_datain -#undef A_dsa_dataout -#undef A_dsa_end -#undef A_dsa_fields_start -#undef A_dsa_msgin -#undef A_dsa_msgout -#undef A_dsa_msgout_other -#undef A_dsa_next -#undef A_dsa_restore_pointers -#undef A_dsa_save_data_pointer -#undef A_dsa_select -#undef A_dsa_sscf_710 -#undef A_dsa_status -#undef A_dsa_temp_addr_array_value -#undef A_dsa_temp_addr_dsa_value -#undef A_dsa_temp_addr_new_value -#undef A_dsa_temp_addr_next -#undef A_dsa_temp_addr_residual -#undef A_dsa_temp_addr_saved_pointer -#undef A_dsa_temp_addr_saved_residual -#undef 
A_dsa_temp_lun -#undef A_dsa_temp_next -#undef A_dsa_temp_sync -#undef A_dsa_temp_target -#undef A_emulfly -#undef A_int_debug_break -#undef A_int_debug_panic -#undef A_int_err_check_condition -#undef A_int_err_no_phase -#undef A_int_err_selected -#undef A_int_err_unexpected_phase -#undef A_int_err_unexpected_reselect -#undef A_int_msg_1 -#undef A_int_msg_sdtr -#undef A_int_msg_wdtr -#undef A_int_norm_aborted -#undef A_int_norm_command_complete -#undef A_int_norm_disconnected -#undef A_int_norm_emulateintfly -#undef A_int_norm_reselect_complete -#undef A_int_norm_reset -#undef A_int_norm_select_complete -#undef A_int_test_1 -#undef A_int_test_2 -#undef A_int_test_3 -#undef A_msg_buf -#undef A_reconnect_dsa_head -#undef A_reselected_identify -#undef A_reselected_tag -#undef A_saved_dsa -#undef A_schedule -#undef A_test_dest -#undef A_test_src -#undef Ent_accept_message -#undef Ent_cmdout_cmdout -#undef Ent_command_complete -#undef Ent_command_complete_msgin -#undef Ent_data_transfer -#undef Ent_datain_to_jump -#undef Ent_debug_break -#undef Ent_dsa_code_begin -#undef Ent_dsa_code_check_reselect -#undef Ent_dsa_code_fix_jump -#undef Ent_dsa_code_restore_pointers -#undef Ent_dsa_code_save_data_pointer -#undef Ent_dsa_code_template -#undef Ent_dsa_code_template_end -#undef Ent_dsa_schedule -#undef Ent_dsa_zero -#undef Ent_end_data_transfer -#undef Ent_initiator_abort -#undef Ent_msg_in -#undef Ent_msg_in_restart -#undef Ent_other_in -#undef Ent_other_out -#undef Ent_other_transfer -#undef Ent_reject_message -#undef Ent_reselected_check_next -#undef Ent_reselected_ok -#undef Ent_respond_message -#undef Ent_select -#undef Ent_select_msgout -#undef Ent_target_abort -#undef Ent_test_1 -#undef Ent_test_2 -#undef Ent_test_2_msgout -#undef Ent_wait_reselect diff -Nurb linux-2.6.22-570/drivers/scsi/BusLogic.c linux-2.6.22-591/drivers/scsi/BusLogic.c --- linux-2.6.22-570/drivers/scsi/BusLogic.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/BusLogic.c 
2007-12-21 15:36:12.000000000 -0500 @@ -304,16 +304,8 @@ static void BusLogic_DeallocateCCB(struct BusLogic_CCB *CCB) { struct BusLogic_HostAdapter *HostAdapter = CCB->HostAdapter; - struct scsi_cmnd *cmd = CCB->Command; - if (cmd->use_sg != 0) { - pci_unmap_sg(HostAdapter->PCI_Device, - (struct scatterlist *)cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); - } else if (cmd->request_bufflen != 0) { - pci_unmap_single(HostAdapter->PCI_Device, CCB->DataPointer, - CCB->DataLength, cmd->sc_data_direction); - } + scsi_dma_unmap(CCB->Command); pci_unmap_single(HostAdapter->PCI_Device, CCB->SenseDataPointer, CCB->SenseDataLength, PCI_DMA_FROMDEVICE); @@ -2648,7 +2640,8 @@ */ if (CCB->CDB[0] == INQUIRY && CCB->CDB[1] == 0 && CCB->HostAdapterStatus == BusLogic_CommandCompletedNormally) { struct BusLogic_TargetFlags *TargetFlags = &HostAdapter->TargetFlags[CCB->TargetID]; - struct SCSI_Inquiry *InquiryResult = (struct SCSI_Inquiry *) Command->request_buffer; + struct SCSI_Inquiry *InquiryResult = + (struct SCSI_Inquiry *) scsi_sglist(Command); TargetFlags->TargetExists = true; TargetFlags->TaggedQueuingSupported = InquiryResult->CmdQue; TargetFlags->WideTransfersSupported = InquiryResult->WBus16; @@ -2819,9 +2812,8 @@ int CDB_Length = Command->cmd_len; int TargetID = Command->device->id; int LogicalUnit = Command->device->lun; - void *BufferPointer = Command->request_buffer; - int BufferLength = Command->request_bufflen; - int SegmentCount = Command->use_sg; + int BufferLength = scsi_bufflen(Command); + int Count; struct BusLogic_CCB *CCB; /* SCSI REQUEST_SENSE commands will be executed automatically by the Host @@ -2851,36 +2843,35 @@ return 0; } } + /* Initialize the fields in the BusLogic Command Control Block (CCB). 
*/ - if (SegmentCount == 0 && BufferLength != 0) { - CCB->Opcode = BusLogic_InitiatorCCB; - CCB->DataLength = BufferLength; - CCB->DataPointer = pci_map_single(HostAdapter->PCI_Device, - BufferPointer, BufferLength, - Command->sc_data_direction); - } else if (SegmentCount != 0) { - struct scatterlist *ScatterList = (struct scatterlist *) BufferPointer; - int Segment, Count; + Count = scsi_dma_map(Command); + BUG_ON(Count < 0); + if (Count) { + struct scatterlist *sg; + int i; - Count = pci_map_sg(HostAdapter->PCI_Device, ScatterList, SegmentCount, - Command->sc_data_direction); CCB->Opcode = BusLogic_InitiatorCCB_ScatterGather; CCB->DataLength = Count * sizeof(struct BusLogic_ScatterGatherSegment); if (BusLogic_MultiMasterHostAdapterP(HostAdapter)) CCB->DataPointer = (unsigned int) CCB->DMA_Handle + ((unsigned long) &CCB->ScatterGatherList - (unsigned long) CCB); else CCB->DataPointer = Virtual_to_32Bit_Virtual(CCB->ScatterGatherList); - for (Segment = 0; Segment < Count; Segment++) { - CCB->ScatterGatherList[Segment].SegmentByteCount = sg_dma_len(ScatterList + Segment); - CCB->ScatterGatherList[Segment].SegmentDataPointer = sg_dma_address(ScatterList + Segment); + + scsi_for_each_sg(Command, sg, Count, i) { + CCB->ScatterGatherList[i].SegmentByteCount = + sg_dma_len(sg); + CCB->ScatterGatherList[i].SegmentDataPointer = + sg_dma_address(sg); } - } else { + } else if (!Count) { CCB->Opcode = BusLogic_InitiatorCCB; CCB->DataLength = BufferLength; CCB->DataPointer = 0; } + switch (CDB[0]) { case READ_6: case READ_10: diff -Nurb linux-2.6.22-570/drivers/scsi/Kconfig linux-2.6.22-591/drivers/scsi/Kconfig --- linux-2.6.22-570/drivers/scsi/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -739,7 +739,7 @@ config SCSI_IBMMCA tristate "IBMMCA SCSI support" - depends on MCA_LEGACY && SCSI + depends on MCA && SCSI ---help--- This is support for the IBM SCSI adapter found in many of the PS/2 series 
computers. These machines have an MCA bus, so you need to @@ -1007,6 +1007,11 @@ To compile this driver as a module, choose M here: the module will be called stex. +config 53C700_BE_BUS + bool + depends on SCSI_A4000T || SCSI_ZORRO7XX || MVME16x_SCSI || BVME6000_SCSI + default y + config SCSI_SYM53C8XX_2 tristate "SYM53C8XX Version 2 SCSI support" depends on PCI && SCSI @@ -1611,13 +1616,25 @@ If you have the Phase5 Fastlane Z3 SCSI controller, or plan to use one in the near future, say Y to this question. Otherwise, say N. -config SCSI_AMIGA7XX - bool "Amiga NCR53c710 SCSI support (EXPERIMENTAL)" - depends on AMIGA && SCSI && EXPERIMENTAL && BROKEN +config SCSI_A4000T + tristate "A4000T NCR53c710 SCSI support (EXPERIMENTAL)" + depends on AMIGA && SCSI && EXPERIMENTAL + select SCSI_SPI_ATTRS help - Support for various NCR53c710-based SCSI controllers on the Amiga. + If you have an Amiga 4000T and have SCSI devices connected to the + built-in SCSI controller, say Y. Otherwise, say N. + + To compile this driver as a module, choose M here: the + module will be called a4000t. + +config SCSI_ZORRO7XX + tristate "Zorro NCR53c710 SCSI support (EXPERIMENTAL)" + depends on ZORRO && SCSI && EXPERIMENTAL + select SCSI_SPI_ATTRS + help + Support for various NCR53c710-based SCSI controllers on Zorro + expansion boards for the Amiga. This includes: - - the builtin SCSI controller on the Amiga 4000T, - the Amiga 4091 Zorro III SCSI-2 controller, - the MacroSystem Development's WarpEngine Amiga SCSI-2 controller (info at @@ -1625,10 +1642,6 @@ - the SCSI controller on the Phase5 Blizzard PowerUP 603e+ accelerator card for the Amiga 1200, - the SCSI controller on the GVP Turbo 040/060 accelerator. - Note that all of the above SCSI controllers, except for the builtin - SCSI controller on the Amiga 4000T, reside on the Zorro expansion - bus, so you also have to enable Zorro bus support if you want to use - them. 
config OKTAGON_SCSI tristate "BSC Oktagon SCSI support (EXPERIMENTAL)" @@ -1712,8 +1725,8 @@ single-board computer. config MVME16x_SCSI - bool "NCR53C710 SCSI driver for MVME16x" - depends on MVME16x && SCSI && BROKEN + tristate "NCR53C710 SCSI driver for MVME16x" + depends on MVME16x && SCSI select SCSI_SPI_ATTRS help The Motorola MVME162, 166, 167, 172 and 177 boards use the NCR53C710 @@ -1721,22 +1734,14 @@ will want to say Y to this question. config BVME6000_SCSI - bool "NCR53C710 SCSI driver for BVME6000" - depends on BVME6000 && SCSI && BROKEN + tristate "NCR53C710 SCSI driver for BVME6000" + depends on BVME6000 && SCSI select SCSI_SPI_ATTRS help The BVME4000 and BVME6000 boards from BVM Ltd use the NCR53C710 SCSI controller chip. Almost everyone using one of these boards will want to say Y to this question. -config SCSI_NCR53C7xx_FAST - bool "allow FAST-SCSI [10MHz]" - depends on SCSI_AMIGA7XX || MVME16x_SCSI || BVME6000_SCSI - help - This will enable 10MHz FAST-SCSI transfers with your host - adapter. Some systems have problems with that speed, so it's safest - to say N here. 
- config SUN3_SCSI tristate "Sun3 NCR5380 SCSI" depends on SUN3 && SCSI diff -Nurb linux-2.6.22-570/drivers/scsi/Makefile linux-2.6.22-591/drivers/scsi/Makefile --- linux-2.6.22-570/drivers/scsi/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -37,7 +37,8 @@ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o iscsi_tcp.o obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o -obj-$(CONFIG_SCSI_AMIGA7XX) += amiga7xx.o 53c7xx.o +obj-$(CONFIG_SCSI_A4000T) += 53c700.o a4000t.o +obj-$(CONFIG_SCSI_ZORRO7XX) += 53c700.o zorro7xx.o obj-$(CONFIG_A3000_SCSI) += a3000.o wd33c93.o obj-$(CONFIG_A2091_SCSI) += a2091.o wd33c93.o obj-$(CONFIG_GVP11_SCSI) += gvp11.o wd33c93.o @@ -53,8 +54,8 @@ obj-$(CONFIG_MAC_SCSI) += mac_scsi.o obj-$(CONFIG_SCSI_MAC_ESP) += mac_esp.o NCR53C9x.o obj-$(CONFIG_SUN3_SCSI) += sun3_scsi.o sun3_scsi_vme.o -obj-$(CONFIG_MVME16x_SCSI) += mvme16x.o 53c7xx.o -obj-$(CONFIG_BVME6000_SCSI) += bvme6000.o 53c7xx.o +obj-$(CONFIG_MVME16x_SCSI) += 53c700.o mvme16x_scsi.o +obj-$(CONFIG_BVME6000_SCSI) += 53c700.o bvme6000_scsi.o obj-$(CONFIG_SCSI_SIM710) += 53c700.o sim710.o obj-$(CONFIG_SCSI_ADVANSYS) += advansys.o obj-$(CONFIG_SCSI_PSI240I) += psi240i.o @@ -168,10 +169,8 @@ oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o # Files generated that shall be removed upon make clean -clean-files := 53c7xx_d.h 53c700_d.h \ - 53c7xx_u.h 53c700_u.h +clean-files := 53c700_d.h 53c700_u.h -$(obj)/53c7xx.o: $(obj)/53c7xx_d.h $(obj)/53c7xx_u.h $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h # If you want to play with the firmware, uncomment @@ -179,11 +178,6 @@ ifdef GENERATE_FIRMWARE -$(obj)/53c7xx_d.h: $(src)/53c7xx.scr $(src)/script_asm.pl - $(CPP) -traditional -DCHIP=710 - < $< | grep -v '^#' | $(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h) - -$(obj)/53c7xx_u.h: $(obj)/53c7xx_d.h - $(obj)/53c700_d.h: $(src)/53c700.scr $(src)/script_asm.pl $(PERL) -s $(src)/script_asm.pl -ncr7x0_family 
$@ $(@:_d.h=_u.h) < $< diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.c linux-2.6.22-591/drivers/scsi/NCR5380.c --- linux-2.6.22-570/drivers/scsi/NCR5380.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/NCR5380.c 2007-12-21 15:36:12.000000000 -0500 @@ -347,7 +347,7 @@ if((r & bit) == val) return 0; if(!in_interrupt()) - yield(); + cond_resched(); else cpu_relax(); } @@ -357,7 +357,7 @@ static struct { unsigned char value; const char *name; -} phases[] = { +} phases[] __maybe_unused = { {PHASE_DATAOUT, "DATAOUT"}, {PHASE_DATAIN, "DATAIN"}, {PHASE_CMDOUT, "CMDOUT"}, @@ -575,7 +575,8 @@ * Locks: none, irqs must be enabled on entry */ -static int __init NCR5380_probe_irq(struct Scsi_Host *instance, int possible) +static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance, + int possible) { NCR5380_local_declare(); struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata; @@ -629,7 +630,8 @@ * Locks: none */ -static void __init NCR5380_print_options(struct Scsi_Host *instance) +static void __init __maybe_unused +NCR5380_print_options(struct Scsi_Host *instance) { printk(" generic options" #ifdef AUTOPROBE_IRQ @@ -703,8 +705,8 @@ static char *lprint_opcode(int opcode, char *pos, char *buffer, int length); -static -int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start, off_t offset, int length, int inout) +static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance, + char *buffer, char **start, off_t offset, int length, int inout) { char *pos = buffer; struct NCR5380_hostdata *hostdata; diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.h linux-2.6.22-591/drivers/scsi/NCR5380.h --- linux-2.6.22-570/drivers/scsi/NCR5380.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/NCR5380.h 2007-12-21 15:36:12.000000000 -0500 @@ -299,7 +299,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id); #endif static void NCR5380_main(struct work_struct *work); 
-static void NCR5380_print_options(struct Scsi_Host *instance); +static void __maybe_unused NCR5380_print_options(struct Scsi_Host *instance); #ifdef NDEBUG static void NCR5380_print_phase(struct Scsi_Host *instance); static void NCR5380_print(struct Scsi_Host *instance); @@ -307,8 +307,8 @@ static int NCR5380_abort(Scsi_Cmnd * cmd); static int NCR5380_bus_reset(Scsi_Cmnd * cmd); static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *)); -static int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start, -off_t offset, int length, int inout); +static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance, + char *buffer, char **start, off_t offset, int length, int inout); static void NCR5380_reselect(struct Scsi_Host *instance); static int NCR5380_select(struct Scsi_Host *instance, Scsi_Cmnd * cmd, int tag); diff -Nurb linux-2.6.22-570/drivers/scsi/NCR53c406a.c linux-2.6.22-591/drivers/scsi/NCR53c406a.c --- linux-2.6.22-570/drivers/scsi/NCR53c406a.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/NCR53c406a.c 2007-12-21 15:36:12.000000000 -0500 @@ -698,7 +698,7 @@ int i; VDEB(printk("NCR53c406a_queue called\n")); - DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, SCpnt->request_bufflen)); + DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, scsi_bufflen(SCpnt))); #if 0 VDEB(for (i = 0; i < SCpnt->cmd_len; i++) @@ -785,8 +785,8 @@ unsigned char status, int_reg; #if USE_PIO unsigned char pio_status; - struct scatterlist *sglist; - unsigned int sgcount; + struct scatterlist *sg; + int i; #endif VDEB(printk("NCR53c406a_intr called\n")); @@ -866,21 +866,17 @@ current_SC->SCp.phase = data_out; VDEB(printk("NCR53c406a: Data-Out phase\n")); outb(FLUSH_FIFO, CMD_REG); - LOAD_DMA_COUNT(current_SC->request_bufflen); /* Max transfer size */ + 
LOAD_DMA_COUNT(scsi_bufflen(current_SC)); /* Max transfer size */ #if USE_DMA /* No s/g support for DMA */ - NCR53c406a_dma_write(current_SC->request_buffer, current_SC->request_bufflen); + NCR53c406a_dma_write(scsi_sglist(current_SC), + scsi_bufflen(current_SC)); + #endif /* USE_DMA */ outb(TRANSFER_INFO | DMA_OP, CMD_REG); #if USE_PIO - if (!current_SC->use_sg) /* Don't use scatter-gather */ - NCR53c406a_pio_write(current_SC->request_buffer, current_SC->request_bufflen); - else { /* use scatter-gather */ - sgcount = current_SC->use_sg; - sglist = current_SC->request_buffer; - while (sgcount--) { - NCR53c406a_pio_write(page_address(sglist->page) + sglist->offset, sglist->length); - sglist++; - } + scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) { + NCR53c406a_pio_write(page_address(sg->page) + sg->offset, + sg->length); } REG0; #endif /* USE_PIO */ @@ -893,21 +889,16 @@ current_SC->SCp.phase = data_in; VDEB(printk("NCR53c406a: Data-In phase\n")); outb(FLUSH_FIFO, CMD_REG); - LOAD_DMA_COUNT(current_SC->request_bufflen); /* Max transfer size */ + LOAD_DMA_COUNT(scsi_bufflen(current_SC)); /* Max transfer size */ #if USE_DMA /* No s/g support for DMA */ - NCR53c406a_dma_read(current_SC->request_buffer, current_SC->request_bufflen); + NCR53c406a_dma_read(scsi_sglist(current_SC), + scsi_bufflen(current_SC)); #endif /* USE_DMA */ outb(TRANSFER_INFO | DMA_OP, CMD_REG); #if USE_PIO - if (!current_SC->use_sg) /* Don't use scatter-gather */ - NCR53c406a_pio_read(current_SC->request_buffer, current_SC->request_bufflen); - else { /* Use scatter-gather */ - sgcount = current_SC->use_sg; - sglist = current_SC->request_buffer; - while (sgcount--) { - NCR53c406a_pio_read(page_address(sglist->page) + sglist->offset, sglist->length); - sglist++; - } + scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) { + NCR53c406a_pio_read(page_address(sg->page) + sg->offset, + sg->length); } REG0; #endif /* USE_PIO */ diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.c 
linux-2.6.22-591/drivers/scsi/a100u2w.c --- linux-2.6.22-570/drivers/scsi/a100u2w.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/a100u2w.c 2007-12-21 15:36:12.000000000 -0500 @@ -19,27 +19,6 @@ * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * - * -------------------------------------------------------------------------- - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Where this Software is combined with software released under the terms of - * the GNU General Public License ("GPL") and the terms of the GPL would require the - * combined work to also be released under the terms of the GPL, the terms - * and conditions of this License will apply in addition to those of the - * GPL with the exception of any terms or conditions of this License that - * conflict with, or are expressly prohibited by, the GPL. 
- * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -75,6 +54,8 @@ * 9/28/04 Christoph Hellwig * - merge the two source files * - remove internal queueing code + * 14/06/07 Alan Cox + * - Grand cleanup and Linuxisation */ #include @@ -102,14 +83,12 @@ #include "a100u2w.h" -#define JIFFIES_TO_MS(t) ((t) * 1000 / HZ) -#define MS_TO_JIFFIES(j) ((j * HZ) / 1000) +static struct orc_scb *__orc_alloc_scb(struct orc_host * host); +static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb); -static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp); -static void inia100SCBPost(BYTE * pHcb, BYTE * pScb); +static struct orc_nvram nvram, *nvramp = &nvram; -static NVRAM nvram, *nvramp = &nvram; -static UCHAR dftNvRam[64] = +static u8 default_nvram[64] = { /*----------header -------------*/ 0x01, /* 0x00: Sub System Vendor ID 0 */ @@ -158,823 +137,882 @@ }; -/***************************************************************************/ -static void waitForPause(unsigned amount) -{ - ULONG the_time = jiffies + MS_TO_JIFFIES(amount); - while (time_before_eq(jiffies, the_time)) - cpu_relax(); -} - -/***************************************************************************/ -static UCHAR waitChipReady(ORC_HCS * hcsp) +static u8 wait_chip_ready(struct orc_host * host) { int i; for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */ - if (ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HOSTSTOP) /* Wait HOSTSTOP set */ + if (inb(host->base + ORC_HCTRL) & HOSTSTOP) /* Wait HOSTSTOP set */ return 1; - waitForPause(100); /* wait 100ms before try again */ + mdelay(100); } return 0; } -/***************************************************************************/ -static UCHAR waitFWReady(ORC_HCS * hcsp) +static u8 wait_firmware_ready(struct orc_host * host) { int i; for (i = 0; i < 10; i++) { /* Wait 1 second 
for report timeout */ - if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) /* Wait READY set */ + if (inb(host->base + ORC_HSTUS) & RREADY) /* Wait READY set */ return 1; - waitForPause(100); /* wait 100ms before try again */ + mdelay(100); /* wait 100ms before try again */ } return 0; } /***************************************************************************/ -static UCHAR waitSCSIRSTdone(ORC_HCS * hcsp) +static u8 wait_scsi_reset_done(struct orc_host * host) { int i; for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */ - if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & SCSIRST)) /* Wait SCSIRST done */ + if (!(inb(host->base + ORC_HCTRL) & SCSIRST)) /* Wait SCSIRST done */ return 1; - waitForPause(100); /* wait 100ms before try again */ + mdelay(100); /* wait 100ms before try again */ } return 0; } /***************************************************************************/ -static UCHAR waitHDOoff(ORC_HCS * hcsp) +static u8 wait_HDO_off(struct orc_host * host) { int i; for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */ - if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HDO)) /* Wait HDO off */ + if (!(inb(host->base + ORC_HCTRL) & HDO)) /* Wait HDO off */ return 1; - waitForPause(100); /* wait 100ms before try again */ + mdelay(100); /* wait 100ms before try again */ } return 0; } /***************************************************************************/ -static UCHAR waitHDIset(ORC_HCS * hcsp, UCHAR * pData) +static u8 wait_hdi_set(struct orc_host * host, u8 * data) { int i; for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */ - if ((*pData = ORC_RD(hcsp->HCS_Base, ORC_HSTUS)) & HDI) + if ((*data = inb(host->base + ORC_HSTUS)) & HDI) return 1; /* Wait HDI set */ - waitForPause(100); /* wait 100ms before try again */ + mdelay(100); /* wait 100ms before try again */ } return 0; } /***************************************************************************/ -static unsigned short get_FW_version(ORC_HCS * hcsp) +static unsigned short 
orc_read_fwrev(struct orc_host * host) { - UCHAR bData; - union { - unsigned short sVersion; - unsigned char cVersion[2]; - } Version; + u16 version; + u8 data; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_VERSION); - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(ORC_CMD_VERSION, host->base + ORC_HDATA); + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */ + if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */ return 0; - Version.cVersion[0] = ORC_RD(hcsp->HCS_Base, ORC_HDATA); - ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */ + version = inb(host->base + ORC_HDATA); + outb(data, host->base + ORC_HSTUS); /* Clear HDI */ - if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */ + if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */ return 0; - Version.cVersion[1] = ORC_RD(hcsp->HCS_Base, ORC_HDATA); - ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */ + version |= inb(host->base + ORC_HDATA) << 8; + outb(data, host->base + ORC_HSTUS); /* Clear HDI */ - return (Version.sVersion); + return version; } /***************************************************************************/ -static UCHAR set_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char value) +static u8 orc_nv_write(struct orc_host * host, unsigned char address, unsigned char value) { - ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_SET_NVM); /* Write command */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(ORC_CMD_SET_NVM, host->base + ORC_HDATA); /* Write command */ + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, address); /* Write address */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(address, host->base + ORC_HDATA); /* Write address */ + 
outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, value); /* Write value */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(value, host->base + ORC_HDATA); /* Write value */ + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; return 1; } /***************************************************************************/ -static UCHAR get_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char *pDataIn) +static u8 orc_nv_read(struct orc_host * host, u8 address, u8 *ptr) { - unsigned char bData; + unsigned char data; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_GET_NVM); /* Write command */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(ORC_CMD_GET_NVM, host->base + ORC_HDATA); /* Write command */ + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, address); /* Write address */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(address, host->base + ORC_HDATA); /* Write address */ + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */ + if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */ return 0; - *pDataIn = ORC_RD(hcsp->HCS_Base, ORC_HDATA); - ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */ + *ptr = inb(host->base + ORC_HDATA); + outb(data, host->base + ORC_HSTUS); /* Clear HDI */ return 1; + } -/***************************************************************************/ -static void orc_exec_scb(ORC_HCS * hcsp, ORC_SCB * scbp) +/** + * orc_exec_sb - Queue an SCB with the HA + * @host: host adapter the SCB belongs to + * @scb: SCB to queue for execution + */ + +static void orc_exec_scb(struct orc_host * 
host, struct orc_scb * scb) { - scbp->SCB_Status = ORCSCB_POST; - ORC_WR(hcsp->HCS_Base + ORC_PQUEUE, scbp->SCB_ScbIdx); - return; + scb->status = ORCSCB_POST; + outb(scb->scbidx, host->base + ORC_PQUEUE); } -/*********************************************************************** - Read SCSI H/A configuration parameters from serial EEPROM -************************************************************************/ -static int se2_rd_all(ORC_HCS * hcsp) +/** + * se2_rd_all - read SCSI parameters from EEPROM + * @host: Host whose EEPROM is being loaded + * + * Read SCSI H/A configuration parameters from serial EEPROM + */ + +static int se2_rd_all(struct orc_host * host) { int i; - UCHAR *np, chksum = 0; + u8 *np, chksum = 0; - np = (UCHAR *) nvramp; + np = (u8 *) nvramp; for (i = 0; i < 64; i++, np++) { /* <01> */ - if (get_NVRAM(hcsp, (unsigned char) i, np) == 0) + if (orc_nv_read(host, (u8) i, np) == 0) return -1; -// *np++ = get_NVRAM(hcsp, (unsigned char ) i); } -/*------ Is ckecksum ok ? ------*/ - np = (UCHAR *) nvramp; + /*------ Is ckecksum ok ? ------*/ + np = (u8 *) nvramp; for (i = 0; i < 63; i++) chksum += *np++; - if (nvramp->CheckSum != (UCHAR) chksum) + if (nvramp->CheckSum != (u8) chksum) return -1; return 1; } -/************************************************************************ - Update SCSI H/A configuration parameters from serial EEPROM -*************************************************************************/ -static void se2_update_all(ORC_HCS * hcsp) +/** + * se2_update_all - update the EEPROM + * @host: Host whose EEPROM is being updated + * + * Update changed bytes in the EEPROM image. 
+ */ + +static void se2_update_all(struct orc_host * host) { /* setup default pattern */ int i; - UCHAR *np, *np1, chksum = 0; + u8 *np, *np1, chksum = 0; /* Calculate checksum first */ - np = (UCHAR *) dftNvRam; + np = (u8 *) default_nvram; for (i = 0; i < 63; i++) chksum += *np++; *np = chksum; - np = (UCHAR *) dftNvRam; - np1 = (UCHAR *) nvramp; + np = (u8 *) default_nvram; + np1 = (u8 *) nvramp; for (i = 0; i < 64; i++, np++, np1++) { - if (*np != *np1) { - set_NVRAM(hcsp, (unsigned char) i, *np); - } + if (*np != *np1) + orc_nv_write(host, (u8) i, *np); } - return; } -/************************************************************************* - Function name : read_eeprom -**************************************************************************/ -static void read_eeprom(ORC_HCS * hcsp) -{ - if (se2_rd_all(hcsp) != 1) { - se2_update_all(hcsp); /* setup default pattern */ - se2_rd_all(hcsp); /* load again */ +/** + * read_eeprom - load EEPROM + * @host: Host EEPROM to read + * + * Read the EEPROM for a given host. If it is invalid or fails + * the restore the defaults and use them. + */ + +static void read_eeprom(struct orc_host * host) +{ + if (se2_rd_all(host) != 1) { + se2_update_all(host); /* setup default pattern */ + se2_rd_all(host); /* load again */ } } -/***************************************************************************/ -static UCHAR load_FW(ORC_HCS * hcsp) +/** + * orc_load_firmware - initialise firmware + * @host: Host to set up + * + * Load the firmware from the EEPROM into controller SRAM. This + * is basically a 4K block copy and then a 4K block read to check + * correctness. 
The rest is convulted by the indirect interfaces + * in the hardware + */ + +static u8 orc_load_firmware(struct orc_host * host) { - U32 dData; - USHORT wBIOSAddress; - USHORT i; - UCHAR *pData, bData; - - - bData = ORC_RD(hcsp->HCS_Base, ORC_GCFG); - ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData | EEPRG); /* Enable EEPROM programming */ - ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, 0x00); - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x00); - if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0x55) { - ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /* Disable EEPROM programming */ - return 0; - } - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x01); - if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0xAA) { - ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /* Disable EEPROM programming */ - return 0; - } - ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD); /* Enable SRAM programming */ - pData = (UCHAR *) & dData; - dData = 0; /* Initial FW address to 0 */ - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x10); - *pData = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */ - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x11); - *(pData + 1) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */ - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x12); - *(pData + 2) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */ - ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, *(pData + 2)); - ORC_WRLONG(hcsp->HCS_Base + ORC_FWBASEADR, dData); /* Write FW address */ + u32 data32; + u16 bios_addr; + u16 i; + u8 *data32_ptr, data; + + + /* Set up the EEPROM for access */ - wBIOSAddress = (USHORT) dData; /* FW code locate at BIOS address + ? 
*/ - for (i = 0, pData = (UCHAR *) & dData; /* Download the code */ + data = inb(host->base + ORC_GCFG); + outb(data | EEPRG, host->base + ORC_GCFG); /* Enable EEPROM programming */ + outb(0x00, host->base + ORC_EBIOSADR2); + outw(0x0000, host->base + ORC_EBIOSADR0); + if (inb(host->base + ORC_EBIOSDATA) != 0x55) { + outb(data, host->base + ORC_GCFG); /* Disable EEPROM programming */ + return 0; + } + outw(0x0001, host->base + ORC_EBIOSADR0); + if (inb(host->base + ORC_EBIOSDATA) != 0xAA) { + outb(data, host->base + ORC_GCFG); /* Disable EEPROM programming */ + return 0; + } + + outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL); /* Enable SRAM programming */ + data32_ptr = (u8 *) & data32; + data32 = 0; /* Initial FW address to 0 */ + outw(0x0010, host->base + ORC_EBIOSADR0); + *data32_ptr = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */ + outw(0x0011, host->base + ORC_EBIOSADR0); + *(data32_ptr + 1) = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */ + outw(0x0012, host->base + ORC_EBIOSADR0); + *(data32_ptr + 2) = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */ + outw(*(data32_ptr + 2), host->base + ORC_EBIOSADR2); + outl(data32, host->base + ORC_FWBASEADR); /* Write FW address */ + + /* Copy the code from the BIOS to the SRAM */ + + bios_addr = (u16) data32; /* FW code locate at BIOS address + ? 
*/ + for (i = 0, data32_ptr = (u8 *) & data32; /* Download the code */ i < 0x1000; /* Firmware code size = 4K */ - i++, wBIOSAddress++) { - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress); - *pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */ + i++, bios_addr++) { + outw(bios_addr, host->base + ORC_EBIOSADR0); + *data32_ptr++ = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */ if ((i % 4) == 3) { - ORC_WRLONG(hcsp->HCS_Base + ORC_RISCRAM, dData); /* Write every 4 bytes */ - pData = (UCHAR *) & dData; + outl(data32, host->base + ORC_RISCRAM); /* Write every 4 bytes */ + data32_ptr = (u8 *) & data32; } } - ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD); /* Reset program count 0 */ - wBIOSAddress -= 0x1000; /* Reset the BIOS adddress */ - for (i = 0, pData = (UCHAR *) & dData; /* Check the code */ + /* Go back and check they match */ + + outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL); /* Reset program count 0 */ + bios_addr -= 0x1000; /* Reset the BIOS adddress */ + for (i = 0, data32_ptr = (u8 *) & data32; /* Check the code */ i < 0x1000; /* Firmware code size = 4K */ - i++, wBIOSAddress++) { - ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress); - *pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */ + i++, bios_addr++) { + outw(bios_addr, host->base + ORC_EBIOSADR0); + *data32_ptr++ = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */ if ((i % 4) == 3) { - if (ORC_RDLONG(hcsp->HCS_Base, ORC_RISCRAM) != dData) { - ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST); /* Reset program to 0 */ - ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /*Disable EEPROM programming */ + if (inl(host->base + ORC_RISCRAM) != data32) { + outb(PRGMRST, host->base + ORC_RISCCTL); /* Reset program to 0 */ + outb(data, host->base + ORC_GCFG); /*Disable EEPROM programming */ return 0; } - pData = (UCHAR *) & dData; + data32_ptr = (u8 *) & data32; } } - ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST); /* Reset program to 0 */ - 
ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /* Disable EEPROM programming */ + + /* Success */ + outb(PRGMRST, host->base + ORC_RISCCTL); /* Reset program to 0 */ + outb(data, host->base + ORC_GCFG); /* Disable EEPROM programming */ return 1; } /***************************************************************************/ -static void setup_SCBs(ORC_HCS * hcsp) +static void setup_SCBs(struct orc_host * host) { - ORC_SCB *pVirScb; + struct orc_scb *scb; int i; - ESCB *pVirEscb; - dma_addr_t pPhysEscb; + struct orc_extended_scb *escb; + dma_addr_t escb_phys; - /* Setup SCB HCS_Base and SCB Size registers */ - ORC_WR(hcsp->HCS_Base + ORC_SCBSIZE, ORC_MAXQUEUE); /* Total number of SCBs */ - /* SCB HCS_Base address 0 */ - ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE0, hcsp->HCS_physScbArray); - /* SCB HCS_Base address 1 */ - ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE1, hcsp->HCS_physScbArray); + /* Setup SCB base and SCB Size registers */ + outb(ORC_MAXQUEUE, host->base + ORC_SCBSIZE); /* Total number of SCBs */ + /* SCB base address 0 */ + outl(host->scb_phys, host->base + ORC_SCBBASE0); + /* SCB base address 1 */ + outl(host->scb_phys, host->base + ORC_SCBBASE1); /* setup scatter list address with one buffer */ - pVirScb = hcsp->HCS_virScbArray; - pVirEscb = hcsp->HCS_virEscbArray; + scb = host->scb_virt; + escb = host->escb_virt; for (i = 0; i < ORC_MAXQUEUE; i++) { - pPhysEscb = (hcsp->HCS_physEscbArray + (sizeof(ESCB) * i)); - pVirScb->SCB_SGPAddr = (U32) pPhysEscb; - pVirScb->SCB_SensePAddr = (U32) pPhysEscb; - pVirScb->SCB_EScb = pVirEscb; - pVirScb->SCB_ScbIdx = i; - pVirScb++; - pVirEscb++; + escb_phys = (host->escb_phys + (sizeof(struct orc_extended_scb) * i)); + scb->sg_addr = (u32) escb_phys; + scb->sense_addr = (u32) escb_phys; + scb->escb = escb; + scb->scbidx = i; + scb++; + escb++; } - - return; } -/***************************************************************************/ -static void initAFlag(ORC_HCS * hcsp) +/** + * init_alloc_map - initialise allocation map 
+ * @host: host map to configure + * + * Initialise the allocation maps for this device. If the device + * is not quiescent the caller must hold the allocation lock + */ + +static void init_alloc_map(struct orc_host * host) { - UCHAR i, j; + u8 i, j; for (i = 0; i < MAX_CHANNELS; i++) { for (j = 0; j < 8; j++) { - hcsp->BitAllocFlag[i][j] = 0xffffffff; + host->allocation_map[i][j] = 0xffffffff; } } } -/***************************************************************************/ -static int init_orchid(ORC_HCS * hcsp) +/** + * init_orchid - initialise the host adapter + * @host:host adapter to initialise + * + * Initialise the controller and if neccessary load the firmware. + * + * Returns -1 if the initialisation fails. + */ + +static int init_orchid(struct orc_host * host) { - UBYTE *readBytep; - USHORT revision; - UCHAR i; - - initAFlag(hcsp); - ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFF); /* Disable all interrupt */ - if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) { /* Orchid is ready */ - revision = get_FW_version(hcsp); + u8 *ptr; + u16 revision; + u8 i; + + init_alloc_map(host); + outb(0xFF, host->base + ORC_GIMSK); /* Disable all interrupts */ + + if (inb(host->base + ORC_HSTUS) & RREADY) { /* Orchid is ready */ + revision = orc_read_fwrev(host); if (revision == 0xFFFF) { - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST); /* Reset Host Adapter */ - if (waitChipReady(hcsp) == 0) - return (-1); - load_FW(hcsp); /* Download FW */ - setup_SCBs(hcsp); /* Setup SCB HCS_Base and SCB Size registers */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, 0); /* clear HOSTSTOP */ - if (waitFWReady(hcsp) == 0) - return (-1); + outb(DEVRST, host->base + ORC_HCTRL); /* Reset Host Adapter */ + if (wait_chip_ready(host) == 0) + return -1; + orc_load_firmware(host); /* Download FW */ + setup_SCBs(host); /* Setup SCB base and SCB Size registers */ + outb(0x00, host->base + ORC_HCTRL); /* clear HOSTSTOP */ + if (wait_firmware_ready(host) == 0) + return -1; /* Wait for firmware ready */ } else { - 
setup_SCBs(hcsp); /* Setup SCB HCS_Base and SCB Size registers */ + setup_SCBs(host); /* Setup SCB base and SCB Size registers */ } } else { /* Orchid is not Ready */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST); /* Reset Host Adapter */ - if (waitChipReady(hcsp) == 0) - return (-1); - load_FW(hcsp); /* Download FW */ - setup_SCBs(hcsp); /* Setup SCB HCS_Base and SCB Size registers */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); /* Do Hardware Reset & */ + outb(DEVRST, host->base + ORC_HCTRL); /* Reset Host Adapter */ + if (wait_chip_ready(host) == 0) + return -1; + orc_load_firmware(host); /* Download FW */ + setup_SCBs(host); /* Setup SCB base and SCB Size registers */ + outb(HDO, host->base + ORC_HCTRL); /* Do Hardware Reset & */ /* clear HOSTSTOP */ - if (waitFWReady(hcsp) == 0) /* Wait for firmware ready */ - return (-1); + if (wait_firmware_ready(host) == 0) /* Wait for firmware ready */ + return -1; } -/*------------- get serial EEProm settting -------*/ + /* Load an EEProm copy into RAM */ + /* Assumes single threaded at this point */ + read_eeprom(host); - read_eeprom(hcsp); - - if (nvramp->Revision != 1) - return (-1); - - hcsp->HCS_SCSI_ID = nvramp->SCSI0Id; - hcsp->HCS_BIOS = nvramp->BIOSConfig1; - hcsp->HCS_MaxTar = MAX_TARGETS; - readBytep = (UCHAR *) & (nvramp->Target00Config); - for (i = 0; i < 16; readBytep++, i++) { - hcsp->TargetFlag[i] = *readBytep; - hcsp->MaximumTags[i] = ORC_MAXTAGS; - } /* for */ + if (nvramp->revision != 1) + return -1; - if (nvramp->SCSI0Config & NCC_BUSRESET) { /* Reset SCSI bus */ - hcsp->HCS_Flags |= HCF_SCSI_RESET; + host->scsi_id = nvramp->scsi_id; + host->BIOScfg = nvramp->BIOSConfig1; + host->max_targets = MAX_TARGETS; + ptr = (u8 *) & (nvramp->Target00Config); + for (i = 0; i < 16; ptr++, i++) { + host->target_flag[i] = *ptr; + host->max_tags[i] = ORC_MAXTAGS; } - ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFB); /* enable RP FIFO interrupt */ - return (0); + + if (nvramp->SCSI0Config & NCC_BUSRESET) + host->flags |= 
HCF_SCSI_RESET; + outb(0xFB, host->base + ORC_GIMSK); /* enable RP FIFO interrupt */ + return 0; } -/***************************************************************************** - Function name : orc_reset_scsi_bus - Description : Reset registers, reset a hanging bus and - kill active and disconnected commands for target w/o soft reset - Input : pHCB - Pointer to host adapter structure - Output : None. - Return : pSRB - Pointer to SCSI request block. -*****************************************************************************/ -static int orc_reset_scsi_bus(ORC_HCS * pHCB) +/** + * orc_reset_scsi_bus - perform bus reset + * @host: host being reset + * + * Perform a full bus reset on the adapter. + */ + +static int orc_reset_scsi_bus(struct orc_host * host) { /* I need Host Control Block Information */ - ULONG flags; + unsigned long flags; - spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags); + spin_lock_irqsave(&host->allocation_lock, flags); - initAFlag(pHCB); + init_alloc_map(host); /* reset scsi bus */ - ORC_WR(pHCB->HCS_Base + ORC_HCTRL, SCSIRST); - if (waitSCSIRSTdone(pHCB) == 0) { - spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); + outb(SCSIRST, host->base + ORC_HCTRL); + /* FIXME: We can spend up to a second with the lock held and + interrupts off here */ + if (wait_scsi_reset_done(host) == 0) { + spin_unlock_irqrestore(&host->allocation_lock, flags); return FAILED; } else { - spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); + spin_unlock_irqrestore(&host->allocation_lock, flags); return SUCCESS; } } -/***************************************************************************** - Function name : orc_device_reset - Description : Reset registers, reset a hanging bus and - kill active and disconnected commands for target w/o soft reset - Input : pHCB - Pointer to host adapter structure - Output : None. - Return : pSRB - Pointer to SCSI request block. 
-*****************************************************************************/ -static int orc_device_reset(ORC_HCS * pHCB, struct scsi_cmnd *SCpnt, unsigned int target) +/** + * orc_device_reset - device reset handler + * @host: host to reset + * @cmd: command causing the reset + * @target; target device + * + * Reset registers, reset a hanging bus and kill active and disconnected + * commands for target w/o soft reset + */ + +static int orc_device_reset(struct orc_host * host, struct scsi_cmnd *cmd, unsigned int target) { /* I need Host Control Block Information */ - ORC_SCB *pScb; - ESCB *pVirEscb; - ORC_SCB *pVirScb; - UCHAR i; - ULONG flags; - - spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags); - pScb = (ORC_SCB *) NULL; - pVirEscb = (ESCB *) NULL; + struct orc_scb *scb; + struct orc_extended_scb *escb; + struct orc_scb *host_scb; + u8 i; + unsigned long flags; + + spin_lock_irqsave(&(host->allocation_lock), flags); + scb = (struct orc_scb *) NULL; + escb = (struct orc_extended_scb *) NULL; /* setup scatter list address with one buffer */ - pVirScb = pHCB->HCS_virScbArray; + host_scb = host->scb_virt; - initAFlag(pHCB); - /* device reset */ + /* FIXME: is this safe if we then fail to issue the reset or race + a completion ? 
*/ + init_alloc_map(host); + + /* Find the scb corresponding to the command */ for (i = 0; i < ORC_MAXQUEUE; i++) { - pVirEscb = pVirScb->SCB_EScb; - if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt)) + escb = host_scb->escb; + if (host_scb->status && escb->srb == cmd) break; - pVirScb++; + host_scb++; } if (i == ORC_MAXQUEUE) { - printk("Unable to Reset - No SCB Found\n"); - spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); + printk(KERN_ERR "Unable to Reset - No SCB Found\n"); + spin_unlock_irqrestore(&(host->allocation_lock), flags); return FAILED; } - if ((pScb = orc_alloc_scb(pHCB)) == NULL) { - spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); + + /* Allocate a new SCB for the reset command to the firmware */ + if ((scb = __orc_alloc_scb(host)) == NULL) { + /* Can't happen.. */ + spin_unlock_irqrestore(&(host->allocation_lock), flags); return FAILED; } - pScb->SCB_Opcode = ORC_BUSDEVRST; - pScb->SCB_Target = target; - pScb->SCB_HaStat = 0; - pScb->SCB_TaStat = 0; - pScb->SCB_Status = 0x0; - pScb->SCB_Link = 0xFF; - pScb->SCB_Reserved0 = 0; - pScb->SCB_Reserved1 = 0; - pScb->SCB_XferLen = 0; - pScb->SCB_SGLen = 0; - - pVirEscb->SCB_Srb = NULL; - pVirEscb->SCB_Srb = SCpnt; - orc_exec_scb(pHCB, pScb); /* Start execute SCB */ - spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); + + /* Reset device is handled by the firmare, we fill in an SCB and + fire it at the controller, it does the rest */ + scb->opcode = ORC_BUSDEVRST; + scb->target = target; + scb->hastat = 0; + scb->tastat = 0; + scb->status = 0x0; + scb->link = 0xFF; + scb->reserved0 = 0; + scb->reserved1 = 0; + scb->xferlen = 0; + scb->sg_len = 0; + + escb->srb = NULL; + escb->srb = cmd; + orc_exec_scb(host, scb); /* Start execute SCB */ + spin_unlock_irqrestore(&host->allocation_lock, flags); return SUCCESS; } +/** + * __orc_alloc_scb - allocate an SCB + * @host: host to allocate from + * + * Allocate an SCB and return a pointer to the SCB object. 
NULL + * is returned if no SCB is free. The caller must already hold + * the allocator lock at this point. + */ -/***************************************************************************/ -static ORC_SCB *__orc_alloc_scb(ORC_HCS * hcsp) + +static struct orc_scb *__orc_alloc_scb(struct orc_host * host) { - ORC_SCB *pTmpScb; - UCHAR Ch; - ULONG idx; - UCHAR index; - UCHAR i; + u8 channel; + unsigned long idx; + u8 index; + u8 i; - Ch = hcsp->HCS_Index; + channel = host->index; for (i = 0; i < 8; i++) { for (index = 0; index < 32; index++) { - if ((hcsp->BitAllocFlag[Ch][i] >> index) & 0x01) { - hcsp->BitAllocFlag[Ch][i] &= ~(1 << index); + if ((host->allocation_map[channel][i] >> index) & 0x01) { + host->allocation_map[channel][i] &= ~(1 << index); break; } } idx = index + 32 * i; - pTmpScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (idx * sizeof(ORC_SCB))); - return (pTmpScb); + /* Translate the index to a structure instance */ + return (struct orc_scb *) ((unsigned long) host->scb_virt + (idx * sizeof(struct orc_scb))); } - return (NULL); + return NULL; } -static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp) +/** + * orc_alloc_scb - allocate an SCB + * @host: host to allocate from + * + * Allocate an SCB and return a pointer to the SCB object. NULL + * is returned if no SCB is free. + */ + +static struct orc_scb *orc_alloc_scb(struct orc_host * host) { - ORC_SCB *pTmpScb; - ULONG flags; + struct orc_scb *scb; + unsigned long flags; - spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags); - pTmpScb = __orc_alloc_scb(hcsp); - spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); - return (pTmpScb); + spin_lock_irqsave(&host->allocation_lock, flags); + scb = __orc_alloc_scb(host); + spin_unlock_irqrestore(&host->allocation_lock, flags); + return scb; } +/** + * orc_release_scb - release an SCB + * @host: host owning the SCB + * @scb: SCB that is now free + * + * Called to return a completed SCB to the allocation pool. 
Before + * calling the SCB must be out of use on both the host and the HA. + */ -/***************************************************************************/ -static void orc_release_scb(ORC_HCS * hcsp, ORC_SCB * scbp) +static void orc_release_scb(struct orc_host *host, struct orc_scb *scb) { - ULONG flags; - UCHAR Index; - UCHAR i; - UCHAR Ch; - - spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags); - Ch = hcsp->HCS_Index; - Index = scbp->SCB_ScbIdx; - i = Index / 32; - Index %= 32; - hcsp->BitAllocFlag[Ch][i] |= (1 << Index); - spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); + unsigned long flags; + u8 index, i, channel; + + spin_lock_irqsave(&(host->allocation_lock), flags); + channel = host->index; /* Channel */ + index = scb->scbidx; + i = index / 32; + index %= 32; + host->allocation_map[channel][i] |= (1 << index); + spin_unlock_irqrestore(&(host->allocation_lock), flags); } -/***************************************************************************** - Function name : abort_SCB - Description : Abort a queued command. - (commands that are on the bus can't be aborted easily) - Input : pHCB - Pointer to host adapter structure - Output : None. - Return : pSRB - Pointer to SCSI request block. -*****************************************************************************/ -static int abort_SCB(ORC_HCS * hcsp, ORC_SCB * pScb) +/** + * orchid_abort_scb - abort a command + * + * Abort a queued command that has been passed to the firmware layer + * if possible. This is all handled by the firmware. 
We aks the firmware + * and it either aborts the command or fails + */ + +static int orchid_abort_scb(struct orc_host * host, struct orc_scb * scb) { - unsigned char bData, bStatus; + unsigned char data, status; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_ABORT_SCB); /* Write command */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(ORC_CMD_ABORT_SCB, host->base + ORC_HDATA); /* Write command */ + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - ORC_WR(hcsp->HCS_Base + ORC_HDATA, pScb->SCB_ScbIdx); /* Write address */ - ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); - if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ + outb(scb->scbidx, host->base + ORC_HDATA); /* Write address */ + outb(HDO, host->base + ORC_HCTRL); + if (wait_HDO_off(host) == 0) /* Wait HDO off */ return 0; - if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */ + if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */ return 0; - bStatus = ORC_RD(hcsp->HCS_Base, ORC_HDATA); - ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */ + status = inb(host->base + ORC_HDATA); + outb(data, host->base + ORC_HSTUS); /* Clear HDI */ - if (bStatus == 1) /* 0 - Successfully */ + if (status == 1) /* 0 - Successfully */ return 0; /* 1 - Fail */ return 1; } -/***************************************************************************** - Function name : inia100_abort - Description : Abort a queued command. - (commands that are on the bus can't be aborted easily) - Input : pHCB - Pointer to host adapter structure - Output : None. - Return : pSRB - Pointer to SCSI request block. 
-*****************************************************************************/ -static int orc_abort_srb(ORC_HCS * hcsp, struct scsi_cmnd *SCpnt) +static int inia100_abort_cmd(struct orc_host * host, struct scsi_cmnd *cmd) { - ESCB *pVirEscb; - ORC_SCB *pVirScb; - UCHAR i; - ULONG flags; - - spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags); - - pVirScb = hcsp->HCS_virScbArray; - - for (i = 0; i < ORC_MAXQUEUE; i++, pVirScb++) { - pVirEscb = pVirScb->SCB_EScb; - if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt)) { - if (pVirScb->SCB_TagMsg == 0) { - spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); - return FAILED; + struct orc_extended_scb *escb; + struct orc_scb *scb; + u8 i; + unsigned long flags; + + spin_lock_irqsave(&(host->allocation_lock), flags); + + scb = host->scb_virt; + + /* Walk the queue until we find the SCB that belongs to the command + block. This isn't a performance critical path so a walk in the park + here does no harm */ + + for (i = 0; i < ORC_MAXQUEUE; i++, scb++) { + escb = scb->escb; + if (scb->status && escb->srb == cmd) { + if (scb->tag_msg == 0) { + goto out; } else { - if (abort_SCB(hcsp, pVirScb)) { - pVirEscb->SCB_Srb = NULL; - spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); + /* Issue an ABORT to the firmware */ + if (orchid_abort_scb(host, scb)) { + escb->srb = NULL; + spin_unlock_irqrestore(&host->allocation_lock, flags); return SUCCESS; - } else { - spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); - return FAILED; - } + } else + goto out; } } } - spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); +out: + spin_unlock_irqrestore(&host->allocation_lock, flags); return FAILED; } -/*********************************************************************** - Routine Description: - This is the interrupt service routine for the Orchid SCSI adapter. - It reads the interrupt register to determine if the adapter is indeed - the source of the interrupt and clears the interrupt at the device. 
- Arguments: - HwDeviceExtension - HBA miniport driver's adapter data storage - Return Value: -***********************************************************************/ -static void orc_interrupt( - ORC_HCS * hcsp -) +/** + * orc_interrupt - IRQ processing + * @host: Host causing the interrupt + * + * This function is called from the IRQ handler and protected + * by the host lock. While the controller reports that there are + * scb's for processing we pull them off the controller, turn the + * index into a host address pointer to the scb and call the scb + * handler. + * + * Returns IRQ_HANDLED if any SCBs were processed, IRQ_NONE otherwise + */ + +static irqreturn_t orc_interrupt(struct orc_host * host) { - BYTE bScbIdx; - ORC_SCB *pScb; + u8 scb_index; + struct orc_scb *scb; - if (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT) == 0) { - return; // 0; + /* Check if we have an SCB queued for servicing */ + if (inb(host->base + ORC_RQUEUECNT) == 0) + return IRQ_NONE; - } do { - bScbIdx = ORC_RD(hcsp->HCS_Base, ORC_RQUEUE); - - pScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (ULONG) (sizeof(ORC_SCB) * bScbIdx)); - pScb->SCB_Status = 0x0; - - inia100SCBPost((BYTE *) hcsp, (BYTE *) pScb); - } while (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT)); - return; //1; + /* Get the SCB index of the SCB to service */ + scb_index = inb(host->base + ORC_RQUEUE); + /* Translate it back to a host pointer */ + scb = (struct orc_scb *) ((unsigned long) host->scb_virt + (unsigned long) (sizeof(struct orc_scb) * scb_index)); + scb->status = 0x0; + /* Process the SCB */ + inia100_scb_handler(host, scb); + } while (inb(host->base + ORC_RQUEUECNT)); + return IRQ_HANDLED; } /* End of I1060Interrupt() */ -/***************************************************************************** - Function name : inia100BuildSCB - Description : - Input : pHCB - Pointer to host adapter structure - Output : None. - Return : pSRB - Pointer to SCSI request block. 
-*****************************************************************************/ -static void inia100BuildSCB(ORC_HCS * pHCB, ORC_SCB * pSCB, struct scsi_cmnd * SCpnt) +/** + * inia100_build_scb - build SCB + * @host: host owing the control block + * @scb: control block to use + * @cmd: Mid layer command + * + * Build a host adapter control block from the SCSI mid layer command + */ + +static void inia100_build_scb(struct orc_host * host, struct orc_scb * scb, struct scsi_cmnd * cmd) { /* Create corresponding SCB */ - struct scatterlist *pSrbSG; - ORC_SG *pSG; /* Pointer to SG list */ + struct scatterlist *sg; + struct orc_sgent *sgent; /* Pointer to SG list */ int i, count_sg; - ESCB *pEScb; + struct orc_extended_scb *escb; - pEScb = pSCB->SCB_EScb; - pEScb->SCB_Srb = SCpnt; - pSG = NULL; - - pSCB->SCB_Opcode = ORC_EXECSCSI; - pSCB->SCB_Flags = SCF_NO_DCHK; /* Clear done bit */ - pSCB->SCB_Target = SCpnt->device->id; - pSCB->SCB_Lun = SCpnt->device->lun; - pSCB->SCB_Reserved0 = 0; - pSCB->SCB_Reserved1 = 0; - pSCB->SCB_SGLen = 0; - - if ((pSCB->SCB_XferLen = (U32) SCpnt->request_bufflen)) { - pSG = (ORC_SG *) & pEScb->ESCB_SGList[0]; - if (SCpnt->use_sg) { - pSrbSG = (struct scatterlist *) SCpnt->request_buffer; - count_sg = pci_map_sg(pHCB->pdev, pSrbSG, SCpnt->use_sg, - SCpnt->sc_data_direction); - pSCB->SCB_SGLen = (U32) (count_sg * 8); - for (i = 0; i < count_sg; i++, pSG++, pSrbSG++) { - pSG->SG_Ptr = (U32) sg_dma_address(pSrbSG); - pSG->SG_Len = (U32) sg_dma_len(pSrbSG); - } - } else if (SCpnt->request_bufflen != 0) {/* Non SG */ - pSCB->SCB_SGLen = 0x8; - SCpnt->SCp.dma_handle = pci_map_single(pHCB->pdev, - SCpnt->request_buffer, - SCpnt->request_bufflen, - SCpnt->sc_data_direction); - pSG->SG_Ptr = (U32) SCpnt->SCp.dma_handle; - pSG->SG_Len = (U32) SCpnt->request_bufflen; + /* Links between the escb, scb and Linux scsi midlayer cmd */ + escb = scb->escb; + escb->srb = cmd; + sgent = NULL; + + /* Set up the SCB to do a SCSI command block */ + scb->opcode = 
ORC_EXECSCSI; + scb->flags = SCF_NO_DCHK; /* Clear done bit */ + scb->target = cmd->device->id; + scb->lun = cmd->device->lun; + scb->reserved0 = 0; + scb->reserved1 = 0; + scb->sg_len = 0; + + scb->xferlen = (u32) scsi_bufflen(cmd); + sgent = (struct orc_sgent *) & escb->sglist[0]; + + count_sg = scsi_dma_map(cmd); + BUG_ON(count_sg < 0); + + /* Build the scatter gather lists */ + if (count_sg) { + scb->sg_len = (u32) (count_sg * 8); + scsi_for_each_sg(cmd, sg, count_sg, i) { + sgent->base = (u32) sg_dma_address(sg); + sgent->length = (u32) sg_dma_len(sg); + sgent++; + } } else { - pSCB->SCB_SGLen = 0; - pSG->SG_Ptr = 0; - pSG->SG_Len = 0; - } - } - pSCB->SCB_SGPAddr = (U32) pSCB->SCB_SensePAddr; - pSCB->SCB_HaStat = 0; - pSCB->SCB_TaStat = 0; - pSCB->SCB_Link = 0xFF; - pSCB->SCB_SenseLen = SENSE_SIZE; - pSCB->SCB_CDBLen = SCpnt->cmd_len; - if (pSCB->SCB_CDBLen >= IMAX_CDB) { - printk("max cdb length= %x\b", SCpnt->cmd_len); - pSCB->SCB_CDBLen = IMAX_CDB; - } - pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW; - if (SCpnt->device->tagged_supported) { /* Tag Support */ - pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ + scb->sg_len = 0; + sgent->base = 0; + sgent->length = 0; + } + scb->sg_addr = (u32) scb->sense_addr; + scb->hastat = 0; + scb->tastat = 0; + scb->link = 0xFF; + scb->sense_len = SENSE_SIZE; + scb->cdb_len = cmd->cmd_len; + if (scb->cdb_len >= IMAX_CDB) { + printk("max cdb length= %x\b", cmd->cmd_len); + scb->cdb_len = IMAX_CDB; + } + scb->ident = cmd->device->lun | DISC_ALLOW; + if (cmd->device->tagged_supported) { /* Tag Support */ + scb->tag_msg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ } else { - pSCB->SCB_TagMsg = 0; /* No tag support */ + scb->tag_msg = 0; /* No tag support */ } - memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, pSCB->SCB_CDBLen); - return; + memcpy(&scb->cdb[0], &cmd->cmnd, scb->cdb_len); } -/***************************************************************************** - Function name : inia100_queue - Description : 
Queue a command and setup interrupts for a free bus. - Input : pHCB - Pointer to host adapter structure - Output : None. - Return : pSRB - Pointer to SCSI request block. -*****************************************************************************/ -static int inia100_queue(struct scsi_cmnd * SCpnt, void (*done) (struct scsi_cmnd *)) +/** + * inia100_queue - queue command with host + * @cmd: Command block + * @done: Completion function + * + * Called by the mid layer to queue a command. Process the command + * block, build the host specific scb structures and if there is room + * queue the command down to the controller + */ + +static int inia100_queue(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *)) { - register ORC_SCB *pSCB; - ORC_HCS *pHCB; /* Point to Host adapter control block */ + struct orc_scb *scb; + struct orc_host *host; /* Point to Host adapter control block */ - pHCB = (ORC_HCS *) SCpnt->device->host->hostdata; - SCpnt->scsi_done = done; + host = (struct orc_host *) cmd->device->host->hostdata; + cmd->scsi_done = done; /* Get free SCSI control block */ - if ((pSCB = orc_alloc_scb(pHCB)) == NULL) + if ((scb = orc_alloc_scb(host)) == NULL) return SCSI_MLQUEUE_HOST_BUSY; - inia100BuildSCB(pHCB, pSCB, SCpnt); - orc_exec_scb(pHCB, pSCB); /* Start execute SCB */ - - return (0); + inia100_build_scb(host, scb, cmd); + orc_exec_scb(host, scb); /* Start execute SCB */ + return 0; } /***************************************************************************** Function name : inia100_abort Description : Abort a queued command. (commands that are on the bus can't be aborted easily) - Input : pHCB - Pointer to host adapter structure + Input : host - Pointer to host adapter structure Output : None. Return : pSRB - Pointer to SCSI request block. 
*****************************************************************************/ -static int inia100_abort(struct scsi_cmnd * SCpnt) +static int inia100_abort(struct scsi_cmnd * cmd) { - ORC_HCS *hcsp; + struct orc_host *host; - hcsp = (ORC_HCS *) SCpnt->device->host->hostdata; - return orc_abort_srb(hcsp, SCpnt); + host = (struct orc_host *) cmd->device->host->hostdata; + return inia100_abort_cmd(host, cmd); } /***************************************************************************** Function name : inia100_reset Description : Reset registers, reset a hanging bus and kill active and disconnected commands for target w/o soft reset - Input : pHCB - Pointer to host adapter structure + Input : host - Pointer to host adapter structure Output : None. Return : pSRB - Pointer to SCSI request block. *****************************************************************************/ -static int inia100_bus_reset(struct scsi_cmnd * SCpnt) +static int inia100_bus_reset(struct scsi_cmnd * cmd) { /* I need Host Control Block Information */ - ORC_HCS *pHCB; - pHCB = (ORC_HCS *) SCpnt->device->host->hostdata; - return orc_reset_scsi_bus(pHCB); + struct orc_host *host; + host = (struct orc_host *) cmd->device->host->hostdata; + return orc_reset_scsi_bus(host); } /***************************************************************************** Function name : inia100_device_reset Description : Reset the device - Input : pHCB - Pointer to host adapter structure + Input : host - Pointer to host adapter structure Output : None. Return : pSRB - Pointer to SCSI request block. 
*****************************************************************************/ -static int inia100_device_reset(struct scsi_cmnd * SCpnt) +static int inia100_device_reset(struct scsi_cmnd * cmd) { /* I need Host Control Block Information */ - ORC_HCS *pHCB; - pHCB = (ORC_HCS *) SCpnt->device->host->hostdata; - return orc_device_reset(pHCB, SCpnt, scmd_id(SCpnt)); + struct orc_host *host; + host = (struct orc_host *) cmd->device->host->hostdata; + return orc_device_reset(host, cmd, scmd_id(cmd)); } -/***************************************************************************** - Function name : inia100SCBPost - Description : This is callback routine be called when orc finish one - SCSI command. - Input : pHCB - Pointer to host adapter control block. - pSCB - Pointer to SCSI control block. - Output : None. - Return : None. -*****************************************************************************/ -static void inia100SCBPost(BYTE * pHcb, BYTE * pScb) +/** + * inia100_scb_handler - interrupt callback + * @host: Host causing the interrupt + * @scb: SCB the controller returned as needing processing + * + * Perform completion processing on a control block. Do the conversions + * from host to SCSI midlayer error coding, save any sense data and + * the complete with the midlayer and recycle the scb. 
+ */ + +static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb) { - struct scsi_cmnd *pSRB; /* Pointer to SCSI request block */ - ORC_HCS *pHCB; - ORC_SCB *pSCB; - ESCB *pEScb; - - pHCB = (ORC_HCS *) pHcb; - pSCB = (ORC_SCB *) pScb; - pEScb = pSCB->SCB_EScb; - if ((pSRB = (struct scsi_cmnd *) pEScb->SCB_Srb) == 0) { - printk("inia100SCBPost: SRB pointer is empty\n"); - orc_release_scb(pHCB, pSCB); /* Release SCB for current channel */ + struct scsi_cmnd *cmd; /* Pointer to SCSI request block */ + struct orc_extended_scb *escb; + + escb = scb->escb; + if ((cmd = (struct scsi_cmnd *) escb->srb) == NULL) { + printk(KERN_ERR "inia100_scb_handler: SRB pointer is empty\n"); + orc_release_scb(host, scb); /* Release SCB for current channel */ return; } - pEScb->SCB_Srb = NULL; + escb->srb = NULL; - switch (pSCB->SCB_HaStat) { + switch (scb->hastat) { case 0x0: case 0xa: /* Linked command complete without error and linked normally */ case 0xb: /* Linked command complete without error interrupt generated */ - pSCB->SCB_HaStat = 0; + scb->hastat = 0; break; case 0x11: /* Selection time out-The initiator selection or target reselection was not complete within the SCSI Time out period */ - pSCB->SCB_HaStat = DID_TIME_OUT; + scb->hastat = DID_TIME_OUT; break; case 0x14: /* Target bus phase sequence failure-An invalid bus phase or bus phase sequence was requested by the target. The host adapter will generate a SCSI Reset Condition, notifying the host with a SCRD interrupt */ - pSCB->SCB_HaStat = DID_RESET; + scb->hastat = DID_RESET; break; case 0x1a: /* SCB Aborted. 07/21/98 */ - pSCB->SCB_HaStat = DID_ABORT; + scb->hastat = DID_ABORT; break; case 0x12: /* Data overrun/underrun-The target attempted to transfer more data @@ -984,46 +1022,41 @@ case 0x16: /* Invalid CCB Operation Code-The first byte of the CCB was invalid. 
*/ default: - printk("inia100: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat); - pSCB->SCB_HaStat = DID_ERROR; /* Couldn't find any better */ + printk(KERN_DEBUG "inia100: %x %x\n", scb->hastat, scb->tastat); + scb->hastat = DID_ERROR; /* Couldn't find any better */ break; } - if (pSCB->SCB_TaStat == 2) { /* Check condition */ - memcpy((unsigned char *) &pSRB->sense_buffer[0], - (unsigned char *) &pEScb->ESCB_SGList[0], SENSE_SIZE); - } - pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16); - - if (pSRB->use_sg) { - pci_unmap_sg(pHCB->pdev, - (struct scatterlist *)pSRB->request_buffer, - pSRB->use_sg, pSRB->sc_data_direction); - } else if (pSRB->request_bufflen != 0) { - pci_unmap_single(pHCB->pdev, pSRB->SCp.dma_handle, - pSRB->request_bufflen, - pSRB->sc_data_direction); - } - - pSRB->scsi_done(pSRB); /* Notify system DONE */ - - orc_release_scb(pHCB, pSCB); /* Release SCB for current channel */ + if (scb->tastat == 2) { /* Check condition */ + memcpy((unsigned char *) &cmd->sense_buffer[0], + (unsigned char *) &escb->sglist[0], SENSE_SIZE); + } + cmd->result = scb->tastat | (scb->hastat << 16); + scsi_dma_unmap(cmd); + cmd->scsi_done(cmd); /* Notify system DONE */ + orc_release_scb(host, scb); /* Release SCB for current channel */ } -/* - * Interrupt handler (main routine of the driver) +/** + * inia100_intr - interrupt handler + * @irqno: Interrupt value + * @devid: Host adapter + * + * Entry point for IRQ handling. All the real work is performed + * by orc_interrupt. 
*/ static irqreturn_t inia100_intr(int irqno, void *devid) { - struct Scsi_Host *host = (struct Scsi_Host *)devid; - ORC_HCS *pHcb = (ORC_HCS *)host->hostdata; + struct Scsi_Host *shost = (struct Scsi_Host *)devid; + struct orc_host *host = (struct orc_host *)shost->hostdata; unsigned long flags; + irqreturn_t res; - spin_lock_irqsave(host->host_lock, flags); - orc_interrupt(pHcb); - spin_unlock_irqrestore(host->host_lock, flags); + spin_lock_irqsave(shost->host_lock, flags); + res = orc_interrupt(host); + spin_unlock_irqrestore(shost->host_lock, flags); - return IRQ_HANDLED; + return res; } static struct scsi_host_template inia100_template = { @@ -1044,12 +1077,12 @@ const struct pci_device_id *id) { struct Scsi_Host *shost; - ORC_HCS *pHCB; + struct orc_host *host; unsigned long port, bios; int error = -ENODEV; u32 sz; - unsigned long dBiosAdr; - char *pbBiosAdr; + unsigned long biosaddr; + char *bios_phys; if (pci_enable_device(pdev)) goto out; @@ -1068,55 +1101,55 @@ } /* <02> read from base address + 0x50 offset to get the bios value. 
*/ - bios = ORC_RDWORD(port, 0x50); + bios = inw(port + 0x50); - shost = scsi_host_alloc(&inia100_template, sizeof(ORC_HCS)); + shost = scsi_host_alloc(&inia100_template, sizeof(struct orc_host)); if (!shost) goto out_release_region; - pHCB = (ORC_HCS *)shost->hostdata; - pHCB->pdev = pdev; - pHCB->HCS_Base = port; - pHCB->HCS_BIOS = bios; - spin_lock_init(&pHCB->BitAllocFlagLock); + host = (struct orc_host *)shost->hostdata; + host->pdev = pdev; + host->base = port; + host->BIOScfg = bios; + spin_lock_init(&host->allocation_lock); /* Get total memory needed for SCB */ - sz = ORC_MAXQUEUE * sizeof(ORC_SCB); - pHCB->HCS_virScbArray = pci_alloc_consistent(pdev, sz, - &pHCB->HCS_physScbArray); - if (!pHCB->HCS_virScbArray) { + sz = ORC_MAXQUEUE * sizeof(struct orc_scb); + host->scb_virt = pci_alloc_consistent(pdev, sz, + &host->scb_phys); + if (!host->scb_virt) { printk("inia100: SCB memory allocation error\n"); goto out_host_put; } - memset(pHCB->HCS_virScbArray, 0, sz); + memset(host->scb_virt, 0, sz); /* Get total memory needed for ESCB */ - sz = ORC_MAXQUEUE * sizeof(ESCB); - pHCB->HCS_virEscbArray = pci_alloc_consistent(pdev, sz, - &pHCB->HCS_physEscbArray); - if (!pHCB->HCS_virEscbArray) { + sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb); + host->escb_virt = pci_alloc_consistent(pdev, sz, + &host->escb_phys); + if (!host->escb_virt) { printk("inia100: ESCB memory allocation error\n"); goto out_free_scb_array; } - memset(pHCB->HCS_virEscbArray, 0, sz); + memset(host->escb_virt, 0, sz); - dBiosAdr = pHCB->HCS_BIOS; - dBiosAdr = (dBiosAdr << 4); - pbBiosAdr = phys_to_virt(dBiosAdr); - if (init_orchid(pHCB)) { /* Initialize orchid chip */ + biosaddr = host->BIOScfg; + biosaddr = (biosaddr << 4); + bios_phys = phys_to_virt(biosaddr); + if (init_orchid(host)) { /* Initialize orchid chip */ printk("inia100: initial orchid fail!!\n"); goto out_free_escb_array; } - shost->io_port = pHCB->HCS_Base; + shost->io_port = host->base; shost->n_io_port = 0xff; 
shost->can_queue = ORC_MAXQUEUE; shost->unique_id = shost->io_port; - shost->max_id = pHCB->HCS_MaxTar; + shost->max_id = host->max_targets; shost->max_lun = 16; - shost->irq = pHCB->HCS_Intr = pdev->irq; - shost->this_id = pHCB->HCS_SCSI_ID; /* Assign HCS index */ + shost->irq = pdev->irq; + shost->this_id = host->scsi_id; /* Assign HCS index */ shost->sg_tablesize = TOTAL_SG_ENTRY; /* Initial orc chip */ @@ -1137,36 +1170,36 @@ scsi_scan_host(shost); return 0; - out_free_irq: +out_free_irq: free_irq(shost->irq, shost); - out_free_escb_array: - pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB), - pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray); - out_free_scb_array: - pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB), - pHCB->HCS_virScbArray, pHCB->HCS_physScbArray); - out_host_put: +out_free_escb_array: + pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb), + host->escb_virt, host->escb_phys); +out_free_scb_array: + pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb), + host->scb_virt, host->scb_phys); +out_host_put: scsi_host_put(shost); - out_release_region: +out_release_region: release_region(port, 256); - out_disable_device: +out_disable_device: pci_disable_device(pdev); - out: +out: return error; } static void __devexit inia100_remove_one(struct pci_dev *pdev) { struct Scsi_Host *shost = pci_get_drvdata(pdev); - ORC_HCS *pHCB = (ORC_HCS *)shost->hostdata; + struct orc_host *host = (struct orc_host *)shost->hostdata; scsi_remove_host(shost); free_irq(shost->irq, shost); - pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB), - pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray); - pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB), - pHCB->HCS_virScbArray, pHCB->HCS_physScbArray); + pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb), + host->escb_virt, host->escb_phys); + pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb), + host->scb_virt, host->scb_phys); 
release_region(shost->io_port, 256); scsi_host_put(shost); diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.h linux-2.6.22-591/drivers/scsi/a100u2w.h --- linux-2.6.22-570/drivers/scsi/a100u2w.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/a100u2w.h 2007-12-21 15:36:12.000000000 -0500 @@ -18,27 +18,6 @@ * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * - * -------------------------------------------------------------------------- - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Where this Software is combined with software released under the terms of - * the GNU General Public License ("GPL") and the terms of the GPL would require the - * combined work to also be released under the terms of the GPL, the terms - * and conditions of this License will apply in addition to those of the - * GPL with the exception of any terms or conditions of this License that - * conflict with, or are expressly prohibited by, the GPL. 
- * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -50,30 +29,19 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - */ - -/* + * * Revision History: * 06/18/98 HL, Initial production Version 1.02 * 12/19/98 bv, Use spinlocks for 2.1.95 and up * 06/25/02 Doug Ledford * - This and the i60uscsi.h file are almost identical, * merged them into a single header used by both .c files. + * 14/06/07 Alan Cox + * - Grand cleanup and Linuxisation */ #define inia100_REVID "Initio INI-A100U2W SCSI device driver; Revision: 1.02d" -#define ULONG unsigned long -#define USHORT unsigned short -#define UCHAR unsigned char -#define BYTE unsigned char -#define WORD unsigned short -#define DWORD unsigned long -#define UBYTE unsigned char -#define UWORD unsigned short -#define UDWORD unsigned long -#define U32 u32 - #if 1 #define ORC_MAXQUEUE 245 #define ORC_MAXTAGS 64 @@ -90,10 +58,10 @@ /************************************************************************/ /* Scatter-Gather Element Structure */ /************************************************************************/ -typedef struct ORC_SG_Struc { - U32 SG_Ptr; /* Data Pointer */ - U32 SG_Len; /* Data Length */ -} ORC_SG; +struct orc_sgent { + u32 base; /* Data Pointer */ + u32 length; /* Data Length */ +}; /* SCSI related definition */ #define DISC_NOT_ALLOW 0x80 /* Disconnect is not allowed */ @@ -165,42 +133,45 @@ #define ORC_PRGMCTR1 0xE3 /* RISC program counter */ #define ORC_RISCRAM 0xEC /* RISC RAM data port 4 bytes */ -typedef struct orc_extended_scb { /* Extended SCB */ - ORC_SG ESCB_SGList[TOTAL_SG_ENTRY]; /*0 Start of SG list */ - struct scsi_cmnd *SCB_Srb; /*50 SRB Pointer */ -} ESCB; +struct orc_extended_scb { /* Extended SCB */ 
+ struct orc_sgent sglist[TOTAL_SG_ENTRY]; /*0 Start of SG list */ + struct scsi_cmnd *srb; /*50 SRB Pointer */ +}; /*********************************************************************** SCSI Control Block + + 0x40 bytes long, the last 8 are user bytes ************************************************************************/ -typedef struct orc_scb { /* Scsi_Ctrl_Blk */ - UBYTE SCB_Opcode; /*00 SCB command code&residual */ - UBYTE SCB_Flags; /*01 SCB Flags */ - UBYTE SCB_Target; /*02 Target Id */ - UBYTE SCB_Lun; /*03 Lun */ - U32 SCB_Reserved0; /*04 Reserved for ORCHID must 0 */ - U32 SCB_XferLen; /*08 Data Transfer Length */ - U32 SCB_Reserved1; /*0C Reserved for ORCHID must 0 */ - U32 SCB_SGLen; /*10 SG list # * 8 */ - U32 SCB_SGPAddr; /*14 SG List Buf physical Addr */ - U32 SCB_SGPAddrHigh; /*18 SG Buffer high physical Addr */ - UBYTE SCB_HaStat; /*1C Host Status */ - UBYTE SCB_TaStat; /*1D Target Status */ - UBYTE SCB_Status; /*1E SCB status */ - UBYTE SCB_Link; /*1F Link pointer, default 0xFF */ - UBYTE SCB_SenseLen; /*20 Sense Allocation Length */ - UBYTE SCB_CDBLen; /*21 CDB Length */ - UBYTE SCB_Ident; /*22 Identify */ - UBYTE SCB_TagMsg; /*23 Tag Message */ - UBYTE SCB_CDB[IMAX_CDB]; /*24 SCSI CDBs */ - UBYTE SCB_ScbIdx; /*3C Index for this ORCSCB */ - U32 SCB_SensePAddr; /*34 Sense Buffer physical Addr */ - - ESCB *SCB_EScb; /*38 Extended SCB Pointer */ -#ifndef ALPHA - UBYTE SCB_Reserved2[4]; /*3E Reserved for Driver use */ +struct orc_scb { /* Scsi_Ctrl_Blk */ + u8 opcode; /*00 SCB command code&residual */ + u8 flags; /*01 SCB Flags */ + u8 target; /*02 Target Id */ + u8 lun; /*03 Lun */ + u32 reserved0; /*04 Reserved for ORCHID must 0 */ + u32 xferlen; /*08 Data Transfer Length */ + u32 reserved1; /*0C Reserved for ORCHID must 0 */ + u32 sg_len; /*10 SG list # * 8 */ + u32 sg_addr; /*14 SG List Buf physical Addr */ + u32 sg_addrhigh; /*18 SG Buffer high physical Addr */ + u8 hastat; /*1C Host Status */ + u8 tastat; /*1D Target Status */ + u8 status; 
/*1E SCB status */ + u8 link; /*1F Link pointer, default 0xFF */ + u8 sense_len; /*20 Sense Allocation Length */ + u8 cdb_len; /*21 CDB Length */ + u8 ident; /*22 Identify */ + u8 tag_msg; /*23 Tag Message */ + u8 cdb[IMAX_CDB]; /*24 SCSI CDBs */ + u8 scbidx; /*3C Index for this ORCSCB */ + u32 sense_addr; /*34 Sense Buffer physical Addr */ + + struct orc_extended_scb *escb; /*38 Extended SCB Pointer */ + /* 64bit pointer or 32bit pointer + reserved ? */ +#ifndef CONFIG_64BIT + u8 reserved2[4]; /*3E Reserved for Driver use */ #endif -} ORC_SCB; +}; /* Opcodes of ORCSCB_Opcode */ #define ORC_EXECSCSI 0x00 /* SCSI initiator command with residual */ @@ -239,13 +210,13 @@ Target Device Control Structure **********************************************************************/ -typedef struct ORC_Tar_Ctrl_Struc { - UBYTE TCS_DrvDASD; /* 6 */ - UBYTE TCS_DrvSCSI; /* 7 */ - UBYTE TCS_DrvHead; /* 8 */ - UWORD TCS_DrvFlags; /* 4 */ - UBYTE TCS_DrvSector; /* 7 */ -} ORC_TCS; +struct orc_target { + u8 TCS_DrvDASD; /* 6 */ + u8 TCS_DrvSCSI; /* 7 */ + u8 TCS_DrvHead; /* 8 */ + u16 TCS_DrvFlags; /* 4 */ + u8 TCS_DrvSector; /* 7 */ +}; /* Bit Definition for TCF_DrvFlags */ #define TCS_DF_NODASD_SUPT 0x20 /* Suppress OS/2 DASD Mgr support */ @@ -255,32 +226,23 @@ /*********************************************************************** Host Adapter Control Structure ************************************************************************/ -typedef struct ORC_Ha_Ctrl_Struc { - USHORT HCS_Base; /* 00 */ - UBYTE HCS_Index; /* 02 */ - UBYTE HCS_Intr; /* 04 */ - UBYTE HCS_SCSI_ID; /* 06 H/A SCSI ID */ - UBYTE HCS_BIOS; /* 07 BIOS configuration */ - - UBYTE HCS_Flags; /* 0B */ - UBYTE HCS_HAConfig1; /* 1B SCSI0MAXTags */ - UBYTE HCS_MaxTar; /* 1B SCSI0MAXTags */ - - USHORT HCS_Units; /* Number of units this adapter */ - USHORT HCS_AFlags; /* Adapter info. 
defined flags */ - ULONG HCS_Timeout; /* Adapter timeout value */ - ORC_SCB *HCS_virScbArray; /* 28 Virtual Pointer to SCB array */ - dma_addr_t HCS_physScbArray; /* Scb Physical address */ - ESCB *HCS_virEscbArray; /* Virtual pointer to ESCB Scatter list */ - dma_addr_t HCS_physEscbArray; /* scatter list Physical address */ - UBYTE TargetFlag[16]; /* 30 target configuration, TCF_EN_TAG */ - UBYTE MaximumTags[16]; /* 40 ORC_MAX_SCBS */ - UBYTE ActiveTags[16][16]; /* 50 */ - ORC_TCS HCS_Tcs[16]; /* 28 */ - U32 BitAllocFlag[MAX_CHANNELS][8]; /* Max STB is 256, So 256/32 */ - spinlock_t BitAllocFlagLock; +struct orc_host { + unsigned long base; /* Base address */ + u8 index; /* Index (Channel)*/ + u8 scsi_id; /* H/A SCSI ID */ + u8 BIOScfg; /*BIOS configuration */ + u8 flags; + u8 max_targets; /* SCSI0MAXTags */ + struct orc_scb *scb_virt; /* Virtual Pointer to SCB array */ + dma_addr_t scb_phys; /* Scb Physical address */ + struct orc_extended_scb *escb_virt; /* Virtual pointer to ESCB Scatter list */ + dma_addr_t escb_phys; /* scatter list Physical address */ + u8 target_flag[16]; /* target configuration, TCF_EN_TAG */ + u8 max_tags[16]; /* ORC_MAX_SCBS */ + u32 allocation_map[MAX_CHANNELS][8]; /* Max STB is 256, So 256/32 */ + spinlock_t allocation_lock; struct pci_dev *pdev; -} ORC_HCS; +}; /* Bit Definition for HCS_Flags */ @@ -301,79 +263,79 @@ #define HCS_AF_DISABLE_RESET 0x10 /* Adapter disable reset */ #define HCS_AF_DISABLE_ADPT 0x80 /* Adapter disable */ -typedef struct _NVRAM { +struct orc_nvram { /*----------header ---------------*/ - UCHAR SubVendorID0; /* 00 - Sub Vendor ID */ - UCHAR SubVendorID1; /* 00 - Sub Vendor ID */ - UCHAR SubSysID0; /* 02 - Sub System ID */ - UCHAR SubSysID1; /* 02 - Sub System ID */ - UCHAR SubClass; /* 04 - Sub Class */ - UCHAR VendorID0; /* 05 - Vendor ID */ - UCHAR VendorID1; /* 05 - Vendor ID */ - UCHAR DeviceID0; /* 07 - Device ID */ - UCHAR DeviceID1; /* 07 - Device ID */ - UCHAR Reserved0[2]; /* 09 - Reserved */ - UCHAR 
Revision; /* 0B - Revision of data structure */ + u8 SubVendorID0; /* 00 - Sub Vendor ID */ + u8 SubVendorID1; /* 00 - Sub Vendor ID */ + u8 SubSysID0; /* 02 - Sub System ID */ + u8 SubSysID1; /* 02 - Sub System ID */ + u8 SubClass; /* 04 - Sub Class */ + u8 VendorID0; /* 05 - Vendor ID */ + u8 VendorID1; /* 05 - Vendor ID */ + u8 DeviceID0; /* 07 - Device ID */ + u8 DeviceID1; /* 07 - Device ID */ + u8 Reserved0[2]; /* 09 - Reserved */ + u8 revision; /* 0B - revision of data structure */ /* ----Host Adapter Structure ---- */ - UCHAR NumOfCh; /* 0C - Number of SCSI channel */ - UCHAR BIOSConfig1; /* 0D - BIOS configuration 1 */ - UCHAR BIOSConfig2; /* 0E - BIOS boot channel&target ID */ - UCHAR BIOSConfig3; /* 0F - BIOS configuration 3 */ + u8 NumOfCh; /* 0C - Number of SCSI channel */ + u8 BIOSConfig1; /* 0D - BIOS configuration 1 */ + u8 BIOSConfig2; /* 0E - BIOS boot channel&target ID */ + u8 BIOSConfig3; /* 0F - BIOS configuration 3 */ /* ----SCSI channel Structure ---- */ /* from "CTRL-I SCSI Host Adapter SetUp menu " */ - UCHAR SCSI0Id; /* 10 - Channel 0 SCSI ID */ - UCHAR SCSI0Config; /* 11 - Channel 0 SCSI configuration */ - UCHAR SCSI0MaxTags; /* 12 - Channel 0 Maximum tags */ - UCHAR SCSI0ResetTime; /* 13 - Channel 0 Reset recovering time */ - UCHAR ReservedforChannel0[2]; /* 14 - Reserved */ + u8 scsi_id; /* 10 - Channel 0 SCSI ID */ + u8 SCSI0Config; /* 11 - Channel 0 SCSI configuration */ + u8 SCSI0MaxTags; /* 12 - Channel 0 Maximum tags */ + u8 SCSI0ResetTime; /* 13 - Channel 0 Reset recovering time */ + u8 ReservedforChannel0[2]; /* 14 - Reserved */ /* ----SCSI target Structure ---- */ /* from "CTRL-I SCSI device SetUp menu " */ - UCHAR Target00Config; /* 16 - Channel 0 Target 0 config */ - UCHAR Target01Config; /* 17 - Channel 0 Target 1 config */ - UCHAR Target02Config; /* 18 - Channel 0 Target 2 config */ - UCHAR Target03Config; /* 19 - Channel 0 Target 3 config */ - UCHAR Target04Config; /* 1A - Channel 0 Target 4 config */ - UCHAR 
Target05Config; /* 1B - Channel 0 Target 5 config */ - UCHAR Target06Config; /* 1C - Channel 0 Target 6 config */ - UCHAR Target07Config; /* 1D - Channel 0 Target 7 config */ - UCHAR Target08Config; /* 1E - Channel 0 Target 8 config */ - UCHAR Target09Config; /* 1F - Channel 0 Target 9 config */ - UCHAR Target0AConfig; /* 20 - Channel 0 Target A config */ - UCHAR Target0BConfig; /* 21 - Channel 0 Target B config */ - UCHAR Target0CConfig; /* 22 - Channel 0 Target C config */ - UCHAR Target0DConfig; /* 23 - Channel 0 Target D config */ - UCHAR Target0EConfig; /* 24 - Channel 0 Target E config */ - UCHAR Target0FConfig; /* 25 - Channel 0 Target F config */ - - UCHAR SCSI1Id; /* 26 - Channel 1 SCSI ID */ - UCHAR SCSI1Config; /* 27 - Channel 1 SCSI configuration */ - UCHAR SCSI1MaxTags; /* 28 - Channel 1 Maximum tags */ - UCHAR SCSI1ResetTime; /* 29 - Channel 1 Reset recovering time */ - UCHAR ReservedforChannel1[2]; /* 2A - Reserved */ + u8 Target00Config; /* 16 - Channel 0 Target 0 config */ + u8 Target01Config; /* 17 - Channel 0 Target 1 config */ + u8 Target02Config; /* 18 - Channel 0 Target 2 config */ + u8 Target03Config; /* 19 - Channel 0 Target 3 config */ + u8 Target04Config; /* 1A - Channel 0 Target 4 config */ + u8 Target05Config; /* 1B - Channel 0 Target 5 config */ + u8 Target06Config; /* 1C - Channel 0 Target 6 config */ + u8 Target07Config; /* 1D - Channel 0 Target 7 config */ + u8 Target08Config; /* 1E - Channel 0 Target 8 config */ + u8 Target09Config; /* 1F - Channel 0 Target 9 config */ + u8 Target0AConfig; /* 20 - Channel 0 Target A config */ + u8 Target0BConfig; /* 21 - Channel 0 Target B config */ + u8 Target0CConfig; /* 22 - Channel 0 Target C config */ + u8 Target0DConfig; /* 23 - Channel 0 Target D config */ + u8 Target0EConfig; /* 24 - Channel 0 Target E config */ + u8 Target0FConfig; /* 25 - Channel 0 Target F config */ + + u8 SCSI1Id; /* 26 - Channel 1 SCSI ID */ + u8 SCSI1Config; /* 27 - Channel 1 SCSI configuration */ + u8 SCSI1MaxTags; /* 
28 - Channel 1 Maximum tags */ + u8 SCSI1ResetTime; /* 29 - Channel 1 Reset recovering time */ + u8 ReservedforChannel1[2]; /* 2A - Reserved */ /* ----SCSI target Structure ---- */ /* from "CTRL-I SCSI device SetUp menu " */ - UCHAR Target10Config; /* 2C - Channel 1 Target 0 config */ - UCHAR Target11Config; /* 2D - Channel 1 Target 1 config */ - UCHAR Target12Config; /* 2E - Channel 1 Target 2 config */ - UCHAR Target13Config; /* 2F - Channel 1 Target 3 config */ - UCHAR Target14Config; /* 30 - Channel 1 Target 4 config */ - UCHAR Target15Config; /* 31 - Channel 1 Target 5 config */ - UCHAR Target16Config; /* 32 - Channel 1 Target 6 config */ - UCHAR Target17Config; /* 33 - Channel 1 Target 7 config */ - UCHAR Target18Config; /* 34 - Channel 1 Target 8 config */ - UCHAR Target19Config; /* 35 - Channel 1 Target 9 config */ - UCHAR Target1AConfig; /* 36 - Channel 1 Target A config */ - UCHAR Target1BConfig; /* 37 - Channel 1 Target B config */ - UCHAR Target1CConfig; /* 38 - Channel 1 Target C config */ - UCHAR Target1DConfig; /* 39 - Channel 1 Target D config */ - UCHAR Target1EConfig; /* 3A - Channel 1 Target E config */ - UCHAR Target1FConfig; /* 3B - Channel 1 Target F config */ - UCHAR reserved[3]; /* 3C - Reserved */ + u8 Target10Config; /* 2C - Channel 1 Target 0 config */ + u8 Target11Config; /* 2D - Channel 1 Target 1 config */ + u8 Target12Config; /* 2E - Channel 1 Target 2 config */ + u8 Target13Config; /* 2F - Channel 1 Target 3 config */ + u8 Target14Config; /* 30 - Channel 1 Target 4 config */ + u8 Target15Config; /* 31 - Channel 1 Target 5 config */ + u8 Target16Config; /* 32 - Channel 1 Target 6 config */ + u8 Target17Config; /* 33 - Channel 1 Target 7 config */ + u8 Target18Config; /* 34 - Channel 1 Target 8 config */ + u8 Target19Config; /* 35 - Channel 1 Target 9 config */ + u8 Target1AConfig; /* 36 - Channel 1 Target A config */ + u8 Target1BConfig; /* 37 - Channel 1 Target B config */ + u8 Target1CConfig; /* 38 - Channel 1 Target C config */ + 
u8 Target1DConfig; /* 39 - Channel 1 Target D config */ + u8 Target1EConfig; /* 3A - Channel 1 Target E config */ + u8 Target1FConfig; /* 3B - Channel 1 Target F config */ + u8 reserved[3]; /* 3C - Reserved */ /* ---------- CheckSum ---------- */ - UCHAR CheckSum; /* 3F - Checksum of NVRam */ -} NVRAM, *PNVRAM; + u8 CheckSum; /* 3F - Checksum of NVRam */ +}; /* Bios Configuration for nvram->BIOSConfig1 */ #define NBC_BIOSENABLE 0x01 /* BIOS enable */ @@ -407,10 +369,3 @@ #define NCC_RESET_TIME 0x0A /* SCSI RESET recovering time */ #define NTC_DEFAULT (NTC_1GIGA | NTC_NO_WIDESYNC | NTC_DISC_ENABLE) -#define ORC_RD(x,y) (UCHAR)(inb( (int)((ULONG)((ULONG)x+(UCHAR)y)) )) -#define ORC_RDWORD(x,y) (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) )) -#define ORC_RDLONG(x,y) (long)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) )) - -#define ORC_WR( adr,data) outb( (UCHAR)(data), (int)(adr)) -#define ORC_WRSHORT(adr,data) outw( (UWORD)(data), (int)(adr)) -#define ORC_WRLONG( adr,data) outl( (ULONG)(data), (int)(adr)) diff -Nurb linux-2.6.22-570/drivers/scsi/a4000t.c linux-2.6.22-591/drivers/scsi/a4000t.c --- linux-2.6.22-570/drivers/scsi/a4000t.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/a4000t.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,143 @@ +/* + * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux. + * Amiga Technologies A4000T SCSI controller. + * + * Written 1997 by Alan Hourihane + * plus modifications of the 53c7xx.c driver to support the Amiga. 
+ * + * Rewritten to use 53c700.c by Kars de Jong + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "53c700.h" + +MODULE_AUTHOR("Alan Hourihane / Kars de Jong "); +MODULE_DESCRIPTION("Amiga A4000T NCR53C710 driver"); +MODULE_LICENSE("GPL"); + + +static struct scsi_host_template a4000t_scsi_driver_template = { + .name = "A4000T builtin SCSI", + .proc_name = "A4000t", + .this_id = 7, + .module = THIS_MODULE, +}; + +static struct platform_device *a4000t_scsi_device; + +#define A4000T_SCSI_ADDR 0xdd0040 + +static int __devinit a4000t_probe(struct device *dev) +{ + struct Scsi_Host * host = NULL; + struct NCR_700_Host_Parameters *hostdata; + + if (!(MACH_IS_AMIGA && AMIGAHW_PRESENT(A4000_SCSI))) + goto out; + + if (!request_mem_region(A4000T_SCSI_ADDR, 0x1000, + "A4000T builtin SCSI")) + goto out; + + hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); + if (hostdata == NULL) { + printk(KERN_ERR "a4000t-scsi: Failed to allocate host data\n"); + goto out_release; + } + memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); + + /* Fill in the required pieces of hostdata */ + hostdata->base = (void __iomem *)ZTWO_VADDR(A4000T_SCSI_ADDR); + hostdata->clock = 50; + hostdata->chip710 = 1; + hostdata->dmode_extra = DMODE_FC2; + hostdata->dcntl_extra = EA_710; + + /* and register the chip */ + host = NCR_700_detect(&a4000t_scsi_driver_template, hostdata, dev); + if (!host) { + printk(KERN_ERR "a4000t-scsi: No host detected; " + "board configuration problem?\n"); + goto out_free; + } + + host->this_id = 7; + host->base = A4000T_SCSI_ADDR; + host->irq = IRQ_AMIGA_PORTS; + + if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "a4000t-scsi", + host)) { + printk(KERN_ERR "a4000t-scsi: request_irq failed\n"); + goto out_put_host; + } + + scsi_scan_host(host); + + return 0; + + out_put_host: + scsi_host_put(host); + out_free: + kfree(hostdata); + out_release: + release_mem_region(A4000T_SCSI_ADDR, 0x1000); + 
out: + return -ENODEV; +} + +static __devexit int a4000t_device_remove(struct device *dev) +{ + struct Scsi_Host *host = dev_to_shost(dev); + struct NCR_700_Host_Parameters *hostdata = shost_priv(host); + + scsi_remove_host(host); + + NCR_700_release(host); + kfree(hostdata); + free_irq(host->irq, host); + release_mem_region(A4000T_SCSI_ADDR, 0x1000); + + return 0; +} + +static struct device_driver a4000t_scsi_driver = { + .name = "a4000t-scsi", + .bus = &platform_bus_type, + .probe = a4000t_probe, + .remove = __devexit_p(a4000t_device_remove), +}; + +static int __init a4000t_scsi_init(void) +{ + int err; + + err = driver_register(&a4000t_scsi_driver); + if (err) + return err; + + a4000t_scsi_device = platform_device_register_simple("a4000t-scsi", + -1, NULL, 0); + if (IS_ERR(a4000t_scsi_device)) { + driver_unregister(&a4000t_scsi_driver); + return PTR_ERR(a4000t_scsi_device); + } + + return err; +} + +static void __exit a4000t_scsi_exit(void) +{ + platform_device_unregister(a4000t_scsi_device); + driver_unregister(&a4000t_scsi_driver); +} + +module_init(a4000t_scsi_init); +module_exit(a4000t_scsi_exit); diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aachba.c linux-2.6.22-591/drivers/scsi/aacraid/aachba.c --- linux-2.6.22-570/drivers/scsi/aacraid/aachba.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aacraid/aachba.c 2007-12-21 15:36:12.000000000 -0500 @@ -169,6 +169,18 @@ module_param(acbsize, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. 
Default is to use suggestion from Firmware."); +int update_interval = 30 * 60; +module_param(update_interval, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter."); + +int check_interval = 24 * 60 * 60; +module_param(check_interval, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks."); + +int check_reset = 1; +module_param(check_reset, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter."); + int expose_physicals = -1; module_param(expose_physicals, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on"); @@ -312,11 +324,10 @@ if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS) maximum_num_containers = MAXIMUM_NUM_CONTAINERS; - fsa_dev_ptr = kmalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers, + fsa_dev_ptr = kzalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers, GFP_KERNEL); if (!fsa_dev_ptr) return -ENOMEM; - memset(fsa_dev_ptr, 0, sizeof(*fsa_dev_ptr) * maximum_num_containers); dev->fsa_dev = fsa_dev_ptr; dev->maximum_num_containers = maximum_num_containers; @@ -344,20 +355,15 @@ { void *buf; int transfer_len; - struct scatterlist *sg = scsicmd->request_buffer; + struct scatterlist *sg = scsi_sglist(scsicmd); - if (scsicmd->use_sg) { buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; transfer_len = min(sg->length, len + offset); - } else { - buf = scsicmd->request_buffer; - transfer_len = min(scsicmd->request_bufflen, len + offset); - } + transfer_len -= offset; if (buf && transfer_len > 0) memcpy(buf + offset, data, transfer_len); - if (scsicmd->use_sg) kunmap_atomic(buf - sg->offset, KM_IRQ0); } @@ -451,7 +457,7 @@ { struct fsa_dev_info *fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev; - if (fsa_dev_ptr[scmd_id(scsicmd)].valid) + if ((fsa_dev_ptr[scmd_id(scsicmd)].valid & 1)) 
return aac_scsi_cmd(scsicmd); scsicmd->result = DID_NO_CONNECT << 16; @@ -459,18 +465,18 @@ return 0; } -static int _aac_probe_container2(void * context, struct fib * fibptr) +static void _aac_probe_container2(void * context, struct fib * fibptr) { struct fsa_dev_info *fsa_dev_ptr; int (*callback)(struct scsi_cmnd *); struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context; - if (!aac_valid_context(scsicmd, fibptr)) - return 0; - fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev; + if (!aac_valid_context(scsicmd, fibptr)) + return; scsicmd->SCp.Status = 0; + fsa_dev_ptr = fibptr->dev->fsa_dev; if (fsa_dev_ptr) { struct aac_mount * dresp = (struct aac_mount *) fib_data(fibptr); fsa_dev_ptr += scmd_id(scsicmd); @@ -493,10 +499,11 @@ aac_fib_free(fibptr); callback = (int (*)(struct scsi_cmnd *))(scsicmd->SCp.ptr); scsicmd->SCp.ptr = NULL; - return (*callback)(scsicmd); + (*callback)(scsicmd); + return; } -static int _aac_probe_container1(void * context, struct fib * fibptr) +static void _aac_probe_container1(void * context, struct fib * fibptr) { struct scsi_cmnd * scsicmd; struct aac_mount * dresp; @@ -506,13 +513,14 @@ dresp = (struct aac_mount *) fib_data(fibptr); dresp->mnt[0].capacityhigh = 0; if ((le32_to_cpu(dresp->status) != ST_OK) || - (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) - return _aac_probe_container2(context, fibptr); + (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) { + _aac_probe_container2(context, fibptr); + return; + } scsicmd = (struct scsi_cmnd *) context; - scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; if (!aac_valid_context(scsicmd, fibptr)) - return 0; + return; aac_fib_init(fibptr); @@ -527,21 +535,18 @@ sizeof(struct aac_query_mount), FsaNormal, 0, 1, - (fib_callback) _aac_probe_container2, + _aac_probe_container2, (void *) scsicmd); /* * Check that the command queued to the controller */ - if (status == -EINPROGRESS) { + if (status == -EINPROGRESS) scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; - return 0; - } - if (status 
< 0) { + else if (status < 0) { /* Inherit results from VM_NameServe, if any */ dresp->status = cpu_to_le32(ST_OK); - return _aac_probe_container2(context, fibptr); + _aac_probe_container2(context, fibptr); } - return 0; } static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(struct scsi_cmnd *)) @@ -566,7 +571,7 @@ sizeof(struct aac_query_mount), FsaNormal, 0, 1, - (fib_callback) _aac_probe_container1, + _aac_probe_container1, (void *) scsicmd); /* * Check that the command queued to the controller @@ -620,7 +625,7 @@ return -ENOMEM; } scsicmd->list.next = NULL; - scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))_aac_probe_container1; + scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))aac_probe_container_callback1; scsicmd->device = scsidev; scsidev->sdev_state = 0; @@ -825,7 +830,7 @@ readcmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); readcmd->count = cpu_to_le32(count<<9); readcmd->cid = cpu_to_le16(scmd_id(cmd)); - readcmd->flags = cpu_to_le16(1); + readcmd->flags = cpu_to_le16(IO_TYPE_READ); readcmd->bpTotal = 0; readcmd->bpComplete = 0; @@ -904,7 +909,7 @@ (void *) cmd); } -static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count) +static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua) { u16 fibsize; struct aac_raw_io *writecmd; @@ -914,7 +919,9 @@ writecmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); writecmd->count = cpu_to_le32(count<<9); writecmd->cid = cpu_to_le16(scmd_id(cmd)); - writecmd->flags = 0; + writecmd->flags = fua ? 
+ cpu_to_le16(IO_TYPE_WRITE|IO_SUREWRITE) : + cpu_to_le16(IO_TYPE_WRITE); writecmd->bpTotal = 0; writecmd->bpComplete = 0; @@ -933,7 +940,7 @@ (void *) cmd); } -static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count) +static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua) { u16 fibsize; struct aac_write64 *writecmd; @@ -964,7 +971,7 @@ (void *) cmd); } -static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count) +static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua) { u16 fibsize; struct aac_write *writecmd; @@ -1041,7 +1048,7 @@ struct aac_srb * srbcmd = aac_scsi_common(fib, cmd); aac_build_sg64(cmd, (struct sgmap64*) &srbcmd->sg); - srbcmd->count = cpu_to_le32(cmd->request_bufflen); + srbcmd->count = cpu_to_le32(scsi_bufflen(cmd)); memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb)); memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len); @@ -1069,7 +1076,7 @@ struct aac_srb * srbcmd = aac_scsi_common(fib, cmd); aac_build_sg(cmd, (struct sgmap*)&srbcmd->sg); - srbcmd->count = cpu_to_le32(cmd->request_bufflen); + srbcmd->count = cpu_to_le32(scsi_bufflen(cmd)); memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb)); memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len); @@ -1172,6 +1179,7 @@ } if (!dev->in_reset) { + char buffer[16]; tmp = le32_to_cpu(dev->adapter_info.kernelrev); printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n", dev->name, @@ -1192,16 +1200,23 @@ dev->name, dev->id, tmp>>24,(tmp>>16)&0xff,tmp&0xff, le32_to_cpu(dev->adapter_info.biosbuild)); - if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0) - printk(KERN_INFO "%s%d: serial %x\n", - dev->name, dev->id, - le32_to_cpu(dev->adapter_info.serial[0])); + buffer[0] = '\0'; + if (aac_show_serial_number( + shost_to_class(dev->scsi_host_ptr), buffer)) + printk(KERN_INFO "%s%d: serial %s", + dev->name, dev->id, buffer); if (dev->supplement_adapter_info.VpdInfo.Tsid[0]) { 
printk(KERN_INFO "%s%d: TSID %.*s\n", dev->name, dev->id, (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid), dev->supplement_adapter_info.VpdInfo.Tsid); } + if (!check_reset || + (dev->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_IGNORE_RESET))) { + printk(KERN_INFO "%s%d: Reset Adapter Ignored\n", + dev->name, dev->id); + } } dev->nondasd_support = 0; @@ -1332,7 +1347,7 @@ if (!aac_valid_context(scsicmd, fibptr)) return; - dev = (struct aac_dev *)scsicmd->device->host->hostdata; + dev = fibptr->dev; cid = scmd_id(scsicmd); if (nblank(dprintk(x))) { @@ -1372,15 +1387,8 @@ BUG_ON(fibptr == NULL); - if(scsicmd->use_sg) - pci_unmap_sg(dev->pdev, - (struct scatterlist *)scsicmd->request_buffer, - scsicmd->use_sg, - scsicmd->sc_data_direction); - else if(scsicmd->request_bufflen) - pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, - scsicmd->request_bufflen, - scsicmd->sc_data_direction); + scsi_dma_unmap(scsicmd); + readreply = (struct aac_read_reply *)fib_data(fibptr); if (le32_to_cpu(readreply->status) == ST_OK) scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; @@ -1498,6 +1506,7 @@ { u64 lba; u32 count; + int fua; int status; struct aac_dev *dev; struct fib * cmd_fibcontext; @@ -1512,6 +1521,7 @@ count = scsicmd->cmnd[4]; if (count == 0) count = 256; + fua = 0; } else if (scsicmd->cmnd[0] == WRITE_16) { /* 16 byte command */ dprintk((KERN_DEBUG "aachba: received a write(16) command on id %d.\n", scmd_id(scsicmd))); @@ -1524,6 +1534,7 @@ (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9]; count = (scsicmd->cmnd[10] << 24) | (scsicmd->cmnd[11] << 16) | (scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13]; + fua = scsicmd->cmnd[1] & 0x8; } else if (scsicmd->cmnd[0] == WRITE_12) { /* 12 byte command */ dprintk((KERN_DEBUG "aachba: received a write(12) command on id %d.\n", scmd_id(scsicmd))); @@ -1531,10 +1542,12 @@ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5]; count = (scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16) | 
(scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9]; + fua = scsicmd->cmnd[1] & 0x8; } else { dprintk((KERN_DEBUG "aachba: received a write(10) command on id %d.\n", scmd_id(scsicmd))); lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5]; count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8]; + fua = scsicmd->cmnd[1] & 0x8; } dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n", smp_processor_id(), (unsigned long long)lba, jiffies)); @@ -1549,7 +1562,7 @@ return 0; } - status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count); + status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua); /* * Check that the command queued to the controller @@ -1592,7 +1605,7 @@ COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; else { struct scsi_device *sdev = cmd->device; - struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; + struct aac_dev *dev = fibptr->dev; u32 cid = sdev_id(sdev); printk(KERN_WARNING "synchronize_callback: synchronize failed, status = %d\n", @@ -1699,7 +1712,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) { - u32 cid = 0; + u32 cid; struct Scsi_Host *host = scsicmd->device->host; struct aac_dev *dev = (struct aac_dev *)host->hostdata; struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev; @@ -1711,15 +1724,15 @@ * Test does not apply to ID 16, the pseudo id for the controller * itself. 
*/ - if (scmd_id(scsicmd) != host->this_id) { - if ((scmd_channel(scsicmd) == CONTAINER_CHANNEL)) { - if((scmd_id(scsicmd) >= dev->maximum_num_containers) || + cid = scmd_id(scsicmd); + if (cid != host->this_id) { + if (scmd_channel(scsicmd) == CONTAINER_CHANNEL) { + if((cid >= dev->maximum_num_containers) || (scsicmd->device->lun != 0)) { scsicmd->result = DID_NO_CONNECT << 16; scsicmd->scsi_done(scsicmd); return 0; } - cid = scmd_id(scsicmd); /* * If the target container doesn't exist, it may have @@ -1782,7 +1795,7 @@ { struct inquiry_data inq_data; - dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", scmd_id(scsicmd))); + dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", cid)); memset(&inq_data, 0, sizeof (struct inquiry_data)); inq_data.inqd_ver = 2; /* claim compliance to SCSI-2 */ @@ -1794,7 +1807,7 @@ * Set the Vendor, Product, and Revision Level * see: .c i.e. aac.c */ - if (scmd_id(scsicmd) == host->this_id) { + if (cid == host->this_id) { setinqstr(dev, (void *) (inq_data.inqd_vid), ARRAY_SIZE(container_types)); inq_data.inqd_pdt = INQD_PDT_PROC; /* Processor device */ aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data)); @@ -1886,15 +1899,29 @@ case MODE_SENSE: { - char mode_buf[4]; + char mode_buf[7]; + int mode_buf_length = 4; dprintk((KERN_DEBUG "MODE SENSE command.\n")); mode_buf[0] = 3; /* Mode data length */ mode_buf[1] = 0; /* Medium type - default */ - mode_buf[2] = 0; /* Device-specific param, bit 8: 0/1 = write enabled/protected */ + mode_buf[2] = 0; /* Device-specific param, + bit 8: 0/1 = write enabled/protected + bit 4: 0/1 = FUA enabled */ + if (dev->raw_io_interface) + mode_buf[2] = 0x10; mode_buf[3] = 0; /* Block descriptor length */ - - aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf)); + if (((scsicmd->cmnd[2] & 0x3f) == 8) || + ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) { + mode_buf[0] = 6; + mode_buf[4] = 8; + mode_buf[5] = 1; + mode_buf[6] = 0x04; /* WCE */ + mode_buf_length = 7; + if (mode_buf_length > 
scsicmd->cmnd[4]) + mode_buf_length = scsicmd->cmnd[4]; + } + aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length); scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; scsicmd->scsi_done(scsicmd); @@ -1902,18 +1929,33 @@ } case MODE_SENSE_10: { - char mode_buf[8]; + char mode_buf[11]; + int mode_buf_length = 8; dprintk((KERN_DEBUG "MODE SENSE 10 byte command.\n")); mode_buf[0] = 0; /* Mode data length (MSB) */ mode_buf[1] = 6; /* Mode data length (LSB) */ mode_buf[2] = 0; /* Medium type - default */ - mode_buf[3] = 0; /* Device-specific param, bit 8: 0/1 = write enabled/protected */ + mode_buf[3] = 0; /* Device-specific param, + bit 8: 0/1 = write enabled/protected + bit 4: 0/1 = FUA enabled */ + if (dev->raw_io_interface) + mode_buf[3] = 0x10; mode_buf[4] = 0; /* reserved */ mode_buf[5] = 0; /* reserved */ mode_buf[6] = 0; /* Block descriptor length (MSB) */ mode_buf[7] = 0; /* Block descriptor length (LSB) */ - aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf)); + if (((scsicmd->cmnd[2] & 0x3f) == 8) || + ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) { + mode_buf[1] = 9; + mode_buf[8] = 8; + mode_buf[9] = 1; + mode_buf[10] = 0x04; /* WCE */ + mode_buf_length = 11; + if (mode_buf_length > scsicmd->cmnd[8]) + mode_buf_length = scsicmd->cmnd[8]; + } + aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length); scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; scsicmd->scsi_done(scsicmd); @@ -2136,10 +2178,10 @@ if (!aac_valid_context(scsicmd, fibptr)) return; - dev = (struct aac_dev *)scsicmd->device->host->hostdata; - BUG_ON(fibptr == NULL); + dev = fibptr->dev; + srbreply = (struct aac_srb_reply *) fib_data(fibptr); scsicmd->sense_buffer[0] = '\0'; /* Initialize sense valid flag to false */ @@ -2147,17 +2189,10 @@ * Calculate resid for sg */ - scsicmd->resid = scsicmd->request_bufflen - - le32_to_cpu(srbreply->data_xfer_length); + scsi_set_resid(scsicmd, scsi_bufflen(scsicmd) + - 
le32_to_cpu(srbreply->data_xfer_length)); - if(scsicmd->use_sg) - pci_unmap_sg(dev->pdev, - (struct scatterlist *)scsicmd->request_buffer, - scsicmd->use_sg, - scsicmd->sc_data_direction); - else if(scsicmd->request_bufflen) - pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, scsicmd->request_bufflen, - scsicmd->sc_data_direction); + scsi_dma_unmap(scsicmd); /* * First check the fib status @@ -2233,7 +2268,7 @@ break; case SRB_STATUS_BUSY: - scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8; + scsicmd->result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8; break; case SRB_STATUS_BUS_RESET: @@ -2343,34 +2378,33 @@ { struct aac_dev *dev; unsigned long byte_count = 0; + int nseg; dev = (struct aac_dev *)scsicmd->device->host->hostdata; // Get rid of old data psg->count = 0; psg->sg[0].addr = 0; psg->sg[0].count = 0; - if (scsicmd->use_sg) { + + nseg = scsi_dma_map(scsicmd); + BUG_ON(nseg < 0); + if (nseg) { struct scatterlist *sg; int i; - int sg_count; - sg = (struct scatterlist *) scsicmd->request_buffer; - sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg, - scsicmd->sc_data_direction); - psg->count = cpu_to_le32(sg_count); + psg->count = cpu_to_le32(nseg); - for (i = 0; i < sg_count; i++) { + scsi_for_each_sg(scsicmd, sg, nseg, i) { psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg)); psg->sg[i].count = cpu_to_le32(sg_dma_len(sg)); byte_count += sg_dma_len(sg); - sg++; } /* hba wants the size to be exact */ - if(byte_count > scsicmd->request_bufflen){ + if (byte_count > scsi_bufflen(scsicmd)) { u32 temp = le32_to_cpu(psg->sg[i-1].count) - - (byte_count - scsicmd->request_bufflen); + (byte_count - scsi_bufflen(scsicmd)); psg->sg[i-1].count = cpu_to_le32(temp); - byte_count = scsicmd->request_bufflen; + byte_count = scsi_bufflen(scsicmd); } /* Check for command underflow */ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){ @@ -2378,18 +2412,6 @@ byte_count, scsicmd->underflow); } } - else if(scsicmd->request_bufflen) { - u32 addr; - 
scsicmd->SCp.dma_handle = pci_map_single(dev->pdev, - scsicmd->request_buffer, - scsicmd->request_bufflen, - scsicmd->sc_data_direction); - addr = scsicmd->SCp.dma_handle; - psg->count = cpu_to_le32(1); - psg->sg[0].addr = cpu_to_le32(addr); - psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen); - byte_count = scsicmd->request_bufflen; - } return byte_count; } @@ -2399,6 +2421,7 @@ struct aac_dev *dev; unsigned long byte_count = 0; u64 addr; + int nseg; dev = (struct aac_dev *)scsicmd->device->host->hostdata; // Get rid of old data @@ -2406,31 +2429,28 @@ psg->sg[0].addr[0] = 0; psg->sg[0].addr[1] = 0; psg->sg[0].count = 0; - if (scsicmd->use_sg) { + + nseg = scsi_dma_map(scsicmd); + BUG_ON(nseg < 0); + if (nseg) { struct scatterlist *sg; int i; - int sg_count; - sg = (struct scatterlist *) scsicmd->request_buffer; - - sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg, - scsicmd->sc_data_direction); - for (i = 0; i < sg_count; i++) { + scsi_for_each_sg(scsicmd, sg, nseg, i) { int count = sg_dma_len(sg); addr = sg_dma_address(sg); psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff); psg->sg[i].addr[1] = cpu_to_le32(addr>>32); psg->sg[i].count = cpu_to_le32(count); byte_count += count; - sg++; } - psg->count = cpu_to_le32(sg_count); + psg->count = cpu_to_le32(nseg); /* hba wants the size to be exact */ - if(byte_count > scsicmd->request_bufflen){ + if (byte_count > scsi_bufflen(scsicmd)) { u32 temp = le32_to_cpu(psg->sg[i-1].count) - - (byte_count - scsicmd->request_bufflen); + (byte_count - scsi_bufflen(scsicmd)); psg->sg[i-1].count = cpu_to_le32(temp); - byte_count = scsicmd->request_bufflen; + byte_count = scsi_bufflen(scsicmd); } /* Check for command underflow */ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){ @@ -2438,26 +2458,13 @@ byte_count, scsicmd->underflow); } } - else if(scsicmd->request_bufflen) { - scsicmd->SCp.dma_handle = pci_map_single(dev->pdev, - scsicmd->request_buffer, - scsicmd->request_bufflen, - 
scsicmd->sc_data_direction); - addr = scsicmd->SCp.dma_handle; - psg->count = cpu_to_le32(1); - psg->sg[0].addr[0] = cpu_to_le32(addr & 0xffffffff); - psg->sg[0].addr[1] = cpu_to_le32(addr >> 32); - psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen); - byte_count = scsicmd->request_bufflen; - } return byte_count; } static unsigned long aac_build_sgraw(struct scsi_cmnd* scsicmd, struct sgmapraw* psg) { - struct Scsi_Host *host = scsicmd->device->host; - struct aac_dev *dev = (struct aac_dev *)host->hostdata; unsigned long byte_count = 0; + int nseg; // Get rid of old data psg->count = 0; @@ -2467,16 +2474,14 @@ psg->sg[0].addr[1] = 0; psg->sg[0].count = 0; psg->sg[0].flags = 0; - if (scsicmd->use_sg) { + + nseg = scsi_dma_map(scsicmd); + BUG_ON(nseg < 0); + if (nseg) { struct scatterlist *sg; int i; - int sg_count; - sg = (struct scatterlist *) scsicmd->request_buffer; - sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg, - scsicmd->sc_data_direction); - - for (i = 0; i < sg_count; i++) { + scsi_for_each_sg(scsicmd, sg, nseg, i) { int count = sg_dma_len(sg); u64 addr = sg_dma_address(sg); psg->sg[i].next = 0; @@ -2486,15 +2491,14 @@ psg->sg[i].count = cpu_to_le32(count); psg->sg[i].flags = 0; byte_count += count; - sg++; } - psg->count = cpu_to_le32(sg_count); + psg->count = cpu_to_le32(nseg); /* hba wants the size to be exact */ - if(byte_count > scsicmd->request_bufflen){ + if (byte_count > scsi_bufflen(scsicmd)) { u32 temp = le32_to_cpu(psg->sg[i-1].count) - - (byte_count - scsicmd->request_bufflen); + (byte_count - scsi_bufflen(scsicmd)); psg->sg[i-1].count = cpu_to_le32(temp); - byte_count = scsicmd->request_bufflen; + byte_count = scsi_bufflen(scsicmd); } /* Check for command underflow */ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){ @@ -2502,24 +2506,6 @@ byte_count, scsicmd->underflow); } } - else if(scsicmd->request_bufflen) { - int count; - u64 addr; - scsicmd->SCp.dma_handle = pci_map_single(dev->pdev, - scsicmd->request_buffer, - 
scsicmd->request_bufflen, - scsicmd->sc_data_direction); - addr = scsicmd->SCp.dma_handle; - count = scsicmd->request_bufflen; - psg->count = cpu_to_le32(1); - psg->sg[0].next = 0; - psg->sg[0].prev = 0; - psg->sg[0].addr[1] = cpu_to_le32((u32)(addr>>32)); - psg->sg[0].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff)); - psg->sg[0].count = cpu_to_le32(count); - psg->sg[0].flags = 0; - byte_count = scsicmd->request_bufflen; - } return byte_count; } diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h linux-2.6.22-591/drivers/scsi/aacraid/aacraid.h --- linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aacraid/aacraid.h 2007-12-21 15:36:12.000000000 -0500 @@ -12,8 +12,8 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 2437 -# define AAC_DRIVER_BRANCH "-mh4" +# define AAC_DRIVER_BUILD 2447 +# define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 @@ -464,12 +464,12 @@ int (*adapter_restart)(struct aac_dev *dev, int bled); /* Transport operations */ int (*adapter_ioremap)(struct aac_dev * dev, u32 size); - irqreturn_t (*adapter_intr)(int irq, void *dev_id); + irq_handler_t adapter_intr; /* Packet operations */ int (*adapter_deliver)(struct fib * fib); int (*adapter_bounds)(struct aac_dev * dev, struct scsi_cmnd * cmd, u64 lba); int (*adapter_read)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count); - int (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count); + int (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua); int (*adapter_scsi)(struct fib * fib, struct scsi_cmnd * cmd); /* Administrative operations */ int (*adapter_comm)(struct aac_dev * dev, int comm); @@ -860,10 +860,12 @@ __le32 FlashFirmwareBootBuild; u8 MfgPcbaSerialNo[12]; u8 MfgWWNName[8]; - __le32 MoreFeatureBits; + __le32 SupportedOptions2; __le32 
ReservedGrowth[1]; }; #define AAC_FEATURE_FALCON 0x00000010 +#define AAC_OPTION_MU_RESET 0x00000001 +#define AAC_OPTION_IGNORE_RESET 0x00000002 #define AAC_SIS_VERSION_V3 3 #define AAC_SIS_SLOT_UNKNOWN 0xFF @@ -1054,8 +1056,8 @@ #define aac_adapter_read(fib,cmd,lba,count) \ ((fib)->dev)->a_ops.adapter_read(fib,cmd,lba,count) -#define aac_adapter_write(fib,cmd,lba,count) \ - ((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count) +#define aac_adapter_write(fib,cmd,lba,count,fua) \ + ((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count,fua) #define aac_adapter_scsi(fib,cmd) \ ((fib)->dev)->a_ops.adapter_scsi(fib,cmd) @@ -1213,6 +1215,9 @@ __le32 block; __le16 pad; __le16 flags; +#define IO_TYPE_WRITE 0x00000000 +#define IO_TYPE_READ 0x00000001 +#define IO_SUREWRITE 0x00000008 struct sgmap64 sg; // Must be last in struct because it is variable }; struct aac_write_reply @@ -1257,6 +1262,19 @@ u8 data[16]; }; +#define CT_PAUSE_IO 65 +#define CT_RELEASE_IO 66 +struct aac_pause { + __le32 command; /* VM_ContainerConfig */ + __le32 type; /* CT_PAUSE_IO */ + __le32 timeout; /* 10ms ticks */ + __le32 min; + __le32 noRescan; + __le32 parm3; + __le32 parm4; + __le32 count; /* sizeof(((struct aac_pause_reply *)NULL)->data) */ +}; + struct aac_srb { __le32 function; @@ -1804,6 +1822,10 @@ int aac_get_containers(struct aac_dev *dev); int aac_scsi_cmd(struct scsi_cmnd *cmd); int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg); +#ifndef shost_to_class +#define shost_to_class(shost) &shost->shost_classdev +#endif +ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf); int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg); int aac_rx_init(struct aac_dev *dev); int aac_rkt_init(struct aac_dev *dev); @@ -1813,6 +1835,7 @@ unsigned int aac_response_normal(struct aac_queue * q); unsigned int aac_command_normal(struct aac_queue * q); unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index); +int aac_reset_adapter(struct aac_dev * dev, int 
forced); int aac_check_health(struct aac_dev * dev); int aac_command_thread(void *data); int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx); @@ -1832,3 +1855,6 @@ extern int expose_physicals; extern int aac_reset_devices; extern int aac_commit; +extern int update_interval; +extern int check_interval; +extern int check_reset; diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/commsup.c linux-2.6.22-591/drivers/scsi/aacraid/commsup.c --- linux-2.6.22-570/drivers/scsi/aacraid/commsup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aacraid/commsup.c 2007-12-21 15:36:12.000000000 -0500 @@ -1021,7 +1021,7 @@ } -static int _aac_reset_adapter(struct aac_dev *aac) +static int _aac_reset_adapter(struct aac_dev *aac, int forced) { int index, quirks; int retval; @@ -1029,25 +1029,32 @@ struct scsi_device *dev; struct scsi_cmnd *command; struct scsi_cmnd *command_list; + int jafo = 0; /* * Assumptions: - * - host is locked. + * - host is locked, unless called by the aacraid thread. + * (a matter of convenience, due to legacy issues surrounding + * eh_host_adapter_reset). * - in_reset is asserted, so no new i/o is getting to the * card. - * - The card is dead. + * - The card is dead, or will be very shortly ;-/ so no new + * commands are completing in the interrupt service. */ host = aac->scsi_host_ptr; scsi_block_requests(host); aac_adapter_disable_int(aac); + if (aac->thread->pid != current->pid) { spin_unlock_irq(host->host_lock); kthread_stop(aac->thread); + jafo = 1; + } /* * If a positive health, means in a known DEAD PANIC * state and the adapter could be reset to `try again'. */ - retval = aac_adapter_restart(aac, aac_adapter_check_health(aac)); + retval = aac_adapter_restart(aac, forced ? 
0 : aac_adapter_check_health(aac)); if (retval) goto out; @@ -1104,11 +1111,13 @@ if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) goto out; + if (jafo) { aac->thread = kthread_run(aac_command_thread, aac, aac->name); if (IS_ERR(aac->thread)) { retval = PTR_ERR(aac->thread); goto out; } + } (void)aac_get_adapter_info(aac); quirks = aac_get_driver_ident(index)->quirks; if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) { @@ -1150,7 +1159,98 @@ out: aac->in_reset = 0; scsi_unblock_requests(host); + if (jafo) { spin_lock_irq(host->host_lock); + } + return retval; +} + +int aac_reset_adapter(struct aac_dev * aac, int forced) +{ + unsigned long flagv = 0; + int retval; + struct Scsi_Host * host; + + if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0) + return -EBUSY; + + if (aac->in_reset) { + spin_unlock_irqrestore(&aac->fib_lock, flagv); + return -EBUSY; + } + aac->in_reset = 1; + spin_unlock_irqrestore(&aac->fib_lock, flagv); + + /* + * Wait for all commands to complete to this specific + * target (block maximum 60 seconds). Although not necessary, + * it does make us a good storage citizen. 
+ */ + host = aac->scsi_host_ptr; + scsi_block_requests(host); + if (forced < 2) for (retval = 60; retval; --retval) { + struct scsi_device * dev; + struct scsi_cmnd * command; + int active = 0; + + __shost_for_each_device(dev, host) { + spin_lock_irqsave(&dev->list_lock, flagv); + list_for_each_entry(command, &dev->cmd_list, list) { + if (command->SCp.phase == AAC_OWNER_FIRMWARE) { + active++; + break; + } + } + spin_unlock_irqrestore(&dev->list_lock, flagv); + if (active) + break; + + } + /* + * We can exit If all the commands are complete + */ + if (active == 0) + break; + ssleep(1); + } + + /* Quiesce build, flush cache, write through mode */ + aac_send_shutdown(aac); + spin_lock_irqsave(host->host_lock, flagv); + retval = _aac_reset_adapter(aac, forced); + spin_unlock_irqrestore(host->host_lock, flagv); + + if (retval == -ENODEV) { + /* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */ + struct fib * fibctx = aac_fib_alloc(aac); + if (fibctx) { + struct aac_pause *cmd; + int status; + + aac_fib_init(fibctx); + + cmd = (struct aac_pause *) fib_data(fibctx); + + cmd->command = cpu_to_le32(VM_ContainerConfig); + cmd->type = cpu_to_le32(CT_PAUSE_IO); + cmd->timeout = cpu_to_le32(1); + cmd->min = cpu_to_le32(1); + cmd->noRescan = cpu_to_le32(1); + cmd->count = cpu_to_le32(0); + + status = aac_fib_send(ContainerCommand, + fibctx, + sizeof(struct aac_pause), + FsaNormal, + -2 /* Timeout silently */, 1, + NULL, NULL); + + if (status >= 0) + aac_fib_complete(fibctx); + aac_fib_free(fibctx); + } + } + return retval; } @@ -1270,9 +1370,14 @@ printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED); + if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_IGNORE_RESET))) + goto out; host = aac->scsi_host_ptr; + if (aac->thread->pid != current->pid) spin_lock_irqsave(host->host_lock, flagv); - BlinkLED = _aac_reset_adapter(aac); + BlinkLED = _aac_reset_adapter(aac, 0); + if (aac->thread->pid != 
current->pid) spin_unlock_irqrestore(host->host_lock, flagv); return BlinkLED; @@ -1300,6 +1405,9 @@ struct aac_fib_context *fibctx; unsigned long flags; DECLARE_WAITQUEUE(wait, current); + unsigned long next_jiffies = jiffies + HZ; + unsigned long next_check_jiffies = next_jiffies; + long difference = HZ; /* * We can only have one thread per adapter for AIF's. @@ -1507,11 +1615,79 @@ * There are no more AIF's */ spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags); - schedule(); - if (kthread_should_stop()) + /* + * Background activity + */ + if ((time_before(next_check_jiffies,next_jiffies)) + && ((difference = next_check_jiffies - jiffies) <= 0)) { + next_check_jiffies = next_jiffies; + if (aac_check_health(dev) == 0) { + difference = ((long)(unsigned)check_interval) + * HZ; + next_check_jiffies = jiffies + difference; + } else if (!dev->queues) break; + } + if (!time_before(next_check_jiffies,next_jiffies) + && ((difference = next_jiffies - jiffies) <= 0)) { + struct timeval now; + int ret; + + /* Don't even try to talk to adapter if its sick */ + ret = aac_check_health(dev); + if (!ret && !dev->queues) + break; + next_check_jiffies = jiffies + + ((long)(unsigned)check_interval) + * HZ; + do_gettimeofday(&now); + + /* Synchronize our watches */ + if (((1000000 - (1000000 / HZ)) > now.tv_usec) + && (now.tv_usec > (1000000 / HZ))) + difference = (((1000000 - now.tv_usec) * HZ) + + 500000) / 1000000; + else if (ret == 0) { + struct fib *fibptr; + + if ((fibptr = aac_fib_alloc(dev))) { + u32 * info; + + aac_fib_init(fibptr); + + info = (u32 *) fib_data(fibptr); + if (now.tv_usec > 500000) + ++now.tv_sec; + + *info = cpu_to_le32(now.tv_sec); + + (void)aac_fib_send(SendHostTime, + fibptr, + sizeof(*info), + FsaNormal, + 1, 1, + NULL, + NULL); + aac_fib_complete(fibptr); + aac_fib_free(fibptr); + } + difference = (long)(unsigned)update_interval*HZ; + } else { + /* retry shortly */ + difference = 10 * HZ; + } + next_jiffies = jiffies + difference; + 
if (time_before(next_check_jiffies,next_jiffies)) + difference = next_check_jiffies - jiffies; + } + if (difference <= 0) + difference = 1; set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(difference); + + if (kthread_should_stop()) + break; } if (dev->queues) remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait); diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/linit.c linux-2.6.22-591/drivers/scsi/aacraid/linit.c --- linux-2.6.22-570/drivers/scsi/aacraid/linit.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/aacraid/linit.c 2007-12-21 15:36:12.000000000 -0500 @@ -39,10 +39,8 @@ #include #include #include -#include #include #include -#include #include #include @@ -223,12 +221,12 @@ { aac_rx_init, "percraid", "DELL ", "PERC 320/DC ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Perc 320/DC*/ { aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/ { aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/ - { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell PERC2/QC */ + { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_34SG }, /* Dell PERC2/QC */ { aac_sa_init, "hpnraid", "HP ", "NetRAID ", 4, AAC_QUIRK_34SG }, /* HP NetRAID-4M */ { aac_rx_init, "aacraid", "DELL ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell Catchall */ { aac_rx_init, "aacraid", "Legend ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend Catchall */ - { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec Catch All */ + { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2 }, /* Adaptec Catch All */ { aac_rkt_init, "aacraid", "ADAPTEC ", "RAID ", 2 }, /* Adaptec Rocket Catch All */ { aac_nark_init, "aacraid", "ADAPTEC ", "RAID ", 2 } /* Adaptec NEMER/ARK Catch All */ }; @@ -403,10 +401,6 @@ static int aac_slave_configure(struct scsi_device *sdev) { 
- if (sdev_channel(sdev) == CONTAINER_CHANNEL) { - sdev->skip_ms_page_8 = 1; - sdev->skip_ms_page_3f = 1; - } if ((sdev->type == TYPE_DISK) && (sdev_channel(sdev) != CONTAINER_CHANNEL)) { if (expose_physicals == 0) @@ -450,6 +444,43 @@ return 0; } +/** + * aac_change_queue_depth - alter queue depths + * @sdev: SCSI device we are considering + * @depth: desired queue depth + * + * Alters queue depths for target device based on the host adapter's + * total capacity and the queue depth supported by the target device. + */ + +static int aac_change_queue_depth(struct scsi_device *sdev, int depth) +{ + if (sdev->tagged_supported && (sdev->type == TYPE_DISK) && + (sdev_channel(sdev) == CONTAINER_CHANNEL)) { + struct scsi_device * dev; + struct Scsi_Host *host = sdev->host; + unsigned num = 0; + + __shost_for_each_device(dev, host) { + if (dev->tagged_supported && (dev->type == TYPE_DISK) && + (sdev_channel(dev) == CONTAINER_CHANNEL)) + ++num; + ++num; + } + if (num >= host->can_queue) + num = host->can_queue - 1; + if (depth > (host->can_queue - num)) + depth = host->can_queue - num; + if (depth > 256) + depth = 256; + else if (depth < 2) + depth = 2; + scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth); + } else + scsi_adjust_queue_depth(sdev, 0, 1); + return sdev->queue_depth; +} + static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg) { struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; @@ -548,6 +579,14 @@ ssleep(1); } printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME); + /* + * This adapter needs a blind reset, only do so for Adapters that + * support a register, instead of a commanded, reset. 
+ */ + if ((aac->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) == + le32_to_cpu(AAC_OPTION_MU_RESET)) + aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */ return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */ } @@ -735,15 +774,21 @@ return len; } -static ssize_t aac_show_serial_number(struct class_device *class_dev, - char *buf) +ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf) { struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata; int len = 0; if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0) - len = snprintf(buf, PAGE_SIZE, "%x\n", + len = snprintf(buf, PAGE_SIZE, "%06X\n", le32_to_cpu(dev->adapter_info.serial[0])); + if (len && + !memcmp(&dev->supplement_adapter_info.MfgPcbaSerialNo[ + sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo)+2-len], + buf, len)) + len = snprintf(buf, PAGE_SIZE, "%.*s\n", + (int)sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo), + dev->supplement_adapter_info.MfgPcbaSerialNo); return len; } @@ -759,6 +804,31 @@ class_to_shost(class_dev)->max_id); } +static ssize_t aac_store_reset_adapter(struct class_device *class_dev, + const char *buf, size_t count) +{ + int retval = -EACCES; + + if (!capable(CAP_SYS_ADMIN)) + return retval; + retval = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!'); + if (retval >= 0) + retval = count; + return retval; +} + +static ssize_t aac_show_reset_adapter(struct class_device *class_dev, + char *buf) +{ + struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata; + int len, tmp; + + tmp = aac_adapter_check_health(dev); + if ((tmp == 0) && dev->in_reset) + tmp = -EBUSY; + len = snprintf(buf, PAGE_SIZE, "0x%x", tmp); + return len; +} static struct class_device_attribute aac_model = { .attr = { @@ -816,6 +886,14 @@ }, .show = aac_show_max_id, }; +static struct 
class_device_attribute aac_reset = { + .attr = { + .name = "reset_host", + .mode = S_IWUSR|S_IRUGO, + }, + .store = aac_store_reset_adapter, + .show = aac_show_reset_adapter, +}; static struct class_device_attribute *aac_attrs[] = { &aac_model, @@ -826,6 +904,7 @@ &aac_serial_number, &aac_max_channel, &aac_max_id, + &aac_reset, NULL }; @@ -852,6 +931,7 @@ .bios_param = aac_biosparm, .shost_attrs = aac_attrs, .slave_configure = aac_slave_configure, + .change_queue_depth = aac_change_queue_depth, .eh_abort_handler = aac_eh_abort, .eh_host_reset_handler = aac_eh_reset, .can_queue = AAC_NUM_IO_FIB, @@ -1090,7 +1170,7 @@ { int error; - printk(KERN_INFO "Adaptec %s driver (%s)\n", + printk(KERN_INFO "Adaptec %s driver %s\n", AAC_DRIVERNAME, aac_driver_version); error = pci_register_driver(&aac_pci_driver); diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/rx.c linux-2.6.22-591/drivers/scsi/aacraid/rx.c --- linux-2.6.22-570/drivers/scsi/aacraid/rx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aacraid/rx.c 2007-12-21 15:36:12.000000000 -0500 @@ -464,6 +464,8 @@ { u32 var; + if (!(dev->supplement_adapter_info.SupportedOptions2 & + le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) { if (bled) printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", dev->name, dev->id, bled); @@ -479,6 +481,7 @@ if (bled && (bled != -ETIMEDOUT)) return -EINVAL; + } if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */ rx_writel(dev, MUnit.reserved2, 3); msleep(5000); /* Delay 5 seconds */ @@ -596,7 +599,7 @@ } msleep(1); } - if (restart) + if (restart && aac_commit) aac_commit = 1; /* * Fill in the common function dispatch table. 
diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.c linux-2.6.22-591/drivers/scsi/advansys.c --- linux-2.6.22-570/drivers/scsi/advansys.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/advansys.c 2007-12-21 15:36:12.000000000 -0500 @@ -798,7 +798,6 @@ #include #include #include -#include "advansys.h" #ifdef CONFIG_PCI #include #endif /* CONFIG_PCI */ @@ -2014,7 +2013,7 @@ STATIC void AscEnableIsaDma(uchar); #endif /* CONFIG_ISA */ STATIC ASC_DCNT AscGetMaxDmaCount(ushort); - +static const char *advansys_info(struct Scsi_Host *shp); /* * --- Adv Library Constants and Macros @@ -3970,10 +3969,6 @@ ASC_IS_PCI, }; -/* - * Used with the LILO 'advansys' option to eliminate or - * limit I/O port probing at boot time, cf. advansys_setup(). - */ STATIC int asc_iopflag = ASC_FALSE; STATIC int asc_ioport[ASC_NUM_IOPORT_PROBE] = { 0, 0, 0, 0 }; @@ -4055,10 +4050,6 @@ #endif /* ADVANSYS_DEBUG */ -/* - * --- Linux 'struct scsi_host_template' and advansys_setup() Functions - */ - #ifdef CONFIG_PROC_FS /* * advansys_proc_info() - /proc/scsi/advansys/[0-(ASC_NUM_BOARD_SUPPORTED-1)] @@ -4080,7 +4071,7 @@ * if 'prtbuf' is too small it will not be overwritten. Instead the * user just won't get all the available statistics. */ -int +static int advansys_proc_info(struct Scsi_Host *shost, char *buffer, char **start, off_t offset, int length, int inout) { @@ -4296,7 +4287,7 @@ * it must not call SCSI mid-level functions including scsi_malloc() * and scsi_free(). */ -int __init +static int __init advansys_detect(struct scsi_host_template *tpnt) { static int detect_called = ASC_FALSE; @@ -5428,7 +5419,7 @@ * * Release resources allocated for a single AdvanSys adapter. */ -int +static int advansys_release(struct Scsi_Host *shp) { asc_board_t *boardp; @@ -5475,7 +5466,7 @@ * Note: The information line should not exceed ASC_INFO_SIZE bytes, * otherwise the static 'info' array will be overrun. 
*/ -const char * +static const char * advansys_info(struct Scsi_Host *shp) { static char info[ASC_INFO_SIZE]; @@ -5568,7 +5559,7 @@ * This function always returns 0. Command return status is saved * in the 'scp' result field. */ -int +static int advansys_queuecommand(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *)) { struct Scsi_Host *shp; @@ -5656,7 +5647,7 @@ * sleeping is allowed and no locking other than for host structures is * required. Returns SUCCESS or FAILED. */ -int +static int advansys_reset(struct scsi_cmnd *scp) { struct Scsi_Host *shp; @@ -5841,7 +5832,7 @@ * ip[1]: sectors * ip[2]: cylinders */ -int +static int advansys_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int ip[]) { @@ -5875,82 +5866,6 @@ } /* - * advansys_setup() - * - * This function is called from init/main.c at boot time. - * It it passed LILO parameters that can be set from the - * LILO command line or in /etc/lilo.conf. - * - * It is used by the AdvanSys driver to either disable I/O - * port scanning or to limit scanning to 1 - 4 I/O ports. - * Regardless of the option setting EISA and PCI boards - * will still be searched for and detected. This option - * only affects searching for ISA and VL boards. - * - * If ADVANSYS_DEBUG is defined the driver debug level may - * be set using the 5th (ASC_NUM_IOPORT_PROBE + 1) I/O Port. - * - * Examples: - * 1. Eliminate I/O port scanning: - * boot: linux advansys= - * or - * boot: linux advansys=0x0 - * 2. Limit I/O port scanning to one I/O port: - * boot: linux advansys=0x110 - * 3. Limit I/O port scanning to four I/O ports: - * boot: linux advansys=0x110,0x210,0x230,0x330 - * 4. If ADVANSYS_DEBUG, limit I/O port scanning to four I/O ports and - * set the driver debug level to 2. - * boot: linux advansys=0x110,0x210,0x230,0x330,0xdeb2 - * - * ints[0] - number of arguments - * ints[1] - first argument - * ints[2] - second argument - * ... 
- */ -void __init -advansys_setup(char *str, int *ints) -{ - int i; - - if (asc_iopflag == ASC_TRUE) { - printk("AdvanSys SCSI: 'advansys' LILO option may appear only once\n"); - return; - } - - asc_iopflag = ASC_TRUE; - - if (ints[0] > ASC_NUM_IOPORT_PROBE) { -#ifdef ADVANSYS_DEBUG - if ((ints[0] == ASC_NUM_IOPORT_PROBE + 1) && - (ints[ASC_NUM_IOPORT_PROBE + 1] >> 4 == 0xdeb)) { - asc_dbglvl = ints[ASC_NUM_IOPORT_PROBE + 1] & 0xf; - } else { -#endif /* ADVANSYS_DEBUG */ - printk("AdvanSys SCSI: only %d I/O ports accepted\n", - ASC_NUM_IOPORT_PROBE); -#ifdef ADVANSYS_DEBUG - } -#endif /* ADVANSYS_DEBUG */ - } - -#ifdef ADVANSYS_DEBUG - ASC_DBG1(1, "advansys_setup: ints[0] %d\n", ints[0]); - for (i = 1; i < ints[0]; i++) { - ASC_DBG2(1, " ints[%d] 0x%x", i, ints[i]); - } - ASC_DBG(1, "\n"); -#endif /* ADVANSYS_DEBUG */ - - for (i = 1; i <= ints[0] && i <= ASC_NUM_IOPORT_PROBE; i++) { - asc_ioport[i-1] = ints[i]; - ASC_DBG2(1, "advansys_setup: asc_ioport[%d] 0x%x\n", - i - 1, asc_ioport[i-1]); - } -} - - -/* * --- Loadable Driver Support */ diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.h linux-2.6.22-591/drivers/scsi/advansys.h --- linux-2.6.22-570/drivers/scsi/advansys.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/advansys.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,36 +0,0 @@ -/* - * advansys.h - Linux Host Driver for AdvanSys SCSI Adapters - * - * Copyright (c) 1995-2000 Advanced System Products, Inc. - * Copyright (c) 2000-2001 ConnectCom Solutions, Inc. - * All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that redistributions of source - * code retain the above copyright notice and this comment without - * modification. - * - * As of March 8, 2000 Advanced System Products, Inc. (AdvanSys) - * changed its name to ConnectCom Solutions, Inc. - * - */ - -#ifndef _ADVANSYS_H -#define _ADVANSYS_H - -/* - * struct scsi_host_template function prototypes. 
- */ -int advansys_detect(struct scsi_host_template *); -int advansys_release(struct Scsi_Host *); -const char *advansys_info(struct Scsi_Host *); -int advansys_queuecommand(struct scsi_cmnd *, void (* done)(struct scsi_cmnd *)); -int advansys_reset(struct scsi_cmnd *); -int advansys_biosparam(struct scsi_device *, struct block_device *, - sector_t, int[]); -static int advansys_slave_configure(struct scsi_device *); - -/* init/main.c setup function */ -void advansys_setup(char *, int *); - -#endif /* _ADVANSYS_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/aha152x.c linux-2.6.22-591/drivers/scsi/aha152x.c --- linux-2.6.22-570/drivers/scsi/aha152x.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aha152x.c 2007-12-21 15:36:12.000000000 -0500 @@ -240,6 +240,7 @@ #include #include #include +#include #include #include #include @@ -253,7 +254,6 @@ #include #include #include -#include #include #include "scsi.h" @@ -551,7 +551,7 @@ */ struct aha152x_scdata { Scsi_Cmnd *next; /* next sc in queue */ - struct semaphore *sem; /* semaphore to block on */ + struct completion *done;/* semaphore to block on */ unsigned char cmd_len; unsigned char cmnd[MAX_COMMAND_SIZE]; unsigned short use_sg; @@ -608,7 +608,7 @@ #define SCDATA(SCpnt) ((struct aha152x_scdata *) (SCpnt)->host_scribble) #define SCNEXT(SCpnt) SCDATA(SCpnt)->next -#define SCSEM(SCpnt) SCDATA(SCpnt)->sem +#define SCSEM(SCpnt) SCDATA(SCpnt)->done #define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset)) @@ -969,7 +969,8 @@ /* * Queue a command and setup interrupts for a free bus. 
*/ -static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct semaphore *sem, int phase, void (*done)(Scsi_Cmnd *)) +static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct completion *complete, + int phase, void (*done)(Scsi_Cmnd *)) { struct Scsi_Host *shpnt = SCpnt->device->host; unsigned long flags; @@ -1013,7 +1014,7 @@ } SCNEXT(SCpnt) = NULL; - SCSEM(SCpnt) = sem; + SCSEM(SCpnt) = complete; /* setup scratch area SCp.ptr : buffer pointer @@ -1084,9 +1085,9 @@ DPRINTK(debug_eh, INFO_LEAD "reset_done called\n", CMDINFO(SCpnt)); #endif if(SCSEM(SCpnt)) { - up(SCSEM(SCpnt)); + complete(SCSEM(SCpnt)); } else { - printk(KERN_ERR "aha152x: reset_done w/o semaphore\n"); + printk(KERN_ERR "aha152x: reset_done w/o completion\n"); } } @@ -1139,21 +1140,6 @@ return FAILED; } -static void timer_expired(unsigned long p) -{ - Scsi_Cmnd *SCp = (Scsi_Cmnd *)p; - struct semaphore *sem = SCSEM(SCp); - struct Scsi_Host *shpnt = SCp->device->host; - unsigned long flags; - - /* remove command from issue queue */ - DO_LOCK(flags); - remove_SC(&ISSUE_SC, SCp); - DO_UNLOCK(flags); - - up(sem); -} - /* * Reset a device * @@ -1161,14 +1147,14 @@ static int aha152x_device_reset(Scsi_Cmnd * SCpnt) { struct Scsi_Host *shpnt = SCpnt->device->host; - DECLARE_MUTEX_LOCKED(sem); - struct timer_list timer; + DECLARE_COMPLETION(done); int ret, issued, disconnected; unsigned char old_cmd_len = SCpnt->cmd_len; unsigned short old_use_sg = SCpnt->use_sg; void *old_buffer = SCpnt->request_buffer; unsigned old_bufflen = SCpnt->request_bufflen; unsigned long flags; + unsigned long timeleft; #if defined(AHA152X_DEBUG) if(HOSTDATA(shpnt)->debug & debug_eh) { @@ -1192,15 +1178,15 @@ SCpnt->request_buffer = NULL; SCpnt->request_bufflen = 0; - init_timer(&timer); - timer.data = (unsigned long) SCpnt; - timer.expires = jiffies + 100*HZ; /* 10s */ - timer.function = (void (*)(unsigned long)) timer_expired; - - aha152x_internal_queue(SCpnt, &sem, resetting, reset_done); - add_timer(&timer); - down(&sem); - 
del_timer(&timer); + aha152x_internal_queue(SCpnt, &done, resetting, reset_done); + + timeleft = wait_for_completion_timeout(&done, 100*HZ); + if (!timeleft) { + /* remove command from issue queue */ + DO_LOCK(flags); + remove_SC(&ISSUE_SC, SCpnt); + DO_UNLOCK(flags); + } SCpnt->cmd_len = old_cmd_len; SCpnt->use_sg = old_use_sg; diff -Nurb linux-2.6.22-570/drivers/scsi/aha1740.c linux-2.6.22-591/drivers/scsi/aha1740.c --- linux-2.6.22-570/drivers/scsi/aha1740.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aha1740.c 2007-12-21 15:36:12.000000000 -0500 @@ -271,19 +271,7 @@ continue; } sgptr = (struct aha1740_sg *) SCtmp->host_scribble; - if (SCtmp->use_sg) { - /* We used scatter-gather. - Do the unmapping dance. */ - dma_unmap_sg (&edev->dev, - (struct scatterlist *) SCtmp->request_buffer, - SCtmp->use_sg, - SCtmp->sc_data_direction); - } else { - dma_unmap_single (&edev->dev, - sgptr->buf_dma_addr, - SCtmp->request_bufflen, - DMA_BIDIRECTIONAL); - } + scsi_dma_unmap(SCtmp); /* Free the sg block */ dma_free_coherent (&edev->dev, @@ -349,11 +337,9 @@ unchar target = scmd_id(SCpnt); struct aha1740_hostdata *host = HOSTDATA(SCpnt->device->host); unsigned long flags; - void *buff = SCpnt->request_buffer; - int bufflen = SCpnt->request_bufflen; dma_addr_t sg_dma; struct aha1740_sg *sgptr; - int ecbno; + int ecbno, nseg; DEB(int i); if(*cmd == REQUEST_SENSE) { @@ -424,23 +410,22 @@ sgptr = (struct aha1740_sg *) SCpnt->host_scribble; sgptr->sg_dma_addr = sg_dma; - if (SCpnt->use_sg) { - struct scatterlist * sgpnt; + nseg = scsi_dma_map(SCpnt); + BUG_ON(nseg < 0); + if (nseg) { + struct scatterlist *sg; struct aha1740_chain * cptr; - int i, count; + int i; DEB(unsigned char * ptr); host->ecb[ecbno].sg = 1; /* SCSI Initiator Command * w/scatter-gather*/ - sgpnt = (struct scatterlist *) SCpnt->request_buffer; cptr = sgptr->sg_chain; - count = dma_map_sg (&host->edev->dev, sgpnt, SCpnt->use_sg, - SCpnt->sc_data_direction); - for(i=0; i < count; i++) { - 
cptr[i].datalen = sg_dma_len (sgpnt + i); - cptr[i].dataptr = sg_dma_address (sgpnt + i); + scsi_for_each_sg(SCpnt, sg, nseg, i) { + cptr[i].datalen = sg_dma_len (sg); + cptr[i].dataptr = sg_dma_address (sg); } - host->ecb[ecbno].datalen = count*sizeof(struct aha1740_chain); + host->ecb[ecbno].datalen = nseg * sizeof(struct aha1740_chain); host->ecb[ecbno].dataptr = sg_dma; #ifdef DEBUG printk("cptr %x: ",cptr); @@ -448,11 +433,8 @@ for(i=0;i<24;i++) printk("%02x ", ptr[i]); #endif } else { - host->ecb[ecbno].datalen = bufflen; - sgptr->buf_dma_addr = dma_map_single (&host->edev->dev, - buff, bufflen, - DMA_BIDIRECTIONAL); - host->ecb[ecbno].dataptr = sgptr->buf_dma_addr; + host->ecb[ecbno].datalen = 0; + host->ecb[ecbno].dataptr = 0; } host->ecb[ecbno].lun = SCpnt->device->lun; host->ecb[ecbno].ses = 1; /* Suppress underrun errors */ diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.c --- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.c 2007-12-21 15:36:12.000000000 -0500 @@ -376,21 +376,10 @@ ahd_linux_unmap_scb(struct ahd_softc *ahd, struct scb *scb) { struct scsi_cmnd *cmd; - int direction; cmd = scb->io_ctx; - direction = cmd->sc_data_direction; ahd_sync_sglist(ahd, scb, BUS_DMASYNC_POSTWRITE); - if (cmd->use_sg != 0) { - struct scatterlist *sg; - - sg = (struct scatterlist *)cmd->request_buffer; - pci_unmap_sg(ahd->dev_softc, sg, cmd->use_sg, direction); - } else if (cmd->request_bufflen != 0) { - pci_unmap_single(ahd->dev_softc, - scb->platform_data->buf_busaddr, - cmd->request_bufflen, direction); - } + scsi_dma_unmap(cmd); } /******************************** Macros **************************************/ @@ -1422,6 +1411,7 @@ u_int col_idx; uint16_t mask; unsigned long flags; + int nseg; ahd_lock(ahd, &flags); @@ -1494,18 +1484,17 @@ ahd_set_residual(scb, 0); ahd_set_sense_residual(scb, 0); 
scb->sg_count = 0; - if (cmd->use_sg != 0) { - void *sg; + + nseg = scsi_dma_map(cmd); + BUG_ON(nseg < 0); + if (nseg > 0) { + void *sg = scb->sg_list; struct scatterlist *cur_seg; - u_int nseg; - int dir; + int i; - cur_seg = (struct scatterlist *)cmd->request_buffer; - dir = cmd->sc_data_direction; - nseg = pci_map_sg(ahd->dev_softc, cur_seg, - cmd->use_sg, dir); scb->platform_data->xfer_len = 0; - for (sg = scb->sg_list; nseg > 0; nseg--, cur_seg++) { + + scsi_for_each_sg(cmd, cur_seg, nseg, i) { dma_addr_t addr; bus_size_t len; @@ -1513,22 +1502,8 @@ len = sg_dma_len(cur_seg); scb->platform_data->xfer_len += len; sg = ahd_sg_setup(ahd, scb, sg, addr, len, - /*last*/nseg == 1); + i == (nseg - 1)); } - } else if (cmd->request_bufflen != 0) { - void *sg; - dma_addr_t addr; - int dir; - - sg = scb->sg_list; - dir = cmd->sc_data_direction; - addr = pci_map_single(ahd->dev_softc, - cmd->request_buffer, - cmd->request_bufflen, dir); - scb->platform_data->xfer_len = cmd->request_bufflen; - scb->platform_data->buf_busaddr = addr; - sg = ahd_sg_setup(ahd, scb, sg, addr, - cmd->request_bufflen, /*last*/TRUE); } LIST_INSERT_HEAD(&ahd->pending_scbs, scb, pending_links); diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.h --- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.h 2007-12-21 15:36:12.000000000 -0500 @@ -781,7 +781,7 @@ static __inline void ahd_set_residual(struct scb *scb, u_long resid) { - scb->io_ctx->resid = resid; + scsi_set_resid(scb->io_ctx, resid); } static __inline @@ -793,7 +793,7 @@ static __inline u_long ahd_get_residual(struct scb *scb) { - return (scb->io_ctx->resid); + return scsi_get_resid(scb->io_ctx); } static __inline diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.c --- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c 
2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.c 2007-12-21 15:36:12.000000000 -0500 @@ -402,18 +402,8 @@ cmd = scb->io_ctx; ahc_sync_sglist(ahc, scb, BUS_DMASYNC_POSTWRITE); - if (cmd->use_sg != 0) { - struct scatterlist *sg; - sg = (struct scatterlist *)cmd->request_buffer; - pci_unmap_sg(ahc->dev_softc, sg, cmd->use_sg, - cmd->sc_data_direction); - } else if (cmd->request_bufflen != 0) { - pci_unmap_single(ahc->dev_softc, - scb->platform_data->buf_busaddr, - cmd->request_bufflen, - cmd->sc_data_direction); - } + scsi_dma_unmap(cmd); } static __inline int @@ -1381,6 +1371,7 @@ struct ahc_tmode_tstate *tstate; uint16_t mask; struct scb_tailq *untagged_q = NULL; + int nseg; /* * Schedule us to run later. The only reason we are not @@ -1472,23 +1463,21 @@ ahc_set_residual(scb, 0); ahc_set_sense_residual(scb, 0); scb->sg_count = 0; - if (cmd->use_sg != 0) { + + nseg = scsi_dma_map(cmd); + BUG_ON(nseg < 0); + if (nseg > 0) { struct ahc_dma_seg *sg; struct scatterlist *cur_seg; - struct scatterlist *end_seg; - int nseg; + int i; - cur_seg = (struct scatterlist *)cmd->request_buffer; - nseg = pci_map_sg(ahc->dev_softc, cur_seg, cmd->use_sg, - cmd->sc_data_direction); - end_seg = cur_seg + nseg; /* Copy the segments into the SG list. */ sg = scb->sg_list; /* * The sg_count may be larger than nseg if * a transfer crosses a 32bit page. 
*/ - while (cur_seg < end_seg) { + scsi_for_each_sg(cmd, cur_seg, nseg, i) { dma_addr_t addr; bus_size_t len; int consumed; @@ -1499,7 +1488,6 @@ sg, addr, len); sg += consumed; scb->sg_count += consumed; - cur_seg++; } sg--; sg->len |= ahc_htole32(AHC_DMA_LAST_SEG); @@ -1516,33 +1504,6 @@ */ scb->hscb->dataptr = scb->sg_list->addr; scb->hscb->datacnt = scb->sg_list->len; - } else if (cmd->request_bufflen != 0) { - struct ahc_dma_seg *sg; - dma_addr_t addr; - - sg = scb->sg_list; - addr = pci_map_single(ahc->dev_softc, - cmd->request_buffer, - cmd->request_bufflen, - cmd->sc_data_direction); - scb->platform_data->buf_busaddr = addr; - scb->sg_count = ahc_linux_map_seg(ahc, scb, - sg, addr, - cmd->request_bufflen); - sg->len |= ahc_htole32(AHC_DMA_LAST_SEG); - - /* - * Reset the sg list pointer. - */ - scb->hscb->sgptr = - ahc_htole32(scb->sg_list_phys | SG_FULL_RESID); - - /* - * Copy the first SG into the "current" - * data pointer area. - */ - scb->hscb->dataptr = sg->addr; - scb->hscb->datacnt = sg->len; } else { scb->hscb->sgptr = ahc_htole32(SG_LIST_NULL); scb->hscb->dataptr = 0; diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.h --- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.h 2007-12-21 15:36:12.000000000 -0500 @@ -751,7 +751,7 @@ static __inline void ahc_set_residual(struct scb *scb, u_long resid) { - scb->io_ctx->resid = resid; + scsi_set_resid(scb->io_ctx, resid); } static __inline @@ -763,7 +763,7 @@ static __inline u_long ahc_get_residual(struct scb *scb) { - return (scb->io_ctx->resid); + return scsi_get_resid(scb->io_ctx); } static __inline diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx_old.c linux-2.6.22-591/drivers/scsi/aic7xxx_old.c --- linux-2.6.22-570/drivers/scsi/aic7xxx_old.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/aic7xxx_old.c 2007-12-21 
15:36:12.000000000 -0500 @@ -2690,17 +2690,8 @@ struct aic7xxx_scb *scbp; unsigned char queue_depth; - if (cmd->use_sg > 1) - { - struct scatterlist *sg; + scsi_dma_unmap(cmd); - sg = (struct scatterlist *)cmd->request_buffer; - pci_unmap_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction); - } - else if (cmd->request_bufflen) - pci_unmap_single(p->pdev, aic7xxx_mapping(cmd), - cmd->request_bufflen, - cmd->sc_data_direction); if (scb->flags & SCB_SENSE) { pci_unmap_single(p->pdev, @@ -3869,7 +3860,7 @@ * the mid layer didn't check residual data counts to see if the * command needs retried. */ - cmd->resid = scb->sg_length - actual; + scsi_set_resid(cmd, scb->sg_length - actual); aic7xxx_status(cmd) = hscb->target_status; } } @@ -10137,6 +10128,7 @@ struct scsi_device *sdptr = cmd->device; unsigned char tindex = TARGET_INDEX(cmd); struct request *req = cmd->request; + int use_sg; mask = (0x01 << tindex); hscb = scb->hscb; @@ -10209,8 +10201,10 @@ memcpy(scb->cmnd, cmd->cmnd, cmd->cmd_len); hscb->SCSI_cmd_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, scb->cmnd)); - if (cmd->use_sg) - { + use_sg = scsi_dma_map(cmd); + BUG_ON(use_sg < 0); + + if (use_sg) { struct scatterlist *sg; /* Must be mid-level SCSI code scatterlist */ /* @@ -10219,11 +10213,11 @@ * differences and the kernel SG list uses virtual addresses where * we need physical addresses. */ - int i, use_sg; + int i; - sg = (struct scatterlist *)cmd->request_buffer; scb->sg_length = 0; - use_sg = pci_map_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction); + + /* * Copy the segments into the SG array. NOTE!!! - We used to * have the first entry both in the data_pointer area and the first @@ -10231,10 +10225,9 @@ * entry in both places, but now we download the address of * scb->sg_list[1] instead of 0 to the sg pointer in the hscb. 
*/ - for (i = 0; i < use_sg; i++) - { - unsigned int len = sg_dma_len(sg+i); - scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg+i)); + scsi_for_each_sg(cmd, sg, use_sg, i) { + unsigned int len = sg_dma_len(sg); + scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg)); scb->sg_list[i].length = cpu_to_le32(len); scb->sg_length += len; } @@ -10244,26 +10237,7 @@ scb->sg_count = i; hscb->SG_segment_count = i; hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[1])); - } - else - { - if (cmd->request_bufflen) - { - unsigned int address = pci_map_single(p->pdev, cmd->request_buffer, - cmd->request_bufflen, - cmd->sc_data_direction); - aic7xxx_mapping(cmd) = address; - scb->sg_list[0].address = cpu_to_le32(address); - scb->sg_list[0].length = cpu_to_le32(cmd->request_bufflen); - scb->sg_count = 1; - scb->sg_length = cmd->request_bufflen; - hscb->SG_segment_count = 1; - hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[0])); - hscb->data_count = scb->sg_list[0].length; - hscb->data_pointer = scb->sg_list[0].address; - } - else - { + } else { scb->sg_count = 0; scb->sg_length = 0; hscb->SG_segment_count = 0; @@ -10271,7 +10245,6 @@ hscb->data_count = 0; hscb->data_pointer = 0; } - } } /*+F************************************************************************* diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.c linux-2.6.22-591/drivers/scsi/amiga7xx.c --- linux-2.6.22-570/drivers/scsi/amiga7xx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/amiga7xx.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,138 +0,0 @@ -/* - * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux. - * Amiga MacroSystemUS WarpEngine SCSI controller. - * Amiga Technologies A4000T SCSI controller. - * Amiga Technologies/DKB A4091 SCSI controller. - * - * Written 1997 by Alan Hourihane - * plus modifications of the 53c7xx.c driver to support the Amiga. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "scsi.h" -#include -#include "53c7xx.h" -#include "amiga7xx.h" - - -static int amiga7xx_register_one(struct scsi_host_template *tpnt, - unsigned long address) -{ - long long options; - int clock; - - if (!request_mem_region(address, 0x1000, "ncr53c710")) - return 0; - - address = (unsigned long)z_ioremap(address, 0x1000); - options = OPTION_MEMORY_MAPPED | OPTION_DEBUG_TEST1 | OPTION_INTFLY | - OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS | - OPTION_DISCONNECT; - clock = 50000000; /* 50 MHz SCSI Clock */ - ncr53c7xx_init(tpnt, 0, 710, address, 0, IRQ_AMIGA_PORTS, DMA_NONE, - options, clock); - return 1; -} - - -#ifdef CONFIG_ZORRO - -static struct { - zorro_id id; - unsigned long offset; - int absolute; /* offset is absolute address */ -} amiga7xx_table[] = { - { .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS, .offset = 0xf40000, - .absolute = 1 }, - { .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx, .offset = 0x40000 }, - { .id = ZORRO_PROD_CBM_A4091_1, .offset = 0x800000 }, - { .id = ZORRO_PROD_CBM_A4091_2, .offset = 0x800000 }, - { .id = ZORRO_PROD_GVP_GFORCE_040_060, .offset = 0x40000 }, - { 0 } -}; - -static int __init amiga7xx_zorro_detect(struct scsi_host_template *tpnt) -{ - int num = 0, i; - struct zorro_dev *z = NULL; - unsigned long address; - - while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { - for (i = 0; amiga7xx_table[i].id; i++) - if (z->id == amiga7xx_table[i].id) - break; - if (!amiga7xx_table[i].id) - continue; - if (amiga7xx_table[i].absolute) - address = amiga7xx_table[i].offset; - else - address = z->resource.start + amiga7xx_table[i].offset; - num += amiga7xx_register_one(tpnt, address); - } - return num; -} - -#endif /* CONFIG_ZORRO */ - - -int __init amiga7xx_detect(struct scsi_host_template *tpnt) -{ - static unsigned char called = 0; - int num = 0; - - if (called || !MACH_IS_AMIGA) - return 0; - - 
tpnt->proc_name = "Amiga7xx"; - - if (AMIGAHW_PRESENT(A4000_SCSI)) - num += amiga7xx_register_one(tpnt, 0xdd0040); - -#ifdef CONFIG_ZORRO - num += amiga7xx_zorro_detect(tpnt); -#endif - - called = 1; - return num; -} - -static int amiga7xx_release(struct Scsi_Host *shost) -{ - if (shost->irq) - free_irq(shost->irq, NULL); - if (shost->dma_channel != 0xff) - free_dma(shost->dma_channel); - if (shost->io_port && shost->n_io_port) - release_region(shost->io_port, shost->n_io_port); - scsi_unregister(shost); - return 0; -} - -static struct scsi_host_template driver_template = { - .name = "Amiga NCR53c710 SCSI", - .detect = amiga7xx_detect, - .release = amiga7xx_release, - .queuecommand = NCR53c7xx_queue_command, - .abort = NCR53c7xx_abort, - .reset = NCR53c7xx_reset, - .can_queue = 24, - .this_id = 7, - .sg_tablesize = 63, - .cmd_per_lun = 3, - .use_clustering = DISABLE_CLUSTERING -}; - - -#include "scsi_module.c" diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.h linux-2.6.22-591/drivers/scsi/amiga7xx.h --- linux-2.6.22-570/drivers/scsi/amiga7xx.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/amiga7xx.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,23 +0,0 @@ -#ifndef AMIGA7XX_H - -#include - -int amiga7xx_detect(struct scsi_host_template *); -const char *NCR53c7x0_info(void); -int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -int NCR53c7xx_abort(Scsi_Cmnd *); -int NCR53c7x0_release (struct Scsi_Host *); -int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int); -void NCR53c7x0_intr(int irq, void *dev_id); - -#ifndef CMD_PER_LUN -#define CMD_PER_LUN 3 -#endif - -#ifndef CAN_QUEUE -#define CAN_QUEUE 24 -#endif - -#include - -#endif /* AMIGA7XX_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr.h --- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr.h 2007-12-21 15:36:12.000000000 -0500 @@ -48,9 +48,10 @@ 
#define ARCMSR_MAX_OUTSTANDING_CMD 256 #define ARCMSR_MAX_FREECCB_NUM 288 -#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.13" +#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.14" #define ARCMSR_SCSI_INITIATOR_ID 255 #define ARCMSR_MAX_XFER_SECTORS 512 +#define ARCMSR_MAX_XFER_SECTORS_B 4096 #define ARCMSR_MAX_TARGETID 17 #define ARCMSR_MAX_TARGETLUN 8 #define ARCMSR_MAX_CMD_PERLUN ARCMSR_MAX_OUTSTANDING_CMD @@ -469,4 +470,3 @@ extern struct class_device_attribute *arcmsr_host_attrs[]; extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb); void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb); - diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_attr.c --- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_attr.c 2007-12-21 15:36:12.000000000 -0500 @@ -59,8 +59,9 @@ struct class_device_attribute *arcmsr_host_attrs[]; static ssize_t -arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off, - size_t count) +arcmsr_sysfs_iop_message_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *host = class_to_shost(cdev); @@ -105,8 +106,9 @@ } static ssize_t -arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off, - size_t count) +arcmsr_sysfs_iop_message_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *host = class_to_shost(cdev); @@ -152,8 +154,9 @@ } static ssize_t -arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off, - size_t count) +arcmsr_sysfs_iop_message_clear(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { 
struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *host = class_to_shost(cdev); @@ -188,7 +191,6 @@ .attr = { .name = "mu_read", .mode = S_IRUSR , - .owner = THIS_MODULE, }, .size = 1032, .read = arcmsr_sysfs_iop_message_read, @@ -198,7 +200,6 @@ .attr = { .name = "mu_write", .mode = S_IWUSR, - .owner = THIS_MODULE, }, .size = 1032, .write = arcmsr_sysfs_iop_message_write, @@ -208,7 +209,6 @@ .attr = { .name = "mu_clear", .mode = S_IWUSR, - .owner = THIS_MODULE, }, .size = 1, .write = arcmsr_sysfs_iop_message_clear, diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_hba.c --- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_hba.c 2007-12-21 15:36:12.000000000 -0500 @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,7 @@ #include "arcmsr.h" MODULE_AUTHOR("Erich Chen "); -MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter"); +MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID HOST Adapter"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(ARCMSR_DRIVER_VERSION); @@ -93,7 +94,9 @@ static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb); static const char *arcmsr_info(struct Scsi_Host *); static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb); - +static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state); +static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev); static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth) { if (queue_depth > ARCMSR_MAX_CMD_PERLUN) @@ -104,7 +107,8 @@ static struct scsi_host_template arcmsr_scsi_host_template = { .module = THIS_MODULE, - .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, + .name = "ARCMSR ARECA SATA/SAS RAID HOST Adapter" + ARCMSR_DRIVER_VERSION, 
.info = arcmsr_info, .queuecommand = arcmsr_queue_command, .eh_abort_handler = arcmsr_abort, @@ -119,6 +123,10 @@ .use_clustering = ENABLE_CLUSTERING, .shost_attrs = arcmsr_host_attrs, }; +static struct pci_error_handlers arcmsr_pci_error_handlers = { + .error_detected = arcmsr_pci_error_detected, + .slot_reset = arcmsr_pci_slot_reset, +}; static struct pci_device_id arcmsr_device_id_table[] = { {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)}, @@ -144,7 +152,8 @@ .id_table = arcmsr_device_id_table, .probe = arcmsr_probe, .remove = arcmsr_remove, - .shutdown = arcmsr_shutdown + .shutdown = arcmsr_shutdown, + .err_handler = &arcmsr_pci_error_handlers, }; static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id) @@ -328,6 +337,8 @@ arcmsr_iop_init(acb); pci_set_drvdata(pdev, host); + if (strncmp(acb->firm_version, "V1.42", 5) >= 0) + host->max_sectors= ARCMSR_MAX_XFER_SECTORS_B; error = scsi_add_host(host, &pdev->dev); if (error) @@ -338,6 +349,7 @@ goto out_free_sysfs; scsi_scan_host(host); + pci_enable_pcie_error_reporting(pdev); return 0; out_free_sysfs: out_free_irq: @@ -369,19 +381,9 @@ static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb) { - struct AdapterControlBlock *acb = ccb->acb; struct scsi_cmnd *pcmd = ccb->pcmd; - if (pcmd->use_sg != 0) { - struct scatterlist *sl; - - sl = (struct scatterlist *)pcmd->request_buffer; - pci_unmap_sg(acb->pdev, sl, pcmd->use_sg, pcmd->sc_data_direction); - } - else if (pcmd->request_bufflen != 0) - pci_unmap_single(acb->pdev, - pcmd->SCp.dma_handle, - pcmd->request_bufflen, pcmd->sc_data_direction); + scsi_dma_unmap(pcmd); } static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag) @@ -498,7 +500,7 @@ static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb) { - struct MessageUnit __iomem *reg=acb->pmu; + struct MessageUnit __iomem *reg = acb->pmu; writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, ®->inbound_msgaddr0); if (arcmsr_wait_msgint_ready(acb)) @@ -551,6 
+553,7 @@ int8_t *psge = (int8_t *)&arcmsr_cdb->u; uint32_t address_lo, address_hi; int arccdbsize = 0x30; + int nseg; ccb->pcmd = pcmd; memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB)); @@ -561,20 +564,20 @@ arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len; arcmsr_cdb->Context = (unsigned long)arcmsr_cdb; memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len); - if (pcmd->use_sg) { - int length, sgcount, i, cdb_sgcount = 0; - struct scatterlist *sl; - - /* Get Scatter Gather List from scsiport. */ - sl = (struct scatterlist *) pcmd->request_buffer; - sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg, - pcmd->sc_data_direction); + + nseg = scsi_dma_map(pcmd); + BUG_ON(nseg < 0); + + if (nseg) { + int length, i, cdb_sgcount = 0; + struct scatterlist *sg; + /* map stor port SG list to our iop SG List. */ - for (i = 0; i < sgcount; i++) { + scsi_for_each_sg(pcmd, sg, nseg, i) { /* Get the physical address of the current data pointer */ - length = cpu_to_le32(sg_dma_len(sl)); - address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl))); - address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl))); + length = cpu_to_le32(sg_dma_len(sg)); + address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sg))); + address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sg))); if (address_hi == 0) { struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; @@ -591,32 +594,12 @@ psge += sizeof (struct SG64ENTRY); arccdbsize += sizeof (struct SG64ENTRY); } - sl++; cdb_sgcount++; } arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount; - arcmsr_cdb->DataLength = pcmd->request_bufflen; + arcmsr_cdb->DataLength = scsi_bufflen(pcmd); if ( arccdbsize > 256) arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE; - } else if (pcmd->request_bufflen) { - dma_addr_t dma_addr; - dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer, - pcmd->request_bufflen, pcmd->sc_data_direction); - pcmd->SCp.dma_handle = dma_addr; - address_lo = cpu_to_le32(dma_addr_lo32(dma_addr)); - address_hi = 
cpu_to_le32(dma_addr_hi32(dma_addr)); - if (address_hi == 0) { - struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; - pdma_sg->address = address_lo; - pdma_sg->length = pcmd->request_bufflen; - } else { - struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge; - pdma_sg->addresshigh = address_hi; - pdma_sg->address = address_lo; - pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR; - } - arcmsr_cdb->sgcount = 1; - arcmsr_cdb->DataLength = pcmd->request_bufflen; } if (pcmd->sc_data_direction == DMA_TO_DEVICE ) { arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE; @@ -758,20 +741,20 @@ (flag_ccb << 5)); if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) { if (ccb->startdone == ARCMSR_CCB_ABORTED) { - struct scsi_cmnd *abortcmd=ccb->pcmd; + struct scsi_cmnd *abortcmd = ccb->pcmd; if (abortcmd) { abortcmd->result |= DID_ABORT >> 16; arcmsr_ccb_complete(ccb, 1); printk(KERN_NOTICE - "arcmsr%d: ccb='0x%p' isr got aborted command \n" + "arcmsr%d: ccb ='0x%p' isr got aborted command \n" , acb->host->host_no, ccb); } continue; } printk(KERN_NOTICE - "arcmsr%d: isr get an illegal ccb command done acb='0x%p'" - "ccb='0x%p' ccbacb='0x%p' startdone = 0x%x" - " ccboutstandingcount=%d \n" + "arcmsr%d: isr get an illegal ccb command done acb = '0x%p'" + "ccb = '0x%p' ccbacb = '0x%p' startdone = 0x%x" + " ccboutstandingcount = %d \n" , acb->host->host_no , acb , ccb @@ -791,7 +774,7 @@ switch(ccb->arcmsr_cdb.DeviceStatus) { case ARCMSR_DEV_SELECT_TIMEOUT: { acb->devstate[id][lun] = ARECA_RAID_GONE; - ccb->pcmd->result = DID_TIME_OUT << 16; + ccb->pcmd->result = DID_NO_CONNECT << 16; arcmsr_ccb_complete(ccb, 1); } break; @@ -810,8 +793,8 @@ break; default: printk(KERN_NOTICE - "arcmsr%d: scsi id=%d lun=%d" - " isr get command error done," + "arcmsr%d: scsi id = %d lun = %d" + " isr get command error done, " "but got unknown DeviceStatus = 0x%x \n" , acb->host->host_no , id @@ -848,24 +831,21 @@ struct CMD_MESSAGE_FIELD *pcmdmessagefld; int retvalue = 0, transfer_len = 0; char 
*buffer; + struct scatterlist *sg; uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 | (uint32_t ) cmd->cmnd[6] << 16 | (uint32_t ) cmd->cmnd[7] << 8 | (uint32_t ) cmd->cmnd[8]; /* 4 bytes: Areca io control code */ - if (cmd->use_sg) { - struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer; + sg = scsi_sglist(cmd); buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; - if (cmd->use_sg > 1) { + if (scsi_sg_count(cmd) > 1) { retvalue = ARCMSR_MESSAGE_FAIL; goto message_out; } transfer_len += sg->length; - } else { - buffer = cmd->request_buffer; - transfer_len = cmd->request_bufflen; - } + if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) { retvalue = ARCMSR_MESSAGE_FAIL; goto message_out; @@ -1057,12 +1037,9 @@ retvalue = ARCMSR_MESSAGE_FAIL; } message_out: - if (cmd->use_sg) { - struct scatterlist *sg; - - sg = (struct scatterlist *) cmd->request_buffer; + sg = scsi_sglist(cmd); kunmap_atomic(buffer - sg->offset, KM_IRQ0); - } + return retvalue; } @@ -1085,6 +1062,7 @@ case INQUIRY: { unsigned char inqdata[36]; char *buffer; + struct scatterlist *sg; if (cmd->device->lun) { cmd->result = (DID_TIME_OUT << 16); @@ -1096,7 +1074,7 @@ inqdata[1] = 0; /* rem media bit & Dev Type Modifier */ inqdata[2] = 0; - /* ISO,ECMA,& ANSI versions */ + /* ISO, ECMA, & ANSI versions */ inqdata[4] = 31; /* length of additional data */ strncpy(&inqdata[8], "Areca ", 8); @@ -1104,21 +1082,14 @@ strncpy(&inqdata[16], "RAID controller ", 16); /* Product Identification */ strncpy(&inqdata[32], "R001", 4); /* Product Revision */ - if (cmd->use_sg) { - struct scatterlist *sg; - sg = (struct scatterlist *) cmd->request_buffer; + sg = scsi_sglist(cmd); buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; - } else { - buffer = cmd->request_buffer; - } - memcpy(buffer, inqdata, sizeof(inqdata)); - if (cmd->use_sg) { - struct scatterlist *sg; - sg = (struct scatterlist *) cmd->request_buffer; + memcpy(buffer, inqdata, sizeof(inqdata)); + sg = scsi_sglist(cmd); 
kunmap_atomic(buffer - sg->offset, KM_IRQ0); - } + cmd->scsi_done(cmd); } break; @@ -1153,7 +1124,7 @@ , acb->host->host_no); return SCSI_MLQUEUE_HOST_BUSY; } - if(target == 16) { + if (target == 16) { /* virtual device for iop message transfer */ arcmsr_handle_virtual_command(acb, cmd); return 0; @@ -1166,7 +1137,7 @@ printk(KERN_NOTICE "arcmsr%d: block 'read/write'" "command with gone raid volume" - " Cmd=%2x, TargetId=%d, Lun=%d \n" + " Cmd = %2x, TargetId = %d, Lun = %d \n" , acb->host->host_no , cmd->cmnd[0] , target, lun); @@ -1257,7 +1228,7 @@ if ((ccb->startdone == ARCMSR_CCB_ABORTED) || (ccb == poll_ccb)) { printk(KERN_NOTICE - "arcmsr%d: scsi id=%d lun=%d ccb='0x%p'" + "arcmsr%d: scsi id = %d lun = %d ccb = '0x%p'" " poll command abort successfully \n" , acb->host->host_no , ccb->pcmd->device->id @@ -1270,8 +1241,8 @@ } printk(KERN_NOTICE "arcmsr%d: polling get an illegal ccb" - " command done ccb='0x%p'" - "ccboutstandingcount=%d \n" + " command done ccb ='0x%p'" + "ccboutstandingcount = %d \n" , acb->host->host_no , ccb , atomic_read(&acb->ccboutstandingcount)); @@ -1288,7 +1259,7 @@ switch(ccb->arcmsr_cdb.DeviceStatus) { case ARCMSR_DEV_SELECT_TIMEOUT: { acb->devstate[id][lun] = ARECA_RAID_GONE; - ccb->pcmd->result = DID_TIME_OUT << 16; + ccb->pcmd->result = DID_NO_CONNECT << 16; arcmsr_ccb_complete(ccb, 1); } break; @@ -1307,7 +1278,7 @@ break; default: printk(KERN_NOTICE - "arcmsr%d: scsi id=%d lun=%d" + "arcmsr%d: scsi id = %d lun = %d" " polling and getting command error done" "but got unknown DeviceStatus = 0x%x \n" , acb->host->host_no @@ -1322,6 +1293,94 @@ } } } +static void arcmsr_done4_abort_postqueue(struct AdapterControlBlock *acb) +{ + int i = 0, found = 0; + int id, lun; + uint32_t flag_ccb, outbound_intstatus; + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + /*clear and abort all outbound posted Q*/ + + while (((flag_ccb = readl(®->outbound_queueport)) != 0xFFFFFFFF) && +(i++ < 256)){ + ccb = (struct 
CommandControlBlock *)(acb->vir2phy_offset + +(flag_ccb << 5)); + if (ccb){ + if ((ccb->acb != acb)||(ccb->startdone != \ +ARCMSR_CCB_START)){ + printk(KERN_NOTICE "arcmsr%d: polling get \ +an illegal ccb" "command done ccb = '0x%p'""ccboutstandingcount = %d \n", + acb->host->host_no, ccb, + atomic_read(&acb->ccboutstandingcount)); + continue; + } + + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)){ + if (acb->devstate[id][lun] == ARECA_RAID_GONE) + acb->devstate[id][lun] = ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } + else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_NO_CONNECT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + + case ARCMSR_DEV_ABORTED: + + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = + ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + + default: + printk(KERN_NOTICE + "arcmsr%d: scsi id = %d \ + lun = %d""polling and \ + getting command error \ + done""but got unknown \ + DeviceStatus = 0x%x \n", + acb->host->host_no, id, + lun, ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + break; + } + } + found = 1; + } + } + if (found){ + outbound_intstatus = readl(®->outbound_intstatus) & \ + acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus); + /*clear interrupt*/ + } + return; +} + static void arcmsr_iop_init(struct AdapterControlBlock *acb) { @@ -1355,7 +1414,6 @@ static void arcmsr_iop_reset(struct AdapterControlBlock *acb) { - struct MessageUnit __iomem *reg = acb->pmu; struct 
CommandControlBlock *ccb; uint32_t intmask_org; int i = 0; @@ -1368,21 +1426,17 @@ /* disable all outbound interrupt */ intmask_org = arcmsr_disable_outbound_ints(acb); /* clear all outbound posted Q */ - for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++) - readl(®->outbound_queueport); + arcmsr_done4_abort_postqueue(acb); for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { ccb = acb->pccb_pool[i]; - if ((ccb->startdone == ARCMSR_CCB_START) || - (ccb->startdone == ARCMSR_CCB_ABORTED)) { + if (ccb->startdone == ARCMSR_CCB_START) { ccb->startdone = ARCMSR_CCB_ABORTED; - ccb->pcmd->result = DID_ABORT << 16; - arcmsr_ccb_complete(ccb, 1); } } /* enable all outbound interrupt */ arcmsr_enable_outbound_ints(acb, intmask_org); } - atomic_set(&acb->ccboutstandingcount, 0); + } static int arcmsr_bus_reset(struct scsi_cmnd *cmd) @@ -1428,10 +1482,9 @@ int i = 0; printk(KERN_NOTICE - "arcmsr%d: abort device command of scsi id=%d lun=%d \n", + "arcmsr%d: abort device command of scsi id = %d lun = %d \n", acb->host->host_no, cmd->device->id, cmd->device->lun); acb->num_aborts++; - /* ************************************************ ** the all interrupt service routine is locked @@ -1492,4 +1545,300 @@ return buf; } +static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev) +{ + struct Scsi_Host *host; + struct AdapterControlBlock *acb; + uint8_t bus, dev_fun; + int error; + + error = pci_enable_device(pdev); + if (error) + return PCI_ERS_RESULT_DISCONNECT; + pci_set_master(pdev); + + host = scsi_host_alloc(&arcmsr_scsi_host_template, sizeof \ +(struct AdapterControlBlock)); + if (!host) + return PCI_ERS_RESULT_DISCONNECT; + acb = (struct AdapterControlBlock *)host->hostdata; + memset(acb, 0, sizeof (struct AdapterControlBlock)); + + error = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + if (error) { + error = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (error) { + printk(KERN_WARNING + "scsi%d: No suitable DMA mask available\n", + host->host_no); + return 
PCI_ERS_RESULT_DISCONNECT; + } + } + bus = pdev->bus->number; + dev_fun = pdev->devfn; + acb = (struct AdapterControlBlock *) host->hostdata; + memset(acb, 0, sizeof(struct AdapterControlBlock)); + acb->pdev = pdev; + acb->host = host; + host->max_sectors = ARCMSR_MAX_XFER_SECTORS; + host->max_lun = ARCMSR_MAX_TARGETLUN; + host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/ + host->max_cmd_len = 16; /*this is issue of 64bit LBA, over 2T byte*/ + host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES; + host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */ + host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN; + host->this_id = ARCMSR_SCSI_INITIATOR_ID; + host->unique_id = (bus << 8) | dev_fun; + host->irq = pdev->irq; + error = pci_request_regions(pdev, "arcmsr"); + if (error) + return PCI_ERS_RESULT_DISCONNECT; + acb->pmu = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!acb->pmu) { + printk(KERN_NOTICE "arcmsr%d: memory" + " mapping region fail \n", acb->host->host_no); + return PCI_ERS_RESULT_DISCONNECT; + } + acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED | + ACB_F_MESSAGE_RQBUFFER_CLEARED | + ACB_F_MESSAGE_WQBUFFER_READED); + acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER; + INIT_LIST_HEAD(&acb->ccb_free_list); + + error = arcmsr_alloc_ccb_pool(acb); + if (error) + return PCI_ERS_RESULT_DISCONNECT; + + error = request_irq(pdev->irq, arcmsr_do_interrupt, + IRQF_DISABLED | IRQF_SHARED, "arcmsr", acb); + if (error) + return PCI_ERS_RESULT_DISCONNECT; + + arcmsr_iop_init(acb); + if (strncmp(acb->firm_version, "V1.42", 5) >= 0) + host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B; + + pci_set_drvdata(pdev, host); + + error = scsi_add_host(host, &pdev->dev); + if (error) + return PCI_ERS_RESULT_DISCONNECT; + + error = arcmsr_alloc_sysfs_attr(acb); + if (error) + return PCI_ERS_RESULT_DISCONNECT; + + scsi_scan_host(host); + return PCI_ERS_RESULT_RECOVERED; +} + +static void arcmsr_pci_ers_need_reset_forepart(struct pci_dev *pdev) +{ + struct Scsi_Host *host = 
pci_get_drvdata(pdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + /*clear and abort all outbound posted Q*/ + int i = 0, found = 0; + int id, lun; + uint32_t flag_ccb, outbound_intstatus; + + while (((flag_ccb = readl(®->outbound_queueport)) != 0xFFFFFFFF) && + (i++ < 256)){ + ccb = (struct CommandControlBlock *)(acb->vir2phy_offset + + (flag_ccb << 5)); + if (ccb){ + if ((ccb->acb != acb)||(ccb->startdone != + ARCMSR_CCB_START)){ + printk(KERN_NOTICE "arcmsr%d: polling \ + get an illegal ccb"" command done ccb = '0x%p'" + "ccboutstandingcount = %d \n", + acb->host->host_no, ccb, + atomic_read(&acb->ccboutstandingcount)); + continue; + } + + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) { + if (acb->devstate[id][lun] == + ARECA_RAID_GONE) + acb->devstate[id][lun] = + ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } + else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_NO_CONNECT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + + case ARCMSR_DEV_ABORTED: + + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = + ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + + default: + printk(KERN_NOTICE + "arcmsr%d: scsi \ + id = %d lun = %d" + " polling and \ + getting command \ + error done" + "but got unknown \ + DeviceStatus = 0x%x \n" + , acb->host->host_no, + id, lun, + ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + 
break; + } + } + found = 1; + } + } + if (found){ + outbound_intstatus = readl(®->outbound_intstatus) & + acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus); + /*clear interrupt*/ + } + return; +} + + +static void arcmsr_pci_ers_disconnect_forepart(struct pci_dev *pdev) +{ + struct Scsi_Host *host = pci_get_drvdata(pdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + /*clear and abort all outbound posted Q*/ + int i = 0, found = 0; + int id, lun; + uint32_t flag_ccb, outbound_intstatus; + + while (((flag_ccb = readl(®->outbound_queueport)) != 0xFFFFFFFF) && + (i++ < 256)){ + ccb = (struct CommandControlBlock *)(acb->vir2phy_offset + + (flag_ccb << 5)); + if (ccb){ + if ((ccb->acb != acb)||(ccb->startdone != + ARCMSR_CCB_START)){ + printk(KERN_NOTICE + "arcmsr%d: polling get an illegal ccb" + " command done ccb = '0x%p'" + "ccboutstandingcount = %d \n", + acb->host->host_no, ccb, + atomic_read(&acb->ccboutstandingcount)); + continue; + } + + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) { + if (acb->devstate[id][lun] == ARECA_RAID_GONE) + acb->devstate[id][lun] = ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } + else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_NO_CONNECT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + + case ARCMSR_DEV_ABORTED: + + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = + ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + + default: + printk(KERN_NOTICE 
"arcmsr%d: \ + scsi id = %d lun = %d" + " polling and \ + getting command error done" + "but got unknown \ + DeviceStatus = 0x%x \n" + , acb->host->host_no, + id, lun, ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = + ARECA_RAID_GONE; + ccb->pcmd->result = + DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + break; + } + } + found = 1; + } + } + if (found){ + outbound_intstatus = readl(®->outbound_intstatus) & + acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus); + /*clear interrupt*/ + } + return; +} + +static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + switch (state) { + case pci_channel_io_frozen: + arcmsr_pci_ers_need_reset_forepart(pdev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + arcmsr_pci_ers_disconnect_forepart(pdev); + return PCI_ERS_RESULT_DISCONNECT; + break; + default: + return PCI_ERS_RESULT_NEED_RESET; + } +} diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.c linux-2.6.22-591/drivers/scsi/bvme6000.c --- linux-2.6.22-570/drivers/scsi/bvme6000.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/bvme6000.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,76 +0,0 @@ -/* - * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux. 
- * - * Based on work by Alan Hourihane - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "scsi.h" -#include -#include "53c7xx.h" -#include "bvme6000.h" - -#include - - -int bvme6000_scsi_detect(struct scsi_host_template *tpnt) -{ - static unsigned char called = 0; - int clock; - long long options; - - if (called) - return 0; - if (!MACH_IS_BVME6000) - return 0; - - tpnt->proc_name = "BVME6000"; - - options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT; - - clock = 40000000; /* 66MHz SCSI Clock */ - - ncr53c7xx_init(tpnt, 0, 710, (unsigned long)BVME_NCR53C710_BASE, - 0, BVME_IRQ_SCSI, DMA_NONE, - options, clock); - called = 1; - return 1; -} - -static int bvme6000_scsi_release(struct Scsi_Host *shost) -{ - if (shost->irq) - free_irq(shost->irq, NULL); - if (shost->dma_channel != 0xff) - free_dma(shost->dma_channel); - if (shost->io_port && shost->n_io_port) - release_region(shost->io_port, shost->n_io_port); - scsi_unregister(shost); - return 0; -} - -static struct scsi_host_template driver_template = { - .name = "BVME6000 NCR53c710 SCSI", - .detect = bvme6000_scsi_detect, - .release = bvme6000_scsi_release, - .queuecommand = NCR53c7xx_queue_command, - .abort = NCR53c7xx_abort, - .reset = NCR53c7xx_reset, - .can_queue = 24, - .this_id = 7, - .sg_tablesize = 63, - .cmd_per_lun = 3, - .use_clustering = DISABLE_CLUSTERING -}; - - -#include "scsi_module.c" diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.h linux-2.6.22-591/drivers/scsi/bvme6000.h --- linux-2.6.22-570/drivers/scsi/bvme6000.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/bvme6000.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,24 +0,0 @@ -#ifndef BVME6000_SCSI_H -#define BVME6000_SCSI_H - -#include - -int bvme6000_scsi_detect(struct scsi_host_template *); -const char *NCR53c7x0_info(void); -int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd 
*)); -int NCR53c7xx_abort(Scsi_Cmnd *); -int NCR53c7x0_release (struct Scsi_Host *); -int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int); -void NCR53c7x0_intr(int irq, void *dev_id); - -#ifndef CMD_PER_LUN -#define CMD_PER_LUN 3 -#endif - -#ifndef CAN_QUEUE -#define CAN_QUEUE 24 -#endif - -#include - -#endif /* BVME6000_SCSI_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c linux-2.6.22-591/drivers/scsi/bvme6000_scsi.c --- linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/bvme6000_scsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,135 @@ +/* + * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux. + * + * Based on work by Alan Hourihane and Kars de Jong + * + * Rewritten to use 53c700.c by Richard Hirst + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "53c700.h" + +MODULE_AUTHOR("Richard Hirst "); +MODULE_DESCRIPTION("BVME6000 NCR53C710 driver"); +MODULE_LICENSE("GPL"); + +static struct scsi_host_template bvme6000_scsi_driver_template = { + .name = "BVME6000 NCR53c710 SCSI", + .proc_name = "BVME6000", + .this_id = 7, + .module = THIS_MODULE, +}; + +static struct platform_device *bvme6000_scsi_device; + +static __devinit int +bvme6000_probe(struct device *dev) +{ + struct Scsi_Host * host = NULL; + struct NCR_700_Host_Parameters *hostdata; + + if (!MACH_IS_BVME6000) + goto out; + + hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); + if (hostdata == NULL) { + printk(KERN_ERR "bvme6000-scsi: " + "Failed to allocate host data\n"); + goto out; + } + memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); + + /* Fill in the required pieces of hostdata */ + hostdata->base = (void __iomem *)BVME_NCR53C710_BASE; + hostdata->clock = 40; /* XXX - depends on the CPU clock! 
*/ + hostdata->chip710 = 1; + hostdata->dmode_extra = DMODE_FC2; + hostdata->dcntl_extra = EA_710; + hostdata->ctest7_extra = CTEST7_TT1; + + /* and register the chip */ + host = NCR_700_detect(&bvme6000_scsi_driver_template, hostdata, dev); + if (!host) { + printk(KERN_ERR "bvme6000-scsi: No host detected; " + "board configuration problem?\n"); + goto out_free; + } + host->base = BVME_NCR53C710_BASE; + host->this_id = 7; + host->irq = BVME_IRQ_SCSI; + if (request_irq(BVME_IRQ_SCSI, NCR_700_intr, 0, "bvme6000-scsi", + host)) { + printk(KERN_ERR "bvme6000-scsi: request_irq failed\n"); + goto out_put_host; + } + + scsi_scan_host(host); + + return 0; + + out_put_host: + scsi_host_put(host); + out_free: + kfree(hostdata); + out: + return -ENODEV; +} + +static __devexit int +bvme6000_device_remove(struct device *dev) +{ + struct Scsi_Host *host = dev_to_shost(dev); + struct NCR_700_Host_Parameters *hostdata = shost_priv(host); + + scsi_remove_host(host); + NCR_700_release(host); + kfree(hostdata); + free_irq(host->irq, host); + + return 0; +} + +static struct device_driver bvme6000_scsi_driver = { + .name = "bvme6000-scsi", + .bus = &platform_bus_type, + .probe = bvme6000_probe, + .remove = __devexit_p(bvme6000_device_remove), +}; + +static int __init bvme6000_scsi_init(void) +{ + int err; + + err = driver_register(&bvme6000_scsi_driver); + if (err) + return err; + + bvme6000_scsi_device = platform_device_register_simple("bvme6000-scsi", + -1, NULL, 0); + if (IS_ERR(bvme6000_scsi_device)) { + driver_unregister(&bvme6000_scsi_driver); + return PTR_ERR(bvme6000_scsi_device); + } + + return 0; +} + +static void __exit bvme6000_scsi_exit(void) +{ + platform_device_unregister(bvme6000_scsi_device); + driver_unregister(&bvme6000_scsi_driver); +} + +module_init(bvme6000_scsi_init); +module_exit(bvme6000_scsi_exit); diff -Nurb linux-2.6.22-570/drivers/scsi/dpt_i2o.c linux-2.6.22-591/drivers/scsi/dpt_i2o.c --- linux-2.6.22-570/drivers/scsi/dpt_i2o.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/drivers/scsi/dpt_i2o.c 2007-12-21 15:36:12.000000000 -0500 @@ -2078,12 +2078,13 @@ u32 *lenptr; int direction; int scsidir; + int nseg; u32 len; u32 reqlen; s32 rcode; memset(msg, 0 , sizeof(msg)); - len = cmd->request_bufflen; + len = scsi_bufflen(cmd); direction = 0x00000000; scsidir = 0x00000000; // DATA NO XFER @@ -2140,21 +2141,21 @@ lenptr=mptr++; /* Remember me - fill in when we know */ reqlen = 14; // SINGLE SGE /* Now fill in the SGList and command */ - if(cmd->use_sg) { - struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer; - int sg_count = pci_map_sg(pHba->pDev, sg, cmd->use_sg, - cmd->sc_data_direction); + nseg = scsi_dma_map(cmd); + BUG_ON(nseg < 0); + if (nseg) { + struct scatterlist *sg; len = 0; - for(i = 0 ; i < sg_count; i++) { + scsi_for_each_sg(cmd, sg, nseg, i) { *mptr++ = direction|0x10000000|sg_dma_len(sg); len+=sg_dma_len(sg); *mptr++ = sg_dma_address(sg); - sg++; - } /* Make this an end of list */ - mptr[-2] = direction|0xD0000000|sg_dma_len(sg-1); + if (i == nseg - 1) + mptr[-2] = direction|0xD0000000|sg_dma_len(sg); + } reqlen = mptr - msg; *lenptr = len; @@ -2163,16 +2164,8 @@ len, cmd->underflow); } } else { - *lenptr = len = cmd->request_bufflen; - if(len == 0) { + *lenptr = len = 0; reqlen = 12; - } else { - *mptr++ = 0xD0000000|direction|cmd->request_bufflen; - *mptr++ = pci_map_single(pHba->pDev, - cmd->request_buffer, - cmd->request_bufflen, - cmd->sc_data_direction); - } } /* Stick the headers on */ @@ -2232,7 +2225,7 @@ hba_status = detailed_status >> 8; // calculate resid for sg - cmd->resid = cmd->request_bufflen - readl(reply+5); + scsi_set_resid(cmd, scsi_bufflen(cmd) - readl(reply+5)); pHba = (adpt_hba*) cmd->device->host->hostdata[0]; diff -Nurb linux-2.6.22-570/drivers/scsi/eata.c linux-2.6.22-591/drivers/scsi/eata.c --- linux-2.6.22-570/drivers/scsi/eata.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/eata.c 2007-12-21 15:36:12.000000000 -0500 @@ -1609,8 
+1609,9 @@ static void map_dma(unsigned int i, struct hostdata *ha) { - unsigned int k, count, pci_dir; - struct scatterlist *sgpnt; + unsigned int k, pci_dir; + int count; + struct scatterlist *sg; struct mscp *cpp; struct scsi_cmnd *SCpnt; @@ -1625,38 +1626,19 @@ cpp->sense_len = sizeof SCpnt->sense_buffer; - if (!SCpnt->use_sg) { - - /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */ - if (!SCpnt->request_bufflen) - pci_dir = PCI_DMA_BIDIRECTIONAL; - - if (SCpnt->request_buffer) - cpp->data_address = H2DEV(pci_map_single(ha->pdev, - SCpnt-> - request_buffer, - SCpnt-> - request_bufflen, - pci_dir)); - - cpp->data_len = H2DEV(SCpnt->request_bufflen); - return; - } - - sgpnt = (struct scatterlist *)SCpnt->request_buffer; - count = pci_map_sg(ha->pdev, sgpnt, SCpnt->use_sg, pci_dir); - - for (k = 0; k < count; k++) { - cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k])); - cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k])); + count = scsi_dma_map(SCpnt); + BUG_ON(count < 0); + scsi_for_each_sg(SCpnt, sg, count, k) { + cpp->sglist[k].address = H2DEV(sg_dma_address(sg)); + cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg)); } cpp->sg = 1; cpp->data_address = H2DEV(pci_map_single(ha->pdev, cpp->sglist, - SCpnt->use_sg * + scsi_sg_count(SCpnt) * sizeof(struct sg_list), pci_dir)); - cpp->data_len = H2DEV((SCpnt->use_sg * sizeof(struct sg_list))); + cpp->data_len = H2DEV((scsi_sg_count(SCpnt) * sizeof(struct sg_list))); } static void unmap_dma(unsigned int i, struct hostdata *ha) @@ -1673,9 +1655,7 @@ pci_unmap_single(ha->pdev, DEV2H(cpp->sense_addr), DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); - if (SCpnt->use_sg) - pci_unmap_sg(ha->pdev, SCpnt->request_buffer, SCpnt->use_sg, - pci_dir); + scsi_dma_unmap(SCpnt); if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL; @@ -1700,9 +1680,9 @@ DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); - if (SCpnt->use_sg) - pci_dma_sync_sg_for_cpu(ha->pdev, SCpnt->request_buffer, - SCpnt->use_sg, pci_dir); 
+ if (scsi_sg_count(SCpnt)) + pci_dma_sync_sg_for_cpu(ha->pdev, scsi_sglist(SCpnt), + scsi_sg_count(SCpnt), pci_dir); if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL; diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.c linux-2.6.22-591/drivers/scsi/esp_scsi.c --- linux-2.6.22-570/drivers/scsi/esp_scsi.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/esp_scsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -324,17 +324,14 @@ static void esp_map_dma(struct esp *esp, struct scsi_cmnd *cmd) { struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd); - struct scatterlist *sg = cmd->request_buffer; + struct scatterlist *sg = scsi_sglist(cmd); int dir = cmd->sc_data_direction; int total, i; if (dir == DMA_NONE) return; - BUG_ON(cmd->use_sg == 0); - - spriv->u.num_sg = esp->ops->map_sg(esp, sg, - cmd->use_sg, dir); + spriv->u.num_sg = esp->ops->map_sg(esp, sg, scsi_sg_count(cmd), dir); spriv->cur_residue = sg_dma_len(sg); spriv->cur_sg = sg; @@ -407,8 +404,7 @@ if (dir == DMA_NONE) return; - esp->ops->unmap_sg(esp, cmd->request_buffer, - spriv->u.num_sg, dir); + esp->ops->unmap_sg(esp, scsi_sglist(cmd), spriv->u.num_sg, dir); } static void esp_save_pointers(struct esp *esp, struct esp_cmd_entry *ent) @@ -921,7 +917,7 @@ static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { struct scsi_device *dev = cmd->device; - struct esp *esp = host_to_esp(dev->host); + struct esp *esp = shost_priv(dev->host); struct esp_cmd_priv *spriv; struct esp_cmd_entry *ent; @@ -2358,7 +2354,7 @@ static int esp_slave_alloc(struct scsi_device *dev) { - struct esp *esp = host_to_esp(dev->host); + struct esp *esp = shost_priv(dev->host); struct esp_target_data *tp = &esp->target[dev->id]; struct esp_lun_data *lp; @@ -2382,7 +2378,7 @@ static int esp_slave_configure(struct scsi_device *dev) { - struct esp *esp = host_to_esp(dev->host); + struct esp *esp = shost_priv(dev->host); struct esp_target_data *tp = &esp->target[dev->id]; int goal_tags, 
queue_depth; @@ -2424,7 +2420,7 @@ static int esp_eh_abort_handler(struct scsi_cmnd *cmd) { - struct esp *esp = host_to_esp(cmd->device->host); + struct esp *esp = shost_priv(cmd->device->host); struct esp_cmd_entry *ent, *tmp; struct completion eh_done; unsigned long flags; @@ -2540,7 +2536,7 @@ static int esp_eh_bus_reset_handler(struct scsi_cmnd *cmd) { - struct esp *esp = host_to_esp(cmd->device->host); + struct esp *esp = shost_priv(cmd->device->host); struct completion eh_reset; unsigned long flags; @@ -2576,7 +2572,7 @@ /* All bets are off, reset the entire device. */ static int esp_eh_host_reset_handler(struct scsi_cmnd *cmd) { - struct esp *esp = host_to_esp(cmd->device->host); + struct esp *esp = shost_priv(cmd->device->host); unsigned long flags; spin_lock_irqsave(esp->host->host_lock, flags); @@ -2616,7 +2612,7 @@ static void esp_get_signalling(struct Scsi_Host *host) { - struct esp *esp = host_to_esp(host); + struct esp *esp = shost_priv(host); enum spi_signal_type type; if (esp->flags & ESP_FLAG_DIFFERENTIAL) @@ -2630,7 +2626,7 @@ static void esp_set_offset(struct scsi_target *target, int offset) { struct Scsi_Host *host = dev_to_shost(target->dev.parent); - struct esp *esp = host_to_esp(host); + struct esp *esp = shost_priv(host); struct esp_target_data *tp = &esp->target[target->id]; tp->nego_goal_offset = offset; @@ -2640,7 +2636,7 @@ static void esp_set_period(struct scsi_target *target, int period) { struct Scsi_Host *host = dev_to_shost(target->dev.parent); - struct esp *esp = host_to_esp(host); + struct esp *esp = shost_priv(host); struct esp_target_data *tp = &esp->target[target->id]; tp->nego_goal_period = period; @@ -2650,7 +2646,7 @@ static void esp_set_width(struct scsi_target *target, int width) { struct Scsi_Host *host = dev_to_shost(target->dev.parent); - struct esp *esp = host_to_esp(host); + struct esp *esp = shost_priv(host); struct esp_target_data *tp = &esp->target[target->id]; tp->nego_goal_width = (width ? 
1 : 0); diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.h linux-2.6.22-591/drivers/scsi/esp_scsi.h --- linux-2.6.22-570/drivers/scsi/esp_scsi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/esp_scsi.h 2007-12-21 15:36:12.000000000 -0500 @@ -517,8 +517,6 @@ struct sbus_dma *dma; }; -#define host_to_esp(host) ((struct esp *)(host)->hostdata) - /* A front-end driver for the ESP chip should do the following in * it's device probe routine: * 1) Allocate the host and private area using scsi_host_alloc() diff -Nurb linux-2.6.22-570/drivers/scsi/fdomain.c linux-2.6.22-591/drivers/scsi/fdomain.c --- linux-2.6.22-570/drivers/scsi/fdomain.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/fdomain.c 2007-12-21 15:36:12.000000000 -0500 @@ -410,6 +410,8 @@ static char * fdomain = NULL; module_param(fdomain, charp, 0); +#ifndef PCMCIA + static unsigned long addresses[] = { 0xc8000, 0xca000, @@ -426,6 +428,8 @@ static unsigned short ints[] = { 3, 5, 10, 11, 12, 14, 15, 0 }; +#endif /* !PCMCIA */ + /* READ THIS BEFORE YOU ADD A SIGNATURE! @@ -458,6 +462,8 @@ */ +#ifndef PCMCIA + static struct signature { const char *signature; int sig_offset; @@ -503,6 +509,8 @@ #define SIGNATURE_COUNT ARRAY_SIZE(signatures) +#endif /* !PCMCIA */ + static void print_banner( struct Scsi_Host *shpnt ) { if (!shpnt) return; /* This won't ever happen */ @@ -633,6 +641,8 @@ return 0; } +#ifndef PCMCIA + /* fdomain_get_irq assumes that we have a valid MCA ID for a TMC-1660/TMC-1680 Future Domain board. Now, check to be sure the bios_base matches these ports. 
If someone was unlucky enough to have @@ -667,7 +677,6 @@ static int fdomain_isa_detect( int *irq, int *iobase ) { -#ifndef PCMCIA int i, j; int base = 0xdeadbeef; int flag = 0; @@ -786,11 +795,22 @@ *iobase = base; return 1; /* success */ -#else +} + +#else /* PCMCIA */ + +static int fdomain_isa_detect( int *irq, int *iobase ) +{ + if (irq) + *irq = 0; + if (iobase) + *iobase = 0; return 0; -#endif } +#endif /* !PCMCIA */ + + /* PCI detection function: int fdomain_pci_bios_detect(int* irq, int* iobase) This function gets the Interrupt Level and I/O base address from the PCI configuration registers. */ @@ -1345,16 +1365,15 @@ #if ERRORS_ONLY if (current_SC->cmnd[0] == REQUEST_SENSE && !current_SC->SCp.Status) { - if ((unsigned char)(*((char *)current_SC->request_buffer+2)) & 0x0f) { + char *buf = scsi_sglist(current_SC); + if ((unsigned char)(*(buf + 2)) & 0x0f) { unsigned char key; unsigned char code; unsigned char qualifier; - key = (unsigned char)(*((char *)current_SC->request_buffer + 2)) - & 0x0f; - code = (unsigned char)(*((char *)current_SC->request_buffer + 12)); - qualifier = (unsigned char)(*((char *)current_SC->request_buffer - + 13)); + key = (unsigned char)(*(buf + 2)) & 0x0f; + code = (unsigned char)(*(buf + 12)); + qualifier = (unsigned char)(*(buf + 13)); if (key != UNIT_ATTENTION && !(key == NOT_READY @@ -1405,8 +1424,8 @@ printk( "queue: target = %d cmnd = 0x%02x pieces = %d size = %u\n", SCpnt->target, *(unsigned char *)SCpnt->cmnd, - SCpnt->use_sg, - SCpnt->request_bufflen ); + scsi_sg_count(SCpnt), + scsi_bufflen(SCpnt)); #endif fdomain_make_bus_idle(); @@ -1416,20 +1435,19 @@ /* Initialize static data */ - if (current_SC->use_sg) { - current_SC->SCp.buffer = - (struct scatterlist *)current_SC->request_buffer; - current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; + if (scsi_sg_count(current_SC)) { + current_SC->SCp.buffer = scsi_sglist(current_SC); + current_SC->SCp.ptr = 
page_address(current_SC->SCp.buffer->page) + + current_SC->SCp.buffer->offset; current_SC->SCp.this_residual = current_SC->SCp.buffer->length; - current_SC->SCp.buffers_residual = current_SC->use_sg - 1; + current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1; } else { - current_SC->SCp.ptr = (char *)current_SC->request_buffer; - current_SC->SCp.this_residual = current_SC->request_bufflen; + current_SC->SCp.ptr = 0; + current_SC->SCp.this_residual = 0; current_SC->SCp.buffer = NULL; current_SC->SCp.buffers_residual = 0; } - current_SC->SCp.Status = 0; current_SC->SCp.Message = 0; current_SC->SCp.have_data_in = 0; @@ -1472,8 +1490,8 @@ SCpnt->SCp.phase, SCpnt->device->id, *(unsigned char *)SCpnt->cmnd, - SCpnt->use_sg, - SCpnt->request_bufflen ); + scsi_sg_count(SCpnt), + scsi_bufflen(SCpnt)); printk( "sent_command = %d, have_data_in = %d, timeout = %d\n", SCpnt->SCp.sent_command, SCpnt->SCp.have_data_in, diff -Nurb linux-2.6.22-570/drivers/scsi/gdth.c linux-2.6.22-591/drivers/scsi/gdth.c --- linux-2.6.22-570/drivers/scsi/gdth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/gdth.c 2007-12-21 15:36:12.000000000 -0500 @@ -876,7 +876,7 @@ /* Vortex only makes RAID controllers. * We do not really want to specify all 550 ids here, so wildcard match. */ -static struct pci_device_id gdthtable[] __attribute_used__ = { +static struct pci_device_id gdthtable[] __maybe_unused = { {PCI_VENDOR_ID_VORTEX,PCI_ANY_ID,PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC,PCI_ANY_ID,PCI_ANY_ID}, {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC_XSCALE,PCI_ANY_ID,PCI_ANY_ID}, @@ -1955,7 +1955,7 @@ for (j = 0; j < 12; ++j) rtc[j] = CMOS_READ(j); } while (rtc[0] != CMOS_READ(0)); - spin_lock_irqrestore(&rtc_lock, flags); + spin_unlock_irqrestore(&rtc_lock, flags); TRACE2(("gdth_search_drives(): RTC: %x/%x/%x\n",*(ulong32 *)&rtc[0], *(ulong32 *)&rtc[4], *(ulong32 *)&rtc[8])); /* 3. 
send to controller firmware */ diff -Nurb linux-2.6.22-570/drivers/scsi/hptiop.c linux-2.6.22-591/drivers/scsi/hptiop.c --- linux-2.6.22-570/drivers/scsi/hptiop.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/hptiop.c 2007-12-21 15:36:12.000000000 -0500 @@ -339,20 +339,8 @@ scp = hba->reqs[tag].scp; - if (HPT_SCP(scp)->mapped) { - if (scp->use_sg) - pci_unmap_sg(hba->pcidev, - (struct scatterlist *)scp->request_buffer, - scp->use_sg, - scp->sc_data_direction - ); - else - pci_unmap_single(hba->pcidev, - HPT_SCP(scp)->dma_handle, - scp->request_bufflen, - scp->sc_data_direction - ); - } + if (HPT_SCP(scp)->mapped) + scsi_dma_unmap(scp); switch (le32_to_cpu(req->header.result)) { case IOP_RESULT_SUCCESS: @@ -449,43 +437,26 @@ { struct Scsi_Host *host = scp->device->host; struct hptiop_hba *hba = (struct hptiop_hba *)host->hostdata; - struct scatterlist *sglist = (struct scatterlist *)scp->request_buffer; + struct scatterlist *sg; + int idx, nseg; - /* - * though we'll not get non-use_sg fields anymore, - * keep use_sg checking anyway - */ - if (scp->use_sg) { - int idx; - - HPT_SCP(scp)->sgcnt = pci_map_sg(hba->pcidev, - sglist, scp->use_sg, - scp->sc_data_direction); + nseg = scsi_dma_map(scp); + BUG_ON(nseg < 0); + if (!nseg) + return 0; + + HPT_SCP(scp)->sgcnt = nseg; HPT_SCP(scp)->mapped = 1; + BUG_ON(HPT_SCP(scp)->sgcnt > hba->max_sg_descriptors); - for (idx = 0; idx < HPT_SCP(scp)->sgcnt; idx++) { - psg[idx].pci_address = - cpu_to_le64(sg_dma_address(&sglist[idx])); - psg[idx].size = cpu_to_le32(sg_dma_len(&sglist[idx])); + scsi_for_each_sg(scp, sg, HPT_SCP(scp)->sgcnt, idx) { + psg[idx].pci_address = cpu_to_le64(sg_dma_address(sg)); + psg[idx].size = cpu_to_le32(sg_dma_len(sg)); psg[idx].eot = (idx == HPT_SCP(scp)->sgcnt - 1) ? 
cpu_to_le32(1) : 0; } - return HPT_SCP(scp)->sgcnt; - } else { - HPT_SCP(scp)->dma_handle = pci_map_single( - hba->pcidev, - scp->request_buffer, - scp->request_bufflen, - scp->sc_data_direction - ); - HPT_SCP(scp)->mapped = 1; - psg->pci_address = cpu_to_le64(HPT_SCP(scp)->dma_handle); - psg->size = cpu_to_le32(scp->request_bufflen); - psg->eot = cpu_to_le32(1); - return 1; - } } static int hptiop_queuecommand(struct scsi_cmnd *scp, @@ -530,9 +501,8 @@ req = (struct hpt_iop_request_scsi_command *)_req->req_virt; /* build S/G table */ - if (scp->request_bufflen) sg_count = hptiop_buildsgl(scp, req->sg_list); - else + if (!sg_count) HPT_SCP(scp)->mapped = 0; req->header.flags = cpu_to_le32(IOP_REQUEST_FLAG_OUTPUT_CONTEXT); @@ -541,7 +511,7 @@ req->header.context = cpu_to_le32(IOPMU_QUEUE_ADDR_HOST_BIT | (u32)_req->index); req->header.context_hi32 = 0; - req->dataxfer_length = cpu_to_le32(scp->request_bufflen); + req->dataxfer_length = cpu_to_le32(scsi_bufflen(scp)); req->channel = scp->device->channel; req->target = scp->device->id; req->lun = scp->device->lun; diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.c linux-2.6.22-591/drivers/scsi/ibmmca.c --- linux-2.6.22-570/drivers/scsi/ibmmca.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ibmmca.c 2007-12-21 15:36:12.000000000 -0500 @@ -31,14 +31,21 @@ #include #include #include -#include #include #include #include "scsi.h" #include -#include "ibmmca.h" + +/* Common forward declarations for all Linux-versions: */ +static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *)); +static int ibmmca_abort (Scsi_Cmnd *); +static int ibmmca_host_reset (Scsi_Cmnd *); +static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *); +static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout); + + /* current version of this driver-source: */ #define IBMMCA_SCSI_DRIVER_VERSION "4.0b-ac" @@ -65,11 +72,11 @@ 
#define IM_DEBUG_CMD_DEVICE TYPE_TAPE /* relative addresses of hardware registers on a subsystem */ -#define IM_CMD_REG(hi) (hosts[(hi)]->io_port) /*Command Interface, (4 bytes long) */ -#define IM_ATTN_REG(hi) (hosts[(hi)]->io_port+4) /*Attention (1 byte) */ -#define IM_CTR_REG(hi) (hosts[(hi)]->io_port+5) /*Basic Control (1 byte) */ -#define IM_INTR_REG(hi) (hosts[(hi)]->io_port+6) /*Interrupt Status (1 byte, r/o) */ -#define IM_STAT_REG(hi) (hosts[(hi)]->io_port+7) /*Basic Status (1 byte, read only) */ +#define IM_CMD_REG(h) ((h)->io_port) /*Command Interface, (4 bytes long) */ +#define IM_ATTN_REG(h) ((h)->io_port+4) /*Attention (1 byte) */ +#define IM_CTR_REG(h) ((h)->io_port+5) /*Basic Control (1 byte) */ +#define IM_INTR_REG(h) ((h)->io_port+6) /*Interrupt Status (1 byte, r/o) */ +#define IM_STAT_REG(h) ((h)->io_port+7) /*Basic Status (1 byte, read only) */ /* basic I/O-port of first adapter */ #define IM_IO_PORT 0x3540 @@ -266,30 +273,36 @@ if ((display_mode & LED_ACTIVITY)||(!display_mode)) \ outb(inb(PS2_SYS_CTR) & 0x3f, PS2_SYS_CTR); } -/*list of supported subsystems */ -struct subsys_list_struct { - unsigned short mca_id; - char *description; -}; - /* types of different supported hardware that goes to hostdata special */ #define IBM_SCSI2_FW 0 #define IBM_7568_WCACHE 1 #define IBM_EXP_UNIT 2 #define IBM_SCSI_WCACHE 3 #define IBM_SCSI 4 +#define IBM_INTEGSCSI 5 /* other special flags for hostdata structure */ #define FORCED_DETECTION 100 #define INTEGRATED_SCSI 101 /* List of possible IBM-SCSI-adapters */ -static struct subsys_list_struct subsys_list[] = { - {0x8efc, "IBM SCSI-2 F/W Adapter"}, /* special = 0 */ - {0x8efd, "IBM 7568 Industrial Computer SCSI Adapter w/Cache"}, /* special = 1 */ - {0x8ef8, "IBM Expansion Unit SCSI Controller"}, /* special = 2 */ - {0x8eff, "IBM SCSI Adapter w/Cache"}, /* special = 3 */ - {0x8efe, "IBM SCSI Adapter"}, /* special = 4 */ +static short ibmmca_id_table[] = { + 0x8efc, + 0x8efd, + 0x8ef8, + 0x8eff, + 0x8efe, + /* 
No entry for integrated SCSI, that's part of the register */ + 0 +}; + +static const char *ibmmca_description[] = { + "IBM SCSI-2 F/W Adapter", /* special = 0 */ + "IBM 7568 Industrial Computer SCSI Adapter w/Cache", /* special = 1 */ + "IBM Expansion Unit SCSI Controller", /* special = 2 */ + "IBM SCSI Adapter w/Cache", /* special = 3 */ + "IBM SCSI Adapter", /* special = 4 */ + "IBM Integrated SCSI Controller", /* special = 5 */ }; /* Max number of logical devices (can be up from 0 to 14). 15 is the address @@ -375,30 +388,30 @@ }; /* macros to access host data structure */ -#define subsystem_pun(hi) (hosts[(hi)]->this_id) -#define subsystem_maxid(hi) (hosts[(hi)]->max_id) -#define ld(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_ld) -#define get_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_ldn) -#define get_scsi(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_scsi) -#define local_checking_phase_flag(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_local_checking_phase_flag) -#define got_interrupt(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_got_interrupt) -#define stat_result(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_stat_result) -#define reset_status(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_reset_status) -#define last_scsi_command(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_command) -#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type) -#define last_scsi_blockcount(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_blockcount) -#define last_scsi_logical_block(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_logical_block) -#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type) -#define next_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_next_ldn) -#define IBM_DS(hi) (((struct ibmmca_hostdata *) 
hosts[(hi)]->hostdata)->_IBM_DS) -#define special(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_special) -#define subsystem_connector_size(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_connector_size) -#define adapter_speed(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_adapter_speed) -#define pos2(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[2]) -#define pos3(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[3]) -#define pos4(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[4]) -#define pos5(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[5]) -#define pos6(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[6]) +#define subsystem_pun(h) ((h)->this_id) +#define subsystem_maxid(h) ((h)->max_id) +#define ld(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_ld) +#define get_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_ldn) +#define get_scsi(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_scsi) +#define local_checking_phase_flag(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_local_checking_phase_flag) +#define got_interrupt(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_got_interrupt) +#define stat_result(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_stat_result) +#define reset_status(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_reset_status) +#define last_scsi_command(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_command) +#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type) +#define last_scsi_blockcount(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_blockcount) +#define last_scsi_logical_block(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_logical_block) +#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type) +#define next_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_next_ldn) +#define IBM_DS(h) (((struct ibmmca_hostdata *) 
(h)->hostdata)->_IBM_DS) +#define special(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_special) +#define subsystem_connector_size(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_connector_size) +#define adapter_speed(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_adapter_speed) +#define pos2(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[2]) +#define pos3(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[3]) +#define pos4(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[4]) +#define pos5(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[5]) +#define pos6(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[6]) /* Define a arbitrary number as subsystem-marker-type. This number is, as described in the ANSI-SCSI-standard, not occupied by other device-types. */ @@ -459,11 +472,6 @@ /*counter of concurrent disk read/writes, to turn on/off disk led */ static int disk_rw_in_progress = 0; -/* host information */ -static int found = 0; -static struct Scsi_Host *hosts[IM_MAX_HOSTS + 1] = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL -}; static unsigned int pos[8]; /* whole pos register-line for diagnosis */ /* Taking into account the additions, made by ZP Gu. 
* This selects now the preset value from the configfile and @@ -474,70 +482,68 @@ static char ibm_ansi_order = 0; #endif -static void issue_cmd(int, unsigned long, unsigned char); +static void issue_cmd(struct Scsi_Host *, unsigned long, unsigned char); static void internal_done(Scsi_Cmnd * cmd); -static void check_devices(int, int); -static int immediate_assign(int, unsigned int, unsigned int, unsigned int, unsigned int); -static int immediate_feature(int, unsigned int, unsigned int); +static void check_devices(struct Scsi_Host *, int); +static int immediate_assign(struct Scsi_Host *, unsigned int, unsigned int, unsigned int, unsigned int); +static int immediate_feature(struct Scsi_Host *, unsigned int, unsigned int); #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET -static int immediate_reset(int, unsigned int); +static int immediate_reset(struct Scsi_Host *, unsigned int); #endif -static int device_inquiry(int, int); -static int read_capacity(int, int); -static int get_pos_info(int); +static int device_inquiry(struct Scsi_Host *, int); +static int read_capacity(struct Scsi_Host *, int); +static int get_pos_info(struct Scsi_Host *); static char *ti_p(int); static char *ti_l(int); static char *ibmrate(unsigned int, int); static int probe_display(int); -static int probe_bus_mode(int); -static int device_exists(int, int, int *, int *); -static struct Scsi_Host *ibmmca_register(struct scsi_host_template *, int, int, int, char *); +static int probe_bus_mode(struct Scsi_Host *); +static int device_exists(struct Scsi_Host *, int, int *, int *); static int option_setup(char *); /* local functions needed for proc_info */ -static int ldn_access_load(int, int); -static int ldn_access_total_read_write(int); +static int ldn_access_load(struct Scsi_Host *, int); +static int ldn_access_total_read_write(struct Scsi_Host *); static irqreturn_t interrupt_handler(int irq, void *dev_id) { - int host_index, ihost_index; unsigned int intr_reg; unsigned int cmd_result; unsigned int ldn; + unsigned 
long flags; Scsi_Cmnd *cmd; int lastSCSI; - struct Scsi_Host *dev = dev_id; + struct device *dev = dev_id; + struct Scsi_Host *shpnt = dev_get_drvdata(dev); + + spin_lock_irqsave(shpnt->host_lock, flags); - spin_lock(dev->host_lock); - /* search for one adapter-response on shared interrupt */ - for (host_index = 0; hosts[host_index] && !(inb(IM_STAT_REG(host_index)) & IM_INTR_REQUEST); host_index++); - /* return if some other device on this IRQ caused the interrupt */ - if (!hosts[host_index]) { - spin_unlock(dev->host_lock); + if(!(inb(IM_STAT_REG(shpnt)) & IM_INTR_REQUEST)) { + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_NONE; } /* the reset-function already did all the job, even ints got renabled on the subsystem, so just return */ - if ((reset_status(host_index) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(host_index) == IM_RESET_FINISHED_OK_NO_INT)) { - reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS; - spin_unlock(dev->host_lock); + if ((reset_status(shpnt) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(shpnt) == IM_RESET_FINISHED_OK_NO_INT)) { + reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS; + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; } /*must wait for attention reg not busy, then send EOI to subsystem */ while (1) { - if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) + if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) break; cpu_relax(); } - ihost_index = host_index; + /*get command result and logical device */ - intr_reg = (unsigned char) (inb(IM_INTR_REG(ihost_index))); + intr_reg = (unsigned char) (inb(IM_INTR_REG(shpnt))); cmd_result = intr_reg & 0xf0; ldn = intr_reg & 0x0f; /* get the last_scsi_command here */ - lastSCSI = last_scsi_command(ihost_index)[ldn]; - outb(IM_EOI | ldn, IM_ATTN_REG(ihost_index)); + lastSCSI = last_scsi_command(shpnt)[ldn]; + outb(IM_EOI | ldn, IM_ATTN_REG(shpnt)); /*these should never happen (hw fails, or a local programming bug) */ if (!global_command_error_excuse) { @@ 
-547,38 +553,38 @@ case IM_SOFTWARE_SEQUENCING_ERROR: case IM_CMD_ERROR: printk(KERN_ERR "IBM MCA SCSI: Fatal Subsystem ERROR!\n"); - printk(KERN_ERR " Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(ihost_index)[ldn].scb.enable); - if (ld(ihost_index)[ldn].cmd) - printk("%ld/%ld,", (long) (ld(ihost_index)[ldn].cmd->request_bufflen), (long) (ld(ihost_index)[ldn].scb.sys_buf_length)); + printk(KERN_ERR " Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(shpnt)[ldn].scb.enable); + if (ld(shpnt)[ldn].cmd) + printk("%ld/%ld,", (long) (scsi_bufflen(ld(shpnt)[ldn].cmd)), (long) (ld(shpnt)[ldn].scb.sys_buf_length)); else printk("none,"); - if (ld(ihost_index)[ldn].cmd) - printk("Blocksize=%d", ld(ihost_index)[ldn].scb.u2.blk.length); + if (ld(shpnt)[ldn].cmd) + printk("Blocksize=%d", ld(shpnt)[ldn].scb.u2.blk.length); else printk("Blocksize=none"); - printk(", host=0x%x, ldn=0x%x\n", ihost_index, ldn); - if (ld(ihost_index)[ldn].cmd) { - printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u2.blk.count); - printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u1.log_blk_adr); + printk(", host=%p, ldn=0x%x\n", shpnt, ldn); + if (ld(shpnt)[ldn].cmd) { + printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(shpnt)[ldn], ld(shpnt)[ldn].scb.u2.blk.count); + printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(shpnt)[ldn], ld(shpnt)[ldn].scb.u1.log_blk_adr); } printk(KERN_ERR "Reason given: %s\n", (cmd_result == IM_ADAPTER_HW_FAILURE) ? "HARDWARE FAILURE" : (cmd_result == IM_SOFTWARE_SEQUENCING_ERROR) ? "SOFTWARE SEQUENCING ERROR" : (cmd_result == IM_CMD_ERROR) ? 
"COMMAND ERROR" : "UNKNOWN"); /* if errors appear, enter this section to give detailed info */ printk(KERN_ERR "IBM MCA SCSI: Subsystem Error-Status follows:\n"); - printk(KERN_ERR " Command Type................: %x\n", last_scsi_type(ihost_index)[ldn]); - printk(KERN_ERR " Attention Register..........: %x\n", inb(IM_ATTN_REG(ihost_index))); - printk(KERN_ERR " Basic Control Register......: %x\n", inb(IM_CTR_REG(ihost_index))); + printk(KERN_ERR " Command Type................: %x\n", last_scsi_type(shpnt)[ldn]); + printk(KERN_ERR " Attention Register..........: %x\n", inb(IM_ATTN_REG(shpnt))); + printk(KERN_ERR " Basic Control Register......: %x\n", inb(IM_CTR_REG(shpnt))); printk(KERN_ERR " Interrupt Status Register...: %x\n", intr_reg); - printk(KERN_ERR " Basic Status Register.......: %x\n", inb(IM_STAT_REG(ihost_index))); - if ((last_scsi_type(ihost_index)[ldn] == IM_SCB) || (last_scsi_type(ihost_index)[ldn] == IM_LONG_SCB)) { - printk(KERN_ERR " SCB-Command.................: %x\n", ld(ihost_index)[ldn].scb.command); - printk(KERN_ERR " SCB-Enable..................: %x\n", ld(ihost_index)[ldn].scb.enable); - printk(KERN_ERR " SCB-logical block address...: %lx\n", ld(ihost_index)[ldn].scb.u1.log_blk_adr); - printk(KERN_ERR " SCB-system buffer address...: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_adr); - printk(KERN_ERR " SCB-system buffer length....: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_length); - printk(KERN_ERR " SCB-tsb address.............: %lx\n", ld(ihost_index)[ldn].scb.tsb_adr); - printk(KERN_ERR " SCB-Chain address...........: %lx\n", ld(ihost_index)[ldn].scb.scb_chain_adr); - printk(KERN_ERR " SCB-block count.............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.count); - printk(KERN_ERR " SCB-block length............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.length); + printk(KERN_ERR " Basic Status Register.......: %x\n", inb(IM_STAT_REG(shpnt))); + if ((last_scsi_type(shpnt)[ldn] == IM_SCB) || (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB)) { + 
printk(KERN_ERR " SCB-Command.................: %x\n", ld(shpnt)[ldn].scb.command); + printk(KERN_ERR " SCB-Enable..................: %x\n", ld(shpnt)[ldn].scb.enable); + printk(KERN_ERR " SCB-logical block address...: %lx\n", ld(shpnt)[ldn].scb.u1.log_blk_adr); + printk(KERN_ERR " SCB-system buffer address...: %lx\n", ld(shpnt)[ldn].scb.sys_buf_adr); + printk(KERN_ERR " SCB-system buffer length....: %lx\n", ld(shpnt)[ldn].scb.sys_buf_length); + printk(KERN_ERR " SCB-tsb address.............: %lx\n", ld(shpnt)[ldn].scb.tsb_adr); + printk(KERN_ERR " SCB-Chain address...........: %lx\n", ld(shpnt)[ldn].scb.scb_chain_adr); + printk(KERN_ERR " SCB-block count.............: %x\n", ld(shpnt)[ldn].scb.u2.blk.count); + printk(KERN_ERR " SCB-block length............: %x\n", ld(shpnt)[ldn].scb.u2.blk.length); } printk(KERN_ERR " Send this report to the maintainer.\n"); panic("IBM MCA SCSI: Fatal error message from the subsystem (0x%X,0x%X)!\n", lastSCSI, cmd_result); @@ -600,72 +606,73 @@ } } /* if no panic appeared, increase the interrupt-counter */ - IBM_DS(ihost_index).total_interrupts++; + IBM_DS(shpnt).total_interrupts++; /*only for local checking phase */ - if (local_checking_phase_flag(ihost_index)) { - stat_result(ihost_index) = cmd_result; - got_interrupt(ihost_index) = 1; - reset_status(ihost_index) = IM_RESET_FINISHED_OK; - last_scsi_command(ihost_index)[ldn] = NO_SCSI; - spin_unlock(dev->host_lock); + if (local_checking_phase_flag(shpnt)) { + stat_result(shpnt) = cmd_result; + got_interrupt(shpnt) = 1; + reset_status(shpnt) = IM_RESET_FINISHED_OK; + last_scsi_command(shpnt)[ldn] = NO_SCSI; + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; } /* handling of commands coming from upper level of scsi driver */ - if (last_scsi_type(ihost_index)[ldn] == IM_IMM_CMD) { + if (last_scsi_type(shpnt)[ldn] == IM_IMM_CMD) { /* verify ldn, and may handle rare reset immediate command */ - if ((reset_status(ihost_index) == IM_RESET_IN_PROGRESS) && 
(last_scsi_command(ihost_index)[ldn] == IM_RESET_IMM_CMD)) { + if ((reset_status(shpnt) == IM_RESET_IN_PROGRESS) && (last_scsi_command(shpnt)[ldn] == IM_RESET_IMM_CMD)) { if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) { disk_rw_in_progress = 0; PS2_DISK_LED_OFF(); - reset_status(ihost_index) = IM_RESET_FINISHED_FAIL; + reset_status(shpnt) = IM_RESET_FINISHED_FAIL; } else { /*reset disk led counter, turn off disk led */ disk_rw_in_progress = 0; PS2_DISK_LED_OFF(); - reset_status(ihost_index) = IM_RESET_FINISHED_OK; + reset_status(shpnt) = IM_RESET_FINISHED_OK; } - stat_result(ihost_index) = cmd_result; - last_scsi_command(ihost_index)[ldn] = NO_SCSI; - last_scsi_type(ihost_index)[ldn] = 0; - spin_unlock(dev->host_lock); + stat_result(shpnt) = cmd_result; + last_scsi_command(shpnt)[ldn] = NO_SCSI; + last_scsi_type(shpnt)[ldn] = 0; + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; - } else if (last_scsi_command(ihost_index)[ldn] == IM_ABORT_IMM_CMD) { + } else if (last_scsi_command(shpnt)[ldn] == IM_ABORT_IMM_CMD) { /* react on SCSI abort command */ #ifdef IM_DEBUG_PROBE printk("IBM MCA SCSI: Interrupt from SCSI-abort.\n"); #endif disk_rw_in_progress = 0; PS2_DISK_LED_OFF(); - cmd = ld(ihost_index)[ldn].cmd; - ld(ihost_index)[ldn].cmd = NULL; + cmd = ld(shpnt)[ldn].cmd; + ld(shpnt)[ldn].cmd = NULL; if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) cmd->result = DID_NO_CONNECT << 16; else cmd->result = DID_ABORT << 16; - stat_result(ihost_index) = cmd_result; - last_scsi_command(ihost_index)[ldn] = NO_SCSI; - last_scsi_type(ihost_index)[ldn] = 0; + stat_result(shpnt) = cmd_result; + last_scsi_command(shpnt)[ldn] = NO_SCSI; + last_scsi_type(shpnt)[ldn] = 0; if (cmd->scsi_done) (cmd->scsi_done) (cmd); /* should be the internal_done */ - spin_unlock(dev->host_lock); + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; } else { disk_rw_in_progress = 0; PS2_DISK_LED_OFF(); - reset_status(ihost_index) = IM_RESET_FINISHED_OK; - 
stat_result(ihost_index) = cmd_result; - last_scsi_command(ihost_index)[ldn] = NO_SCSI; - spin_unlock(dev->host_lock); + reset_status(shpnt) = IM_RESET_FINISHED_OK; + stat_result(shpnt) = cmd_result; + last_scsi_command(shpnt)[ldn] = NO_SCSI; + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; } } - last_scsi_command(ihost_index)[ldn] = NO_SCSI; - last_scsi_type(ihost_index)[ldn] = 0; - cmd = ld(ihost_index)[ldn].cmd; - ld(ihost_index)[ldn].cmd = NULL; + last_scsi_command(shpnt)[ldn] = NO_SCSI; + last_scsi_type(shpnt)[ldn] = 0; + cmd = ld(shpnt)[ldn].cmd; + ld(shpnt)[ldn].cmd = NULL; #ifdef IM_DEBUG_TIMEOUT if (cmd) { if ((cmd->target == TIMEOUT_PUN) && (cmd->device->lun == TIMEOUT_LUN)) { + spin_unlock_irqrestore(shpnt->host_lock, flags); printk("IBM MCA SCSI: Ignoring interrupt from pun=%x, lun=%x.\n", cmd->target, cmd->device->lun); return IRQ_HANDLED; } @@ -674,15 +681,15 @@ /*if no command structure, just return, else clear cmd */ if (!cmd) { - spin_unlock(dev->host_lock); + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; } #ifdef IM_DEBUG_INT - printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(ihost_index)[ldn].tsb.dev_status, ld(ihost_index)[ldn].tsb.cmd_status, ld(ihost_index)[ldn].tsb.dev_error, ld(ihost_index)[ldn].tsb.cmd_error); + printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(shpnt)[ldn].tsb.dev_status, ld(shpnt)[ldn].tsb.cmd_status, ld(shpnt)[ldn].tsb.dev_error, ld(shpnt)[ldn].tsb.cmd_error); #endif /*if this is end of media read/write, may turn off PS/2 disk led */ - if ((ld(ihost_index)[ldn].device_type != TYPE_NO_LUN) && (ld(ihost_index)[ldn].device_type != TYPE_NO_DEVICE)) { + if ((ld(shpnt)[ldn].device_type != TYPE_NO_LUN) && (ld(shpnt)[ldn].device_type != TYPE_NO_DEVICE)) { /* only access this, if there was a valid device addressed */ if (--disk_rw_in_progress == 0) PS2_DISK_LED_OFF(); @@ -693,8 +700,8 @@ * adapters do not
support CMD_TERMINATED, TASK_SET_FULL and * ACA_ACTIVE as returning statusbyte information. (ML) */ if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) { - cmd->result = (unsigned char) (ld(ihost_index)[ldn].tsb.dev_status & 0x1e); - IBM_DS(ihost_index).total_errors++; + cmd->result = (unsigned char) (ld(shpnt)[ldn].tsb.dev_status & 0x1e); + IBM_DS(shpnt).total_errors++; } else cmd->result = 0; /* write device status into cmd->result, and call done function */ @@ -705,24 +712,25 @@ cmd->result |= DID_OK << 16; if (cmd->scsi_done) (cmd->scsi_done) (cmd); - spin_unlock(dev->host_lock); + spin_unlock_irqrestore(shpnt->host_lock, flags); return IRQ_HANDLED; } -static void issue_cmd(int host_index, unsigned long cmd_reg, unsigned char attn_reg) +static void issue_cmd(struct Scsi_Host *shpnt, unsigned long cmd_reg, + unsigned char attn_reg) { unsigned long flags; /* must wait for attention reg not busy */ while (1) { - spin_lock_irqsave(hosts[host_index]->host_lock, flags); - if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) + spin_lock_irqsave(shpnt->host_lock, flags); + if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) break; - spin_unlock_irqrestore(hosts[host_index]->host_lock, flags); + spin_unlock_irqrestore(shpnt->host_lock, flags); } /* write registers and enable system interrupts */ - outl(cmd_reg, IM_CMD_REG(host_index)); - outb(attn_reg, IM_ATTN_REG(host_index)); - spin_unlock_irqrestore(hosts[host_index]->host_lock, flags); + outl(cmd_reg, IM_CMD_REG(shpnt)); + outb(attn_reg, IM_ATTN_REG(shpnt)); + spin_unlock_irqrestore(shpnt->host_lock, flags); } static void internal_done(Scsi_Cmnd * cmd) @@ -732,34 +740,34 @@ } /* SCSI-SCB-command for device_inquiry */ -static int device_inquiry(int host_index, int ldn) +static int device_inquiry(struct Scsi_Host *shpnt, int ldn) { int retr; struct im_scb *scb; struct im_tsb *tsb; unsigned char *buf; - scb = &(ld(host_index)[ldn].scb); - tsb = &(ld(host_index)[ldn].tsb); - buf = (unsigned char *) (&(ld(host_index)[ldn].buf)); - 
ld(host_index)[ldn].tsb.dev_status = 0; /* prepare statusblock */ + scb = &(ld(shpnt)[ldn].scb); + tsb = &(ld(shpnt)[ldn].tsb); + buf = (unsigned char *) (&(ld(shpnt)[ldn].buf)); + ld(shpnt)[ldn].tsb.dev_status = 0; /* prepare statusblock */ for (retr = 0; retr < 3; retr++) { /* fill scb with inquiry command */ scb->command = IM_DEVICE_INQUIRY_CMD | IM_NO_DISCONNECT; scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_RETRY_ENABLE | IM_BYPASS_BUFFER; - last_scsi_command(host_index)[ldn] = IM_DEVICE_INQUIRY_CMD; - last_scsi_type(host_index)[ldn] = IM_SCB; + last_scsi_command(shpnt)[ldn] = IM_DEVICE_INQUIRY_CMD; + last_scsi_type(shpnt)[ldn] = IM_SCB; scb->sys_buf_adr = isa_virt_to_bus(buf); scb->sys_buf_length = 255; /* maximum bufferlength gives max info */ scb->tsb_adr = isa_virt_to_bus(tsb); /* issue scb to passed ldn, and busy wait for interrupt */ - got_interrupt(host_index) = 0; - issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn); - while (!got_interrupt(host_index)) + got_interrupt(shpnt) = 0; + issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn); + while (!got_interrupt(shpnt)) barrier(); /*if command successful, break */ - if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) + if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) return 1; } /*if all three retries failed, return "no device at this ldn" */ @@ -769,34 +777,34 @@ return 1; } -static int read_capacity(int host_index, int ldn) +static int read_capacity(struct Scsi_Host *shpnt, int ldn) { int retr; struct im_scb *scb; struct im_tsb *tsb; unsigned char *buf; - scb = &(ld(host_index)[ldn].scb); - tsb = &(ld(host_index)[ldn].tsb); - buf = (unsigned char *) (&(ld(host_index)[ldn].buf)); - ld(host_index)[ldn].tsb.dev_status = 0; + scb = &(ld(shpnt)[ldn].scb); + tsb = &(ld(shpnt)[ldn].tsb); + buf = (unsigned char *) 
(&(ld(shpnt)[ldn].buf)); + ld(shpnt)[ldn].tsb.dev_status = 0; for (retr = 0; retr < 3; retr++) { /*fill scb with read capacity command */ scb->command = IM_READ_CAPACITY_CMD; scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_RETRY_ENABLE | IM_BYPASS_BUFFER; - last_scsi_command(host_index)[ldn] = IM_READ_CAPACITY_CMD; - last_scsi_type(host_index)[ldn] = IM_SCB; + last_scsi_command(shpnt)[ldn] = IM_READ_CAPACITY_CMD; + last_scsi_type(shpnt)[ldn] = IM_SCB; scb->sys_buf_adr = isa_virt_to_bus(buf); scb->sys_buf_length = 8; scb->tsb_adr = isa_virt_to_bus(tsb); /*issue scb to passed ldn, and busy wait for interrupt */ - got_interrupt(host_index) = 0; - issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn); - while (!got_interrupt(host_index)) + got_interrupt(shpnt) = 0; + issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn); + while (!got_interrupt(shpnt)) barrier(); /*if got capacity, get block length and return one device found */ - if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) + if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) return 1; } /*if all three retries failed, return "no device at this ldn" */ @@ -806,39 +814,39 @@ return 1; } -static int get_pos_info(int host_index) +static int get_pos_info(struct Scsi_Host *shpnt) { int retr; struct im_scb *scb; struct im_tsb *tsb; unsigned char *buf; - scb = &(ld(host_index)[MAX_LOG_DEV].scb); - tsb = &(ld(host_index)[MAX_LOG_DEV].tsb); - buf = (unsigned char *) (&(ld(host_index)[MAX_LOG_DEV].buf)); - ld(host_index)[MAX_LOG_DEV].tsb.dev_status = 0; + scb = &(ld(shpnt)[MAX_LOG_DEV].scb); + tsb = &(ld(shpnt)[MAX_LOG_DEV].tsb); + buf = (unsigned char *) (&(ld(shpnt)[MAX_LOG_DEV].buf)); + ld(shpnt)[MAX_LOG_DEV].tsb.dev_status = 0; for (retr = 0; retr < 3; retr++) { /*fill scb with get_pos_info command */ scb->command = IM_GET_POS_INFO_CMD; scb->enable = IM_READ_CONTROL | 
IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE | IM_BYPASS_BUFFER; - last_scsi_command(host_index)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD; - last_scsi_type(host_index)[MAX_LOG_DEV] = IM_SCB; + last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD; + last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_SCB; scb->sys_buf_adr = isa_virt_to_bus(buf); - if (special(host_index) == IBM_SCSI2_FW) + if (special(shpnt) == IBM_SCSI2_FW) scb->sys_buf_length = 256; /* get all info from F/W adapter */ else scb->sys_buf_length = 18; /* get exactly 18 bytes for other SCSI */ scb->tsb_adr = isa_virt_to_bus(tsb); /*issue scb to ldn=15, and busy wait for interrupt */ - got_interrupt(host_index) = 0; - issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV); + got_interrupt(shpnt) = 0; + issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV); /* FIXME: timeout */ - while (!got_interrupt(host_index)) + while (!got_interrupt(shpnt)) barrier(); /*if got POS-stuff, get block length and return one device found */ - if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) + if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) return 1; } /* if all three retries failed, return "no device at this ldn" */ @@ -851,14 +859,16 @@ /* SCSI-immediate-command for assign. This functions maps/unmaps specific ldn-numbers on SCSI (PUN,LUN). It is needed for presetting of the subsystem and for dynamical remapping od ldns. 
*/ -static int immediate_assign(int host_index, unsigned int pun, unsigned int lun, unsigned int ldn, unsigned int operation) +static int immediate_assign(struct Scsi_Host *shpnt, unsigned int pun, + unsigned int lun, unsigned int ldn, + unsigned int operation) { int retr; unsigned long imm_cmd; for (retr = 0; retr < 3; retr++) { /* select mutation level of the SCSI-adapter */ - switch (special(host_index)) { + switch (special(shpnt)) { case IBM_SCSI2_FW: imm_cmd = (unsigned long) (IM_ASSIGN_IMM_CMD); imm_cmd |= (unsigned long) ((lun & 7) << 24); @@ -867,7 +877,7 @@ imm_cmd |= (unsigned long) ((ldn & 15) << 16); break; default: - imm_cmd = inl(IM_CMD_REG(host_index)); + imm_cmd = inl(IM_CMD_REG(shpnt)); imm_cmd &= (unsigned long) (0xF8000000); /* keep reserved bits */ imm_cmd |= (unsigned long) (IM_ASSIGN_IMM_CMD); imm_cmd |= (unsigned long) ((lun & 7) << 24); @@ -876,15 +886,15 @@ imm_cmd |= (unsigned long) ((ldn & 15) << 16); break; } - last_scsi_command(host_index)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD; - last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD; - got_interrupt(host_index) = 0; - issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); - while (!got_interrupt(host_index)) + last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD; + last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD; + got_interrupt(shpnt) = 0; + issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); + while (!got_interrupt(shpnt)) barrier(); /*if command successful, break */ - if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED) + if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED) return 1; } if (retr >= 3) @@ -893,7 +903,7 @@ return 1; } -static int immediate_feature(int host_index, unsigned int speed, unsigned int timeout) +static int immediate_feature(struct Scsi_Host *shpnt, unsigned int speed, unsigned int timeout) { int retr; unsigned long imm_cmd; @@ -903,16 +913,16 @@ imm_cmd = IM_FEATURE_CTR_IMM_CMD; imm_cmd |= (unsigned long) ((speed & 
0x7) << 29); imm_cmd |= (unsigned long) ((timeout & 0x1fff) << 16); - last_scsi_command(host_index)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD; - last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD; - got_interrupt(host_index) = 0; + last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD; + last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD; + got_interrupt(shpnt) = 0; /* we need to run into command errors in order to probe for the * right speed! */ global_command_error_excuse = 1; - issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); + issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); /* FIXME: timeout */ - while (!got_interrupt(host_index)) + while (!got_interrupt(shpnt)) barrier(); if (global_command_error_excuse == CMD_FAIL) { global_command_error_excuse = 0; @@ -920,7 +930,7 @@ } else global_command_error_excuse = 0; /*if command successful, break */ - if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED) + if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED) return 1; } if (retr >= 3) @@ -930,35 +940,35 @@ } #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET -static int immediate_reset(int host_index, unsigned int ldn) +static int immediate_reset(struct Scsi_Host *shpnt, unsigned int ldn) { int retries; int ticks; unsigned long imm_command; for (retries = 0; retries < 3; retries++) { - imm_command = inl(IM_CMD_REG(host_index)); + imm_command = inl(IM_CMD_REG(shpnt)); imm_command &= (unsigned long) (0xFFFF0000); /* keep reserved bits */ imm_command |= (unsigned long) (IM_RESET_IMM_CMD); - last_scsi_command(host_index)[ldn] = IM_RESET_IMM_CMD; - last_scsi_type(host_index)[ldn] = IM_IMM_CMD; - got_interrupt(host_index) = 0; - reset_status(host_index) = IM_RESET_IN_PROGRESS; - issue_cmd(host_index, (unsigned long) (imm_command), IM_IMM_CMD | ldn); + last_scsi_command(shpnt)[ldn] = IM_RESET_IMM_CMD; + last_scsi_type(shpnt)[ldn] = IM_IMM_CMD; + got_interrupt(shpnt) = 0; + reset_status(shpnt) = IM_RESET_IN_PROGRESS; + 
issue_cmd(shpnt, (unsigned long) (imm_command), IM_IMM_CMD | ldn); ticks = IM_RESET_DELAY * HZ; - while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks) { + while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks) { udelay((1 + 999 / HZ) * 1000); barrier(); } /* if reset did not complete, just complain */ if (!ticks) { printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY); - reset_status(host_index) = IM_RESET_FINISHED_OK; + reset_status(shpnt) = IM_RESET_FINISHED_OK; /* did not work, finish */ return 1; } /*if command successful, break */ - if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED) + if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED) return 1; } if (retries >= 3) @@ -1060,35 +1070,35 @@ return 0; } -static int probe_bus_mode(int host_index) +static int probe_bus_mode(struct Scsi_Host *shpnt) { struct im_pos_info *info; int num_bus = 0; int ldn; - info = (struct im_pos_info *) (&(ld(host_index)[MAX_LOG_DEV].buf)); - if (get_pos_info(host_index)) { + info = (struct im_pos_info *) (&(ld(shpnt)[MAX_LOG_DEV].buf)); + if (get_pos_info(shpnt)) { if (info->connector_size & 0xf000) - subsystem_connector_size(host_index) = 16; + subsystem_connector_size(shpnt) = 16; else - subsystem_connector_size(host_index) = 32; + subsystem_connector_size(shpnt) = 32; num_bus |= (info->pos_4b & 8) >> 3; for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { - if ((special(host_index) == IBM_SCSI_WCACHE) || (special(host_index) == IBM_7568_WCACHE)) { + if ((special(shpnt) == IBM_SCSI_WCACHE) || (special(shpnt) == IBM_7568_WCACHE)) { if (!((info->cache_stat >> ldn) & 1)) - ld(host_index)[ldn].cache_flag = 0; + ld(shpnt)[ldn].cache_flag = 0; } if (!((info->retry_stat >> ldn) & 1)) - ld(host_index)[ldn].retry_flag = 0; + ld(shpnt)[ldn].retry_flag = 0; } #ifdef IM_DEBUG_PROBE printk("IBM MCA SCSI: SCSI-Cache bits: "); for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { - printk("%d", ld(host_index)[ldn].cache_flag); + printk("%d", 
ld(shpnt)[ldn].cache_flag); } printk("\nIBM MCA SCSI: SCSI-Retry bits: "); for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { - printk("%d", ld(host_index)[ldn].retry_flag); + printk("%d", ld(shpnt)[ldn].retry_flag); } printk("\n"); #endif @@ -1097,7 +1107,7 @@ } /* probing scsi devices */ -static void check_devices(int host_index, int adaptertype) +static void check_devices(struct Scsi_Host *shpnt, int adaptertype) { int id, lun, ldn, ticks; int count_devices; /* local counter for connected device */ @@ -1108,24 +1118,24 @@ /* assign default values to certain variables */ ticks = 0; count_devices = 0; - IBM_DS(host_index).dyn_flag = 0; /* normally no need for dynamical ldn management */ - IBM_DS(host_index).total_errors = 0; /* set errorcounter to 0 */ - next_ldn(host_index) = 7; /* next ldn to be assigned is 7, because 0-6 is 'hardwired' */ + IBM_DS(shpnt).dyn_flag = 0; /* normally no need for dynamical ldn management */ + IBM_DS(shpnt).total_errors = 0; /* set errorcounter to 0 */ + next_ldn(shpnt) = 7; /* next ldn to be assigned is 7, because 0-6 is 'hardwired' */ /* initialize the very important driver-informational arrays/structs */ - memset(ld(host_index), 0, sizeof(ld(host_index))); + memset(ld(shpnt), 0, sizeof(ld(shpnt))); for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { - last_scsi_command(host_index)[ldn] = NO_SCSI; /* emptify last SCSI-command storage */ - last_scsi_type(host_index)[ldn] = 0; - ld(host_index)[ldn].cache_flag = 1; - ld(host_index)[ldn].retry_flag = 1; + last_scsi_command(shpnt)[ldn] = NO_SCSI; /* emptify last SCSI-command storage */ + last_scsi_type(shpnt)[ldn] = 0; + ld(shpnt)[ldn].cache_flag = 1; + ld(shpnt)[ldn].retry_flag = 1; } - memset(get_ldn(host_index), TYPE_NO_DEVICE, sizeof(get_ldn(host_index))); /* this is essential ! */ - memset(get_scsi(host_index), TYPE_NO_DEVICE, sizeof(get_scsi(host_index))); /* this is essential ! */ + memset(get_ldn(shpnt), TYPE_NO_DEVICE, sizeof(get_ldn(shpnt))); /* this is essential ! 
*/ + memset(get_scsi(shpnt), TYPE_NO_DEVICE, sizeof(get_scsi(shpnt))); /* this is essential ! */ for (lun = 0; lun < 8; lun++) { /* mark the adapter at its pun on all luns */ - get_scsi(host_index)[subsystem_pun(host_index)][lun] = TYPE_IBM_SCSI_ADAPTER; - get_ldn(host_index)[subsystem_pun(host_index)][lun] = MAX_LOG_DEV; /* make sure, the subsystem + get_scsi(shpnt)[subsystem_pun(shpnt)][lun] = TYPE_IBM_SCSI_ADAPTER; + get_ldn(shpnt)[subsystem_pun(shpnt)][lun] = MAX_LOG_DEV; /* make sure, the subsystem ldn is active for all luns. */ } @@ -1134,9 +1144,9 @@ /* monitor connected on model XX95. */ /* STEP 1: */ - adapter_speed(host_index) = global_adapter_speed; - speedrun = adapter_speed(host_index); - while (immediate_feature(host_index, speedrun, adapter_timeout) == 2) { + adapter_speed(shpnt) = global_adapter_speed; + speedrun = adapter_speed(shpnt); + while (immediate_feature(shpnt, speedrun, adapter_timeout) == 2) { probe_display(1); if (speedrun == 7) panic("IBM MCA SCSI: Cannot set Synchronous-Transfer-Rate!\n"); @@ -1144,30 +1154,30 @@ if (speedrun > 7) speedrun = 7; } - adapter_speed(host_index) = speedrun; + adapter_speed(shpnt) = speedrun; /* Get detailed information about the current adapter, necessary for * device operations: */ - num_bus = probe_bus_mode(host_index); + num_bus = probe_bus_mode(shpnt); /* num_bus contains only valid data for the F/W adapter! 
*/ if (adaptertype == IBM_SCSI2_FW) { /* F/W SCSI adapter: */ /* F/W adapter PUN-space extension evaluation: */ if (num_bus) { printk(KERN_INFO "IBM MCA SCSI: Separate bus mode (wide-addressing enabled)\n"); - subsystem_maxid(host_index) = 16; + subsystem_maxid(shpnt) = 16; } else { printk(KERN_INFO "IBM MCA SCSI: Combined bus mode (wide-addressing disabled)\n"); - subsystem_maxid(host_index) = 8; + subsystem_maxid(shpnt) = 8; } printk(KERN_INFO "IBM MCA SCSI: Sync.-Rate (F/W: 20, Int.: 10, Ext.: %s) MBytes/s\n", ibmrate(speedrun, adaptertype)); } else /* all other IBM SCSI adapters: */ printk(KERN_INFO "IBM MCA SCSI: Synchronous-SCSI-Transfer-Rate: %s MBytes/s\n", ibmrate(speedrun, adaptertype)); /* assign correct PUN device space */ - max_pun = subsystem_maxid(host_index); + max_pun = subsystem_maxid(shpnt); #ifdef IM_DEBUG_PROBE - printk("IBM MCA SCSI: Current SCSI-host index: %d\n", host_index); + printk("IBM MCA SCSI: Current SCSI-host index: %d\n", shpnt->host_no); printk("IBM MCA SCSI: Removing default logical SCSI-device mapping."); #else printk(KERN_INFO "IBM MCA SCSI: Dev. Order: %s, Mapping (takes <2min): ", (ibm_ansi_order) ?
"ANSI" : "New"); @@ -1177,7 +1187,7 @@ #ifdef IM_DEBUG_PROBE printk("."); #endif - immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN); /* remove ldn (wherever) */ + immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN); /* remove ldn (wherever) */ } lun = 0; /* default lun is 0 */ #ifndef IM_DEBUG_PROBE @@ -1196,18 +1206,18 @@ #ifdef IM_DEBUG_PROBE printk("."); #endif - if (id != subsystem_pun(host_index)) { + if (id != subsystem_pun(shpnt)) { /* if pun is not the adapter: */ /* set ldn=0 to pun,lun */ - immediate_assign(host_index, id, lun, PROBE_LDN, SET_LDN); - if (device_inquiry(host_index, PROBE_LDN)) { /* probe device */ - get_scsi(host_index)[id][lun] = (unsigned char) (ld(host_index)[PROBE_LDN].buf[0]); + immediate_assign(shpnt, id, lun, PROBE_LDN, SET_LDN); + if (device_inquiry(shpnt, PROBE_LDN)) { /* probe device */ + get_scsi(shpnt)[id][lun] = (unsigned char) (ld(shpnt)[PROBE_LDN].buf[0]); /* entry, even for NO_LUN */ - if (ld(host_index)[PROBE_LDN].buf[0] != TYPE_NO_LUN) + if (ld(shpnt)[PROBE_LDN].buf[0] != TYPE_NO_LUN) count_devices++; /* a existing device is found */ } /* remove ldn */ - immediate_assign(host_index, id, lun, PROBE_LDN, REMOVE_LDN); + immediate_assign(shpnt, id, lun, PROBE_LDN, REMOVE_LDN); } } #ifndef IM_DEBUG_PROBE @@ -1227,16 +1237,16 @@ #ifdef IM_DEBUG_PROBE printk("."); #endif - if (id != subsystem_pun(host_index)) { - if (get_scsi(host_index)[id][lun] != TYPE_NO_LUN && get_scsi(host_index)[id][lun] != TYPE_NO_DEVICE) { + if (id != subsystem_pun(shpnt)) { + if (get_scsi(shpnt)[id][lun] != TYPE_NO_LUN && get_scsi(shpnt)[id][lun] != TYPE_NO_DEVICE) { /* Only map if accepted type. Always enter for lun == 0 to get no gaps into ldn-mapping for ldn<7. 
*/ - immediate_assign(host_index, id, lun, ldn, SET_LDN); - get_ldn(host_index)[id][lun] = ldn; /* map ldn */ - if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) { + immediate_assign(shpnt, id, lun, ldn, SET_LDN); + get_ldn(shpnt)[id][lun] = ldn; /* map ldn */ + if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) { #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET printk("resetting device at ldn=%x ... ", ldn); - immediate_reset(host_index, ldn); + immediate_reset(shpnt, ldn); #endif ldn++; } else { @@ -1244,15 +1254,15 @@ * handle it or because it has problems */ if (lun > 0) { /* remove mapping */ - get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE; - immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN); + get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE; + immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN); } else ldn++; } } else if (lun == 0) { /* map lun == 0, even if no device exists */ - immediate_assign(host_index, id, lun, ldn, SET_LDN); - get_ldn(host_index)[id][lun] = ldn; /* map ldn */ + immediate_assign(shpnt, id, lun, ldn, SET_LDN); + get_ldn(shpnt)[id][lun] = ldn; /* map ldn */ ldn++; } } @@ -1262,14 +1272,14 @@ /* map remaining ldns to non-existing devices */ for (lun = 1; lun < 8 && ldn < MAX_LOG_DEV; lun++) for (id = 0; id < max_pun && ldn < MAX_LOG_DEV; id++) { - if (get_scsi(host_index)[id][lun] == TYPE_NO_LUN || get_scsi(host_index)[id][lun] == TYPE_NO_DEVICE) { + if (get_scsi(shpnt)[id][lun] == TYPE_NO_LUN || get_scsi(shpnt)[id][lun] == TYPE_NO_DEVICE) { probe_display(1); /* Map remaining ldns only to NON-existing pun,lun combinations to make sure an inquiry will fail. For MULTI_LUN, it is needed to avoid adapter autonome SCSI-remapping. 
*/ - immediate_assign(host_index, id, lun, ldn, SET_LDN); - get_ldn(host_index)[id][lun] = ldn; + immediate_assign(shpnt, id, lun, ldn, SET_LDN); + get_ldn(shpnt)[id][lun] = ldn; ldn++; } } @@ -1292,51 +1302,51 @@ for (id = 0; id < max_pun; id++) { printk("%2d ", id); for (lun = 0; lun < 8; lun++) - printk("%2s ", ti_p(get_scsi(host_index)[id][lun])); + printk("%2s ", ti_p(get_scsi(shpnt)[id][lun])); printk(" %2d ", id); for (lun = 0; lun < 8; lun++) - printk("%2s ", ti_l(get_ldn(host_index)[id][lun])); + printk("%2s ", ti_l(get_ldn(shpnt)[id][lun])); printk("\n"); } #endif /* assign total number of found SCSI-devices to the statistics struct */ - IBM_DS(host_index).total_scsi_devices = count_devices; + IBM_DS(shpnt).total_scsi_devices = count_devices; /* decide for output in /proc-filesystem, if the configuration of SCSI-devices makes dynamical reassignment of devices necessary */ if (count_devices >= MAX_LOG_DEV) - IBM_DS(host_index).dyn_flag = 1; /* dynamical assignment is necessary */ + IBM_DS(shpnt).dyn_flag = 1; /* dynamical assignment is necessary */ else - IBM_DS(host_index).dyn_flag = 0; /* dynamical assignment is not necessary */ + IBM_DS(shpnt).dyn_flag = 0; /* dynamical assignment is not necessary */ /* If no SCSI-devices are assigned, return 1 in order to cause message. 
*/ if (ldn == 0) printk("IBM MCA SCSI: Warning: No SCSI-devices found/assigned!\n"); /* reset the counters for statistics on the current adapter */ - IBM_DS(host_index).scbs = 0; - IBM_DS(host_index).long_scbs = 0; - IBM_DS(host_index).total_accesses = 0; - IBM_DS(host_index).total_interrupts = 0; - IBM_DS(host_index).dynamical_assignments = 0; - memset(IBM_DS(host_index).ldn_access, 0x0, sizeof(IBM_DS(host_index).ldn_access)); - memset(IBM_DS(host_index).ldn_read_access, 0x0, sizeof(IBM_DS(host_index).ldn_read_access)); - memset(IBM_DS(host_index).ldn_write_access, 0x0, sizeof(IBM_DS(host_index).ldn_write_access)); - memset(IBM_DS(host_index).ldn_inquiry_access, 0x0, sizeof(IBM_DS(host_index).ldn_inquiry_access)); - memset(IBM_DS(host_index).ldn_modeselect_access, 0x0, sizeof(IBM_DS(host_index).ldn_modeselect_access)); - memset(IBM_DS(host_index).ldn_assignments, 0x0, sizeof(IBM_DS(host_index).ldn_assignments)); + IBM_DS(shpnt).scbs = 0; + IBM_DS(shpnt).long_scbs = 0; + IBM_DS(shpnt).total_accesses = 0; + IBM_DS(shpnt).total_interrupts = 0; + IBM_DS(shpnt).dynamical_assignments = 0; + memset(IBM_DS(shpnt).ldn_access, 0x0, sizeof(IBM_DS(shpnt).ldn_access)); + memset(IBM_DS(shpnt).ldn_read_access, 0x0, sizeof(IBM_DS(shpnt).ldn_read_access)); + memset(IBM_DS(shpnt).ldn_write_access, 0x0, sizeof(IBM_DS(shpnt).ldn_write_access)); + memset(IBM_DS(shpnt).ldn_inquiry_access, 0x0, sizeof(IBM_DS(shpnt).ldn_inquiry_access)); + memset(IBM_DS(shpnt).ldn_modeselect_access, 0x0, sizeof(IBM_DS(shpnt).ldn_modeselect_access)); + memset(IBM_DS(shpnt).ldn_assignments, 0x0, sizeof(IBM_DS(shpnt).ldn_assignments)); probe_display(0); return; } -static int device_exists(int host_index, int ldn, int *block_length, int *device_type) +static int device_exists(struct Scsi_Host *shpnt, int ldn, int *block_length, int *device_type) { unsigned char *buf; /* if no valid device found, return immediately with 0 */ - if (!(device_inquiry(host_index, ldn))) + if (!(device_inquiry(shpnt, ldn))) return 
0; - buf = (unsigned char *) (&(ld(host_index)[ldn].buf)); + buf = (unsigned char *) (&(ld(shpnt)[ldn].buf)); if (*buf == TYPE_ROM) { *device_type = TYPE_ROM; *block_length = 2048; /* (standard blocksize for yellow-/red-book) */ @@ -1349,7 +1359,7 @@ } if (*buf == TYPE_DISK) { *device_type = TYPE_DISK; - if (read_capacity(host_index, ldn)) { + if (read_capacity(shpnt, ldn)) { *block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24); return 1; } else @@ -1357,7 +1367,7 @@ } if (*buf == TYPE_MOD) { *device_type = TYPE_MOD; - if (read_capacity(host_index, ldn)) { + if (read_capacity(shpnt, ldn)) { *block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24); return 1; } else @@ -1430,6 +1440,9 @@ return; } +#if 0 + FIXME NEED TO MOVE TO SYSFS + static int ibmmca_getinfo(char *buf, int slot, void *dev_id) { struct Scsi_Host *shpnt; @@ -1480,58 +1493,34 @@ return len; } +#endif -int ibmmca_detect(struct scsi_host_template * scsi_template) +static struct scsi_host_template ibmmca_driver_template = { + .proc_name = "ibmmca", + .proc_info = ibmmca_proc_info, + .name = "IBM SCSI-Subsystem", + .queuecommand = ibmmca_queuecommand, + .eh_abort_handler = ibmmca_abort, + .eh_host_reset_handler = ibmmca_host_reset, + .bios_param = ibmmca_biosparam, + .can_queue = 16, + .this_id = 7, + .sg_tablesize = 16, + .cmd_per_lun = 1, + .use_clustering = ENABLE_CLUSTERING, +}; + +static int ibmmca_probe(struct device *dev) { struct Scsi_Host *shpnt; - int port, id, i, j, k, slot; - int devices_on_irq_11 = 0; - int devices_on_irq_14 = 0; - int IRQ14_registered = 0; - int IRQ11_registered = 0; - - found = 0; /* make absolutely sure, that found is set to 0 */ + int port, id, i, j, k, irq, enabled, ret = -EINVAL; + struct mca_device *mca_dev = to_mca_device(dev); + const char *description = ibmmca_description[mca_dev->index]; /* First of all, print the version number of the driver. 
This is * important to allow better user bugreports in case of already * having problems with the MCA_bus probing. */ printk(KERN_INFO "IBM MCA SCSI: Version %s\n", IBMMCA_SCSI_DRIVER_VERSION); - /* if this is not MCA machine, return "nothing found" */ - if (!MCA_bus) { - printk(KERN_INFO "IBM MCA SCSI: No Microchannel-bus present --> Aborting.\n" " This machine does not have any IBM MCA-bus\n" " or the MCA-Kernel-support is not enabled!\n"); - return 0; - } - -#ifdef MODULE - /* If the driver is run as module, read from conf.modules or cmd-line */ - if (boot_options) - option_setup(boot_options); -#endif - - /* get interrupt request level */ - if (request_irq(IM_IRQ, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) { - printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ); - return 0; - } else - IRQ14_registered++; - - /* if ibmmcascsi setup option was passed to kernel, return "found" */ - for (i = 0; i < IM_MAX_HOSTS; i++) - if (io_port[i] > 0 && scsi_id[i] >= 0 && scsi_id[i] < 8) { - printk("IBM MCA SCSI: forced detected SCSI Adapter, io=0x%x, scsi id=%d.\n", io_port[i], scsi_id[i]); - if ((shpnt = ibmmca_register(scsi_template, io_port[i], scsi_id[i], FORCED_DETECTION, "forced detected SCSI Adapter"))) { - for (k = 2; k < 7; k++) - ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = 0; - ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = FORCED_DETECTION; - mca_set_adapter_name(MCA_INTEGSCSI, "forced detected SCSI Adapter"); - mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt); - mca_mark_as_used(MCA_INTEGSCSI); - devices_on_irq_14++; - } - } - if (found) - return found; - /* The POS2-register of all PS/2 model SCSI-subsystems has the following * interpretation of bits: * Bit 7 - 4 : Chip Revision ID (Release) @@ -1558,7 +1547,14 @@ /* first look for the IBM SCSI integrated subsystem on the motherboard */ for (j = 0; j < 8; j++) /* read the pos-information */ - pos[j] = mca_read_stored_pos(MCA_INTEGSCSI, 
j); + pos[j] = mca_device_read_pos(mca_dev, j); + id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */ + enabled = (pos[2] &0x01); + if (!enabled) { + printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n"); + printk(KERN_WARNING " SCSI-operations may not work.\n"); + } + /* pos2 = pos3 = 0xff if there is no integrated SCSI-subsystem present, but * if we ignore the settings of all surrounding pos registers, it is not * completely sufficient to only check pos2 and pos3. */ @@ -1566,232 +1562,137 @@ * make sure, we see a real integrated onboard SCSI-interface and no * internal system information, which gets mapped to some pos registers * on models 95xx. */ - if ((!pos[0] && !pos[1] && pos[2] > 0 && pos[3] > 0 && !pos[4] && !pos[5] && !pos[6] && !pos[7]) || (pos[0] == 0xff && pos[1] == 0xff && pos[2] < 0xff && pos[3] < 0xff && pos[4] == 0xff && pos[5] == 0xff && pos[6] == 0xff && pos[7] == 0xff)) { - if ((pos[2] & 1) == 1) /* is the subsystem chip enabled ? */ + if (mca_dev->slot == MCA_INTEGSCSI && + ((!pos[0] && !pos[1] && pos[2] > 0 && + pos[3] > 0 && !pos[4] && !pos[5] && + !pos[6] && !pos[7]) || + (pos[0] == 0xff && pos[1] == 0xff && + pos[2] < 0xff && pos[3] < 0xff && + pos[4] == 0xff && pos[5] == 0xff && + pos[6] == 0xff && pos[7] == 0xff))) { + irq = IM_IRQ; port = IM_IO_PORT; - else { /* if disabled, no IRQs will be generated, as the chip won't - * listen to the incoming commands and will do really nothing, - * except for listening to the pos-register settings. If this - * happens, I need to hugely think about it, as one has to - * write something to the MCA-Bus pos register in order to - * enable the chip. Normally, IBM-SCSI won't pass the POST, - * when the chip is disabled (see IBM tech. ref.). 
*/ - port = IM_IO_PORT; /* anyway, set the portnumber and warn */ - printk("IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n" " SCSI-operations may not work.\n"); - } - id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */ - /* give detailed information on the subsystem. This helps me - * additionally during debugging and analyzing bug-reports. */ - printk(KERN_INFO "IBM MCA SCSI: IBM Integrated SCSI Controller ffound, io=0x%x, scsi id=%d,\n", port, id); - printk(KERN_INFO " chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled."); - - /* register the found integrated SCSI-subsystem */ - if ((shpnt = ibmmca_register(scsi_template, port, id, INTEGRATED_SCSI, "IBM Integrated SCSI Controller"))) - { - for (k = 2; k < 7; k++) - ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; - ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI; - mca_set_adapter_name(MCA_INTEGSCSI, "IBM Integrated SCSI Controller"); - mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt); - mca_mark_as_used(MCA_INTEGSCSI); - devices_on_irq_14++; - } - } - - /* now look for other adapters in MCA slots, */ - /* determine the number of known IBM-SCSI-subsystem types */ - /* see the pos[2] dependence to get the adapter port-offset. */ - for (i = 0; i < ARRAY_SIZE(subsys_list); i++) { - /* scan each slot for a fitting adapter id */ - slot = 0; /* start at slot 0 */ - while ((slot = mca_find_adapter(subsys_list[i].mca_id, slot)) - != MCA_NOTFOUND) { /* scan through all slots */ - for (j = 0; j < 8; j++) /* read the pos-information */ - pos[j] = mca_read_stored_pos(slot, j); - if ((pos[2] & 1) == 1) - /* is the subsystem chip enabled ? 
*/ - /* (explanations see above) */ - port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); - else { - /* anyway, set the portnumber and warn */ - port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); - printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n"); - printk(KERN_WARNING " SCSI-operations may not work.\n"); - } - if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) { + } else { + irq = IM_IRQ; + port = IM_IO_PORT + ((pos[2] &0x0e) << 2); + if ((mca_dev->index == IBM_SCSI2_FW) && (pos[6] != 0)) { printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n"); printk(KERN_ERR " Impossible to determine adapter PUN!\n"); printk(KERN_ERR " Guessing adapter PUN = 7.\n"); id = 7; } else { id = (pos[3] & 0xe0) >> 5; /* get subsystem PUN */ - if (i == IBM_SCSI2_FW) { + if (mca_dev->index == IBM_SCSI2_FW) { id |= (pos[3] & 0x10) >> 1; /* get subsystem PUN high-bit * for F/W adapters */ } } - if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) { + if ((mca_dev->index == IBM_SCSI2_FW) && + (pos[4] & 0x01) && (pos[6] == 0)) { /* IRQ11 is used by SCSI-2 F/W Adapter/A */ printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n"); - /* get interrupt request level */ - if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) { - printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW); - } else - IRQ11_registered++; + irq = IM_IRQ_FW; } - printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id); - if ((pos[2] & 0xf0) == 0xf0) - printk(KERN_DEBUG" ROM Addr.=off,"); - else - printk(KERN_DEBUG " ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000); - printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." 
: "disabled."); - - /* register the hostadapter */ - if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) { - for (k = 2; k < 8; k++) - ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; - ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i; - mca_set_adapter_name(slot, subsys_list[i].description); - mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt); - mca_mark_as_used(slot); - if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) - devices_on_irq_11++; - else - devices_on_irq_14++; - } - slot++; /* advance to next slot */ - } /* advance to next adapter id in the list of IBM-SCSI-subsystems */ } - /* now check for SCSI-adapters, mapped to the integrated SCSI - * area. E.g. a W/Cache in MCA-slot 9(!). Do the check correct here, - * as this is a known effect on some models 95xx. */ - for (i = 0; i < ARRAY_SIZE(subsys_list); i++) { - /* scan each slot for a fitting adapter id */ - slot = mca_find_adapter(subsys_list[i].mca_id, MCA_INTEGSCSI); - if (slot != MCA_NOTFOUND) { /* scan through all slots */ - for (j = 0; j < 8; j++) /* read the pos-information */ - pos[j] = mca_read_stored_pos(slot, j); - if ((pos[2] & 1) == 1) { /* is the subsystem chip enabled ? 
*/ - /* (explanations see above) */ - port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); - } else { /* anyway, set the portnumber and warn */ - port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); - printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n"); - printk(KERN_WARNING " SCSI-operations may not work.\n"); - } - if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) { - printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n"); - printk(KERN_ERR " Impossible to determine adapter PUN!\n"); - printk(KERN_ERR " Guessing adapter PUN = 7.\n"); - id = 7; - } else { - id = (pos[3] & 0xe0) >> 5; /* get subsystem PUN */ - if (i == IBM_SCSI2_FW) - id |= (pos[3] & 0x10) >> 1; /* get subsystem PUN high-bit - * for F/W adapters */ - } - if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) { - /* IRQ11 is used by SCSI-2 F/W Adapter/A */ - printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n"); - /* get interrupt request level */ - if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) - printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW); - else - IRQ11_registered++; - } - printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id); + + + /* give detailed information on the subsystem. This helps me + * additionally during debugging and analyzing bug-reports. */ + printk(KERN_INFO "IBM MCA SCSI: %s found, io=0x%x, scsi id=%d,\n", + description, port, id); + if (mca_dev->slot == MCA_INTEGSCSI) + printk(KERN_INFO " chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled."); + else { if ((pos[2] & 0xf0) == 0xf0) printk(KERN_DEBUG " ROM Addr.=off,"); else printk(KERN_DEBUG " ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000); - printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." 
: "disabled."); - /* register the hostadapter */ - if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) { - for (k = 2; k < 7; k++) - ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; - ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i; - mca_set_adapter_name(slot, subsys_list[i].description); - mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt); - mca_mark_as_used(slot); - if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) - devices_on_irq_11++; - else - devices_on_irq_14++; - } - slot++; /* advance to next slot */ - } /* advance to next adapter id in the list of IBM-SCSI-subsystems */ + printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled."); } - if (IRQ11_registered && !devices_on_irq_11) - free_irq(IM_IRQ_FW, hosts); /* no devices on IRQ 11 */ - if (IRQ14_registered && !devices_on_irq_14) - free_irq(IM_IRQ, hosts); /* no devices on IRQ 14 */ - if (!devices_on_irq_11 && !devices_on_irq_14) - printk(KERN_WARNING "IBM MCA SCSI: No IBM SCSI-subsystem adapter attached.\n"); - return found; /* return the number of found SCSI hosts. Should be 1 or 0. 
*/ -} - -static struct Scsi_Host *ibmmca_register(struct scsi_host_template * scsi_template, int port, int id, int adaptertype, char *hostname) -{ - struct Scsi_Host *shpnt; - int i, j; - unsigned int ctrl; /* check I/O region */ - if (!request_region(port, IM_N_IO_PORT, hostname)) { + if (!request_region(port, IM_N_IO_PORT, description)) { printk(KERN_ERR "IBM MCA SCSI: Unable to get I/O region 0x%x-0x%x (%d ports).\n", port, port + IM_N_IO_PORT - 1, IM_N_IO_PORT); - return NULL; + goto out_fail; } /* register host */ - shpnt = scsi_register(scsi_template, sizeof(struct ibmmca_hostdata)); + shpnt = scsi_host_alloc(&ibmmca_driver_template, + sizeof(struct ibmmca_hostdata)); if (!shpnt) { printk(KERN_ERR "IBM MCA SCSI: Unable to register host.\n"); - release_region(port, IM_N_IO_PORT); - return NULL; + goto out_release; + } + + dev_set_drvdata(dev, shpnt); + if(request_irq(irq, interrupt_handler, IRQF_SHARED, description, dev)) { + printk(KERN_ERR "IBM MCA SCSI: failed to request interrupt %d\n", irq); + goto out_free_host; } /* request I/O region */ - hosts[found] = shpnt; /* add new found hostadapter to the list */ - special(found) = adaptertype; /* important assignment or else crash! */ - subsystem_connector_size(found) = 0; /* preset slot-size */ - shpnt->irq = IM_IRQ; /* assign necessary stuff for the adapter */ + special(shpnt) = mca_dev->index; /* important assignment or else crash! 
*/ + subsystem_connector_size(shpnt) = 0; /* preset slot-size */ + shpnt->irq = irq; /* assign necessary stuff for the adapter */ shpnt->io_port = port; shpnt->n_io_port = IM_N_IO_PORT; shpnt->this_id = id; shpnt->max_id = 8; /* 8 PUNs are default */ /* now, the SCSI-subsystem is connected to Linux */ - ctrl = (unsigned int) (inb(IM_CTR_REG(found))); /* get control-register status */ #ifdef IM_DEBUG_PROBE + ctrl = (unsigned int) (inb(IM_CTR_REG(found))); /* get control-register status */ printk("IBM MCA SCSI: Control Register contents: %x, status: %x\n", ctrl, inb(IM_STAT_REG(found))); printk("IBM MCA SCSI: This adapters' POS-registers: "); for (i = 0; i < 8; i++) printk("%x ", pos[i]); printk("\n"); #endif - reset_status(found) = IM_RESET_NOT_IN_PROGRESS; + reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS; for (i = 0; i < 16; i++) /* reset the tables */ for (j = 0; j < 8; j++) - get_ldn(found)[i][j] = MAX_LOG_DEV; + get_ldn(shpnt)[i][j] = MAX_LOG_DEV; /* check which logical devices exist */ /* after this line, local interrupting is possible: */ - local_checking_phase_flag(found) = 1; - check_devices(found, adaptertype); /* call by value, using the global variable hosts */ - local_checking_phase_flag(found) = 0; - found++; /* now increase index to be prepared for next found subsystem */ + local_checking_phase_flag(shpnt) = 1; + check_devices(shpnt, mca_dev->index); /* call by value, using the global variable hosts */ + local_checking_phase_flag(shpnt) = 0; + /* an ibm mca subsystem has been detected */ - return shpnt; + + for (k = 2; k < 7; k++) + ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; + ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI; + mca_device_set_name(mca_dev, description); + /* FIXME: NEED TO REPLUMB TO SYSFS + mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt); + */ + mca_device_set_claim(mca_dev, 1); + if (scsi_add_host(shpnt, dev)) { + dev_printk(KERN_ERR, dev, "IBM MCA SCSI: 
scsi_add_host failed\n"); + goto out_free_host; + } + scsi_scan_host(shpnt); + + return 0; + out_free_host: + scsi_host_put(shpnt); + out_release: + release_region(port, IM_N_IO_PORT); + out_fail: + return ret; } -static int ibmmca_release(struct Scsi_Host *shpnt) +static int __devexit ibmmca_remove(struct device *dev) { + struct Scsi_Host *shpnt = dev_get_drvdata(dev); + scsi_remove_host(shpnt); release_region(shpnt->io_port, shpnt->n_io_port); - if (!(--found)) - free_irq(shpnt->irq, hosts); + free_irq(shpnt->irq, dev); return 0; } @@ -1805,33 +1706,24 @@ int current_ldn; int id, lun; int target; - int host_index; int max_pun; int i; - struct scatterlist *sl; + struct scatterlist *sg; shpnt = cmd->device->host; - /* search for the right hostadapter */ - for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++); - if (!hosts[host_index]) { /* invalid hostadapter descriptor address */ - cmd->result = DID_NO_CONNECT << 16; - if (done) - done(cmd); - return 0; - } - max_pun = subsystem_maxid(host_index); + max_pun = subsystem_maxid(shpnt); if (ibm_ansi_order) { target = max_pun - 1 - cmd->device->id; - if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index))) + if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt))) target--; - else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index))) + else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt))) target++; } else target = cmd->device->id; /* if (target,lun) is NO LUN or not existing at all, return error */ - if ((get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_DEVICE)) { + if ((get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_DEVICE)) { cmd->result = DID_NO_CONNECT << 16; if (done) done(cmd); @@ -1839,16 
+1731,16 @@ } /*if (target,lun) unassigned, do further checks... */ - ldn = get_ldn(host_index)[target][cmd->device->lun]; + ldn = get_ldn(shpnt)[target][cmd->device->lun]; if (ldn >= MAX_LOG_DEV) { /* on invalid ldn do special stuff */ if (ldn > MAX_LOG_DEV) { /* dynamical remapping if ldn unassigned */ - current_ldn = next_ldn(host_index); /* stop-value for one circle */ - while (ld(host_index)[next_ldn(host_index)].cmd) { /* search for a occupied, but not in */ + current_ldn = next_ldn(shpnt); /* stop-value for one circle */ + while (ld(shpnt)[next_ldn(shpnt)].cmd) { /* search for a occupied, but not in */ /* command-processing ldn. */ - next_ldn(host_index)++; - if (next_ldn(host_index) >= MAX_LOG_DEV) - next_ldn(host_index) = 7; - if (current_ldn == next_ldn(host_index)) { /* One circle done ? */ + next_ldn(shpnt)++; + if (next_ldn(shpnt) >= MAX_LOG_DEV) + next_ldn(shpnt) = 7; + if (current_ldn == next_ldn(shpnt)) { /* One circle done ? */ /* no non-processing ldn found */ scmd_printk(KERN_WARNING, cmd, "IBM MCA SCSI: Cannot assign SCSI-device dynamically!\n" @@ -1864,56 +1756,56 @@ /* unmap non-processing ldn */ for (id = 0; id < max_pun; id++) for (lun = 0; lun < 8; lun++) { - if (get_ldn(host_index)[id][lun] == next_ldn(host_index)) { - get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE; - get_scsi(host_index)[id][lun] = TYPE_NO_DEVICE; + if (get_ldn(shpnt)[id][lun] == next_ldn(shpnt)) { + get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE; + get_scsi(shpnt)[id][lun] = TYPE_NO_DEVICE; /* unmap entry */ } } /* set reduced interrupt_handler-mode for checking */ - local_checking_phase_flag(host_index) = 1; + local_checking_phase_flag(shpnt) = 1; /* map found ldn to pun,lun */ - get_ldn(host_index)[target][cmd->device->lun] = next_ldn(host_index); + get_ldn(shpnt)[target][cmd->device->lun] = next_ldn(shpnt); /* change ldn to the right value, that is now next_ldn */ - ldn = next_ldn(host_index); + ldn = next_ldn(shpnt); /* unassign all ldns (pun,lun,ldn does not matter for 
remove) */ - immediate_assign(host_index, 0, 0, 0, REMOVE_LDN); + immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN); /* set only LDN for remapped device */ - immediate_assign(host_index, target, cmd->device->lun, ldn, SET_LDN); + immediate_assign(shpnt, target, cmd->device->lun, ldn, SET_LDN); /* get device information for ld[ldn] */ - if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) { - ld(host_index)[ldn].cmd = NULL; /* To prevent panic set 0, because + if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) { + ld(shpnt)[ldn].cmd = NULL; /* To prevent panic set 0, because devices that were not assigned, should have nothing in progress. */ - get_scsi(host_index)[target][cmd->device->lun] = ld(host_index)[ldn].device_type; + get_scsi(shpnt)[target][cmd->device->lun] = ld(shpnt)[ldn].device_type; /* increase assignment counters for statistics in /proc */ - IBM_DS(host_index).dynamical_assignments++; - IBM_DS(host_index).ldn_assignments[ldn]++; + IBM_DS(shpnt).dynamical_assignments++; + IBM_DS(shpnt).ldn_assignments[ldn]++; } else /* panic here, because a device, found at boottime has vanished */ panic("IBM MCA SCSI: ldn=0x%x, SCSI-device on (%d,%d) vanished!\n", ldn, target, cmd->device->lun); /* unassign again all ldns (pun,lun,ldn does not matter for remove) */ - immediate_assign(host_index, 0, 0, 0, REMOVE_LDN); + immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN); /* remap all ldns, as written in the pun/lun table */ lun = 0; #ifdef CONFIG_SCSI_MULTI_LUN for (lun = 0; lun < 8; lun++) #endif for (id = 0; id < max_pun; id++) { - if (get_ldn(host_index)[id][lun] <= MAX_LOG_DEV) - immediate_assign(host_index, id, lun, get_ldn(host_index)[id][lun], SET_LDN); + if (get_ldn(shpnt)[id][lun] <= MAX_LOG_DEV) + immediate_assign(shpnt, id, lun, get_ldn(shpnt)[id][lun], SET_LDN); } /* set back to normal interrupt_handling */ - local_checking_phase_flag(host_index) = 0; + 
local_checking_phase_flag(shpnt) = 0; #ifdef IM_DEBUG_PROBE /* Information on syslog terminal */ printk("IBM MCA SCSI: ldn=0x%x dynamically reassigned to (%d,%d).\n", ldn, target, cmd->device->lun); #endif /* increase next_ldn for next dynamical assignment */ - next_ldn(host_index)++; - if (next_ldn(host_index) >= MAX_LOG_DEV) - next_ldn(host_index) = 7; + next_ldn(shpnt)++; + if (next_ldn(shpnt) >= MAX_LOG_DEV) + next_ldn(shpnt) = 7; } else { /* wall against Linux accesses to the subsystem adapter */ cmd->result = DID_BAD_TARGET << 16; if (done) @@ -1923,34 +1815,32 @@ } /*verify there is no command already in progress for this log dev */ - if (ld(host_index)[ldn].cmd) + if (ld(shpnt)[ldn].cmd) panic("IBM MCA SCSI: cmd already in progress for this ldn.\n"); /*save done in cmd, and save cmd for the interrupt handler */ cmd->scsi_done = done; - ld(host_index)[ldn].cmd = cmd; + ld(shpnt)[ldn].cmd = cmd; /*fill scb information independent of the scsi command */ - scb = &(ld(host_index)[ldn].scb); - ld(host_index)[ldn].tsb.dev_status = 0; + scb = &(ld(shpnt)[ldn].scb); + ld(shpnt)[ldn].tsb.dev_status = 0; scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE; - scb->tsb_adr = isa_virt_to_bus(&(ld(host_index)[ldn].tsb)); + scb->tsb_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].tsb)); scsi_cmd = cmd->cmnd[0]; - if (cmd->use_sg) { - i = cmd->use_sg; - sl = (struct scatterlist *) (cmd->request_buffer); - if (i > 16) - panic("IBM MCA SCSI: scatter-gather list too long.\n"); - while (--i >= 0) { - ld(host_index)[ldn].sge[i].address = (void *) (isa_page_to_bus(sl[i].page) + sl[i].offset); - ld(host_index)[ldn].sge[i].byte_length = sl[i].length; + if (scsi_sg_count(cmd)) { + BUG_ON(scsi_sg_count(cmd) > 16); + + scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) { + ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset); + ld(shpnt)[ldn].sge[i].byte_length = sg->length; } scb->enable |= IM_POINTER_TO_LIST; - scb->sys_buf_adr = 
isa_virt_to_bus(&(ld(host_index)[ldn].sge[0])); - scb->sys_buf_length = cmd->use_sg * sizeof(struct im_sge); + scb->sys_buf_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].sge[0])); + scb->sys_buf_length = scsi_sg_count(cmd) * sizeof(struct im_sge); } else { - scb->sys_buf_adr = isa_virt_to_bus(cmd->request_buffer); + scb->sys_buf_adr = isa_virt_to_bus(scsi_sglist(cmd)); /* recent Linux midlevel SCSI places 1024 byte for inquiry * command. Far too much for old PS/2 hardware. */ switch (scsi_cmd) { @@ -1961,16 +1851,16 @@ case REQUEST_SENSE: case MODE_SENSE: case MODE_SELECT: - if (cmd->request_bufflen > 255) + if (scsi_bufflen(cmd) > 255) scb->sys_buf_length = 255; else - scb->sys_buf_length = cmd->request_bufflen; + scb->sys_buf_length = scsi_bufflen(cmd); break; case TEST_UNIT_READY: scb->sys_buf_length = 0; break; default: - scb->sys_buf_length = cmd->request_bufflen; + scb->sys_buf_length = scsi_bufflen(cmd); break; } } @@ -1982,16 +1872,16 @@ /* for specific device-type debugging: */ #ifdef IM_DEBUG_CMD_SPEC_DEV - if (ld(host_index)[ldn].device_type == IM_DEBUG_CMD_DEVICE) - printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(host_index)[ldn].device_type, scsi_cmd, ldn); + if (ld(shpnt)[ldn].device_type == IM_DEBUG_CMD_DEVICE) + printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(shpnt)[ldn].device_type, scsi_cmd, ldn); #endif /* for possible panics store current command */ - last_scsi_command(host_index)[ldn] = scsi_cmd; - last_scsi_type(host_index)[ldn] = IM_SCB; + last_scsi_command(shpnt)[ldn] = scsi_cmd; + last_scsi_type(shpnt)[ldn] = IM_SCB; /* update statistical info */ - IBM_DS(host_index).total_accesses++; - IBM_DS(host_index).ldn_access[ldn]++; + IBM_DS(shpnt).total_accesses++; + IBM_DS(shpnt).ldn_access[ldn]++; switch (scsi_cmd) { case READ_6: @@ -2003,17 +1893,17 @@ /* Distinguish between disk and other devices. Only disks (that are the most frequently accessed devices) should be supported by the IBM-SCSI-Subsystem commands. 
*/ - switch (ld(host_index)[ldn].device_type) { + switch (ld(shpnt)[ldn].device_type) { case TYPE_DISK: /* for harddisks enter here ... */ case TYPE_MOD: /* ... try it also for MO-drives (send flames as */ /* you like, if this won't work.) */ if (scsi_cmd == READ_6 || scsi_cmd == READ_10 || scsi_cmd == READ_12) { /* read command preparations */ scb->enable |= IM_READ_CONTROL; - IBM_DS(host_index).ldn_read_access[ldn]++; /* increase READ-access on ldn stat. */ + IBM_DS(shpnt).ldn_read_access[ldn]++; /* increase READ-access on ldn stat. */ scb->command = IM_READ_DATA_CMD | IM_NO_DISCONNECT; } else { /* write command preparations */ - IBM_DS(host_index).ldn_write_access[ldn]++; /* increase write-count on ldn stat. */ + IBM_DS(shpnt).ldn_write_access[ldn]++; /* increase write-count on ldn stat. */ scb->command = IM_WRITE_DATA_CMD | IM_NO_DISCONNECT; } if (scsi_cmd == READ_6 || scsi_cmd == WRITE_6) { @@ -2023,9 +1913,9 @@ scb->u1.log_blk_adr = (((unsigned) cmd->cmnd[5]) << 0) | (((unsigned) cmd->cmnd[4]) << 8) | (((unsigned) cmd->cmnd[3]) << 16) | (((unsigned) cmd->cmnd[2]) << 24); scb->u2.blk.count = (((unsigned) cmd->cmnd[8]) << 0) | (((unsigned) cmd->cmnd[7]) << 8); } - last_scsi_logical_block(host_index)[ldn] = scb->u1.log_blk_adr; - last_scsi_blockcount(host_index)[ldn] = scb->u2.blk.count; - scb->u2.blk.length = ld(host_index)[ldn].block_length; + last_scsi_logical_block(shpnt)[ldn] = scb->u1.log_blk_adr; + last_scsi_blockcount(shpnt)[ldn] = scb->u2.blk.count; + scb->u2.blk.length = ld(shpnt)[ldn].block_length; break; /* for other devices, enter here. Other types are not known by Linux! TYPE_NO_LUN is forbidden as valid device. */ @@ -2046,14 +1936,14 @@ scb->enable |= IM_BYPASS_BUFFER; scb->u1.scsi_cmd_length = cmd->cmd_len; memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len); - last_scsi_type(host_index)[ldn] = IM_LONG_SCB; + last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; /* Read/write on this non-disk devices is also displayworthy, so flash-up the LED/display. 
*/ break; } break; case INQUIRY: - IBM_DS(host_index).ldn_inquiry_access[ldn]++; + IBM_DS(shpnt).ldn_inquiry_access[ldn]++; scb->command = IM_DEVICE_INQUIRY_CMD; scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; scb->u1.log_blk_adr = 0; @@ -2064,7 +1954,7 @@ scb->u1.log_blk_adr = 0; scb->u1.scsi_cmd_length = 6; memcpy(scb->u2.scsi_command, cmd->cmnd, 6); - last_scsi_type(host_index)[ldn] = IM_LONG_SCB; + last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; break; case READ_CAPACITY: /* the length of system memory buffer must be exactly 8 bytes */ @@ -2081,12 +1971,12 @@ /* Commands that need write-only-mode (system -> device): */ case MODE_SELECT: case MODE_SELECT_10: - IBM_DS(host_index).ldn_modeselect_access[ldn]++; + IBM_DS(shpnt).ldn_modeselect_access[ldn]++; scb->command = IM_OTHER_SCSI_CMD_CMD; scb->enable |= IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; /*Select needs WRITE-enabled */ scb->u1.scsi_cmd_length = cmd->cmd_len; memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len); - last_scsi_type(host_index)[ldn] = IM_LONG_SCB; + last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; break; /* For other commands, read-only is useful. Most other commands are running without an input-data-block. 
*/ @@ -2095,19 +1985,19 @@ scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; scb->u1.scsi_cmd_length = cmd->cmd_len; memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len); - last_scsi_type(host_index)[ldn] = IM_LONG_SCB; + last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; break; } /*issue scb command, and return */ if (++disk_rw_in_progress == 1) PS2_DISK_LED_ON(shpnt->host_no, target); - if (last_scsi_type(host_index)[ldn] == IM_LONG_SCB) { - issue_cmd(host_index, isa_virt_to_bus(scb), IM_LONG_SCB | ldn); - IBM_DS(host_index).long_scbs++; + if (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB) { + issue_cmd(shpnt, isa_virt_to_bus(scb), IM_LONG_SCB | ldn); + IBM_DS(shpnt).long_scbs++; } else { - issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn); - IBM_DS(host_index).scbs++; + issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn); + IBM_DS(shpnt).scbs++; } return 0; } @@ -2122,7 +2012,6 @@ unsigned int ldn; void (*saved_done) (Scsi_Cmnd *); int target; - int host_index; int max_pun; unsigned long imm_command; @@ -2131,35 +2020,23 @@ #endif shpnt = cmd->device->host; - /* search for the right hostadapter */ - for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++); - if (!hosts[host_index]) { /* invalid hostadapter descriptor address */ - cmd->result = DID_NO_CONNECT << 16; - if (cmd->scsi_done) - (cmd->scsi_done) (cmd); - shpnt = cmd->device->host; -#ifdef IM_DEBUG_PROBE - printk(KERN_DEBUG "IBM MCA SCSI: Abort adapter selection failed!\n"); -#endif - return SUCCESS; - } - max_pun = subsystem_maxid(host_index); + max_pun = subsystem_maxid(shpnt); if (ibm_ansi_order) { target = max_pun - 1 - cmd->device->id; - if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index))) + if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt))) target--; - else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index))) + else 
if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt))) target++; } else target = cmd->device->id; /* get logical device number, and disable system interrupts */ printk(KERN_WARNING "IBM MCA SCSI: Sending abort to device pun=%d, lun=%d.\n", target, cmd->device->lun); - ldn = get_ldn(host_index)[target][cmd->device->lun]; + ldn = get_ldn(shpnt)[target][cmd->device->lun]; /*if cmd for this ldn has already finished, no need to abort */ - if (!ld(host_index)[ldn].cmd) { + if (!ld(shpnt)[ldn].cmd) { return SUCCESS; } @@ -2170,20 +2047,20 @@ saved_done = cmd->scsi_done; cmd->scsi_done = internal_done; cmd->SCp.Status = 0; - last_scsi_command(host_index)[ldn] = IM_ABORT_IMM_CMD; - last_scsi_type(host_index)[ldn] = IM_IMM_CMD; - imm_command = inl(IM_CMD_REG(host_index)); + last_scsi_command(shpnt)[ldn] = IM_ABORT_IMM_CMD; + last_scsi_type(shpnt)[ldn] = IM_IMM_CMD; + imm_command = inl(IM_CMD_REG(shpnt)); imm_command &= (unsigned long) (0xffff0000); /* mask reserved stuff */ imm_command |= (unsigned long) (IM_ABORT_IMM_CMD); /* must wait for attention reg not busy */ /* FIXME - timeout, politeness */ while (1) { - if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) + if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) break; } /* write registers and enable system interrupts */ - outl(imm_command, IM_CMD_REG(host_index)); - outb(IM_IMM_CMD | ldn, IM_ATTN_REG(host_index)); + outl(imm_command, IM_CMD_REG(shpnt)); + outb(IM_IMM_CMD | ldn, IM_ATTN_REG(shpnt)); #ifdef IM_DEBUG_PROBE printk("IBM MCA SCSI: Abort queued to adapter...\n"); #endif @@ -2202,7 +2079,7 @@ cmd->result |= DID_ABORT << 16; if (cmd->scsi_done) (cmd->scsi_done) (cmd); - ld(host_index)[ldn].cmd = NULL; + ld(shpnt)[ldn].cmd = NULL; #ifdef IM_DEBUG_PROBE printk("IBM MCA SCSI: Abort finished with success.\n"); #endif @@ -2211,7 +2088,7 @@ cmd->result |= DID_NO_CONNECT << 16; if (cmd->scsi_done) (cmd->scsi_done) (cmd); - ld(host_index)[ldn].cmd = NULL; + ld(shpnt)[ldn].cmd = NULL; #ifdef IM_DEBUG_PROBE 
printk("IBM MCA SCSI: Abort failed.\n"); #endif @@ -2236,71 +2113,65 @@ struct Scsi_Host *shpnt; Scsi_Cmnd *cmd_aid; int ticks, i; - int host_index; unsigned long imm_command; BUG_ON(cmd == NULL); ticks = IM_RESET_DELAY * HZ; shpnt = cmd->device->host; - /* search for the right hostadapter */ - for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++); - if (!hosts[host_index]) /* invalid hostadapter descriptor address */ - return FAILED; - - if (local_checking_phase_flag(host_index)) { + if (local_checking_phase_flag(shpnt)) { printk(KERN_WARNING "IBM MCA SCSI: unable to reset while checking devices.\n"); return FAILED; } /* issue reset immediate command to subsystem, and wait for interrupt */ printk("IBM MCA SCSI: resetting all devices.\n"); - reset_status(host_index) = IM_RESET_IN_PROGRESS; - last_scsi_command(host_index)[0xf] = IM_RESET_IMM_CMD; - last_scsi_type(host_index)[0xf] = IM_IMM_CMD; - imm_command = inl(IM_CMD_REG(host_index)); + reset_status(shpnt) = IM_RESET_IN_PROGRESS; + last_scsi_command(shpnt)[0xf] = IM_RESET_IMM_CMD; + last_scsi_type(shpnt)[0xf] = IM_IMM_CMD; + imm_command = inl(IM_CMD_REG(shpnt)); imm_command &= (unsigned long) (0xffff0000); /* mask reserved stuff */ imm_command |= (unsigned long) (IM_RESET_IMM_CMD); /* must wait for attention reg not busy */ while (1) { - if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) + if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) break; spin_unlock_irq(shpnt->host_lock); yield(); spin_lock_irq(shpnt->host_lock); } /*write registers and enable system interrupts */ - outl(imm_command, IM_CMD_REG(host_index)); - outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(host_index)); + outl(imm_command, IM_CMD_REG(shpnt)); + outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(shpnt)); /* wait for interrupt finished or intr_stat register to be set, as the * interrupt will not be executed, while we are in here! */ /* FIXME: This is really really icky we so want a sleeping version of this ! 
*/ - while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(host_index)) & 0x8f) != 0x8f)) { + while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(shpnt)) & 0x8f) != 0x8f)) { udelay((1 + 999 / HZ) * 1000); barrier(); } /* if reset did not complete, just return an error */ if (!ticks) { printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY); - reset_status(host_index) = IM_RESET_FINISHED_FAIL; + reset_status(shpnt) = IM_RESET_FINISHED_FAIL; return FAILED; } - if ((inb(IM_INTR_REG(host_index)) & 0x8f) == 0x8f) { + if ((inb(IM_INTR_REG(shpnt)) & 0x8f) == 0x8f) { /* analysis done by this routine and not by the intr-routine */ - if (inb(IM_INTR_REG(host_index)) == 0xaf) - reset_status(host_index) = IM_RESET_FINISHED_OK_NO_INT; - else if (inb(IM_INTR_REG(host_index)) == 0xcf) - reset_status(host_index) = IM_RESET_FINISHED_FAIL; + if (inb(IM_INTR_REG(shpnt)) == 0xaf) + reset_status(shpnt) = IM_RESET_FINISHED_OK_NO_INT; + else if (inb(IM_INTR_REG(shpnt)) == 0xcf) + reset_status(shpnt) = IM_RESET_FINISHED_FAIL; else /* failed, 4get it */ - reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS_NO_INT; - outb(IM_EOI | 0xf, IM_ATTN_REG(host_index)); + reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS_NO_INT; + outb(IM_EOI | 0xf, IM_ATTN_REG(shpnt)); } /* if reset failed, just return an error */ - if (reset_status(host_index) == IM_RESET_FINISHED_FAIL) { + if (reset_status(shpnt) == IM_RESET_FINISHED_FAIL) { printk(KERN_ERR "IBM MCA SCSI: reset failed.\n"); return FAILED; } @@ -2308,9 +2179,9 @@ /* so reset finished ok - call outstanding done's, and return success */ printk(KERN_INFO "IBM MCA SCSI: Reset successfully completed.\n"); for (i = 0; i < MAX_LOG_DEV; i++) { - cmd_aid = ld(host_index)[i].cmd; + cmd_aid = ld(shpnt)[i].cmd; if (cmd_aid && cmd_aid->scsi_done) { - ld(host_index)[i].cmd = NULL; + ld(shpnt)[i].cmd = NULL; cmd_aid->result = DID_RESET << 16; } } @@ -2351,46 
+2222,46 @@ } /* calculate percentage of total accesses on a ldn */ -static int ldn_access_load(int host_index, int ldn) +static int ldn_access_load(struct Scsi_Host *shpnt, int ldn) { - if (IBM_DS(host_index).total_accesses == 0) + if (IBM_DS(shpnt).total_accesses == 0) return (0); - if (IBM_DS(host_index).ldn_access[ldn] == 0) + if (IBM_DS(shpnt).ldn_access[ldn] == 0) return (0); - return (IBM_DS(host_index).ldn_access[ldn] * 100) / IBM_DS(host_index).total_accesses; + return (IBM_DS(shpnt).ldn_access[ldn] * 100) / IBM_DS(shpnt).total_accesses; } /* calculate total amount of r/w-accesses */ -static int ldn_access_total_read_write(int host_index) +static int ldn_access_total_read_write(struct Scsi_Host *shpnt) { int a; int i; a = 0; for (i = 0; i <= MAX_LOG_DEV; i++) - a += IBM_DS(host_index).ldn_read_access[i] + IBM_DS(host_index).ldn_write_access[i]; + a += IBM_DS(shpnt).ldn_read_access[i] + IBM_DS(shpnt).ldn_write_access[i]; return (a); } -static int ldn_access_total_inquiry(int host_index) +static int ldn_access_total_inquiry(struct Scsi_Host *shpnt) { int a; int i; a = 0; for (i = 0; i <= MAX_LOG_DEV; i++) - a += IBM_DS(host_index).ldn_inquiry_access[i]; + a += IBM_DS(shpnt).ldn_inquiry_access[i]; return (a); } -static int ldn_access_total_modeselect(int host_index) +static int ldn_access_total_modeselect(struct Scsi_Host *shpnt) { int a; int i; a = 0; for (i = 0; i <= MAX_LOG_DEV; i++) - a += IBM_DS(host_index).ldn_modeselect_access[i]; + a += IBM_DS(shpnt).ldn_modeselect_access[i]; return (a); } @@ -2398,19 +2269,14 @@ static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout) { int len = 0; - int i, id, lun, host_index; + int i, id, lun; unsigned long flags; int max_pun; - for (i = 0; hosts[i] && hosts[i] != shpnt; i++); - spin_lock_irqsave(hosts[i]->host_lock, flags); /* Check it */ - host_index = i; - if (!shpnt) { - len += sprintf(buffer + len, "\nIBM MCA SCSI: Can't find adapter"); - return 
len; - } - max_pun = subsystem_maxid(host_index); + spin_lock_irqsave(shpnt->host_lock, flags); /* Check it */ + + max_pun = subsystem_maxid(shpnt); len += sprintf(buffer + len, "\n IBM-SCSI-Subsystem-Linux-Driver, Version %s\n\n\n", IBMMCA_SCSI_DRIVER_VERSION); len += sprintf(buffer + len, " SCSI Access-Statistics:\n"); @@ -2421,40 +2287,40 @@ len += sprintf(buffer + len, " Multiple LUN probing.....: No\n"); #endif len += sprintf(buffer + len, " This Hostnumber..........: %d\n", shpnt->host_no); - len += sprintf(buffer + len, " Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(host_index))); + len += sprintf(buffer + len, " Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(shpnt))); len += sprintf(buffer + len, " (Shared) IRQ.............: %d\n", IM_IRQ); - len += sprintf(buffer + len, " Total Interrupts.........: %d\n", IBM_DS(host_index).total_interrupts); - len += sprintf(buffer + len, " Total SCSI Accesses......: %d\n", IBM_DS(host_index).total_accesses); - len += sprintf(buffer + len, " Total short SCBs.........: %d\n", IBM_DS(host_index).scbs); - len += sprintf(buffer + len, " Total long SCBs..........: %d\n", IBM_DS(host_index).long_scbs); - len += sprintf(buffer + len, " Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(host_index)); - len += sprintf(buffer + len, " Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(host_index)); - len += sprintf(buffer + len, " Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(host_index)); - len += sprintf(buffer + len, " Total SCSI other cmds..: %d\n", IBM_DS(host_index).total_accesses - ldn_access_total_read_write(host_index) - - ldn_access_total_modeselect(host_index) - - ldn_access_total_inquiry(host_index)); - len += sprintf(buffer + len, " Total SCSI command fails.: %d\n\n", IBM_DS(host_index).total_errors); + len += sprintf(buffer + len, " Total Interrupts.........: %d\n", IBM_DS(shpnt).total_interrupts); + len += sprintf(buffer + len, " Total SCSI 
Accesses......: %d\n", IBM_DS(shpnt).total_accesses); + len += sprintf(buffer + len, " Total short SCBs.........: %d\n", IBM_DS(shpnt).scbs); + len += sprintf(buffer + len, " Total long SCBs..........: %d\n", IBM_DS(shpnt).long_scbs); + len += sprintf(buffer + len, " Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(shpnt)); + len += sprintf(buffer + len, " Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(shpnt)); + len += sprintf(buffer + len, " Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(shpnt)); + len += sprintf(buffer + len, " Total SCSI other cmds..: %d\n", IBM_DS(shpnt).total_accesses - ldn_access_total_read_write(shpnt) + - ldn_access_total_modeselect(shpnt) + - ldn_access_total_inquiry(shpnt)); + len += sprintf(buffer + len, " Total SCSI command fails.: %d\n\n", IBM_DS(shpnt).total_errors); len += sprintf(buffer + len, " Logical-Device-Number (LDN) Access-Statistics:\n"); len += sprintf(buffer + len, " LDN | Accesses [%%] | READ | WRITE | ASSIGNMENTS\n"); len += sprintf(buffer + len, " -----|--------------|-----------|-----------|--------------\n"); for (i = 0; i <= MAX_LOG_DEV; i++) - len += sprintf(buffer + len, " %2X | %3d | %8d | %8d | %8d\n", i, ldn_access_load(host_index, i), IBM_DS(host_index).ldn_read_access[i], IBM_DS(host_index).ldn_write_access[i], IBM_DS(host_index).ldn_assignments[i]); + len += sprintf(buffer + len, " %2X | %3d | %8d | %8d | %8d\n", i, ldn_access_load(shpnt, i), IBM_DS(shpnt).ldn_read_access[i], IBM_DS(shpnt).ldn_write_access[i], IBM_DS(shpnt).ldn_assignments[i]); len += sprintf(buffer + len, " -----------------------------------------------------------\n\n"); len += sprintf(buffer + len, " Dynamical-LDN-Assignment-Statistics:\n"); - len += sprintf(buffer + len, " Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(host_index).total_scsi_devices); - len += sprintf(buffer + len, " Dynamical Assignment necessary...: %s\n", IBM_DS(host_index).dyn_flag ? 
"Yes" : "No "); - len += sprintf(buffer + len, " Next LDN to be assigned..........: 0x%x\n", next_ldn(host_index)); - len += sprintf(buffer + len, " Dynamical assignments done yet...: %d\n", IBM_DS(host_index).dynamical_assignments); + len += sprintf(buffer + len, " Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(shpnt).total_scsi_devices); + len += sprintf(buffer + len, " Dynamical Assignment necessary...: %s\n", IBM_DS(shpnt).dyn_flag ? "Yes" : "No "); + len += sprintf(buffer + len, " Next LDN to be assigned..........: 0x%x\n", next_ldn(shpnt)); + len += sprintf(buffer + len, " Dynamical assignments done yet...: %d\n", IBM_DS(shpnt).dynamical_assignments); len += sprintf(buffer + len, "\n Current SCSI-Device-Mapping:\n"); len += sprintf(buffer + len, " Physical SCSI-Device Map Logical SCSI-Device Map\n"); len += sprintf(buffer + len, " ID\\LUN 0 1 2 3 4 5 6 7 ID\\LUN 0 1 2 3 4 5 6 7\n"); for (id = 0; id < max_pun; id++) { len += sprintf(buffer + len, " %2d ", id); for (lun = 0; lun < 8; lun++) - len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(host_index)[id][lun])); + len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(shpnt)[id][lun])); len += sprintf(buffer + len, " %2d ", id); for (lun = 0; lun < 8; lun++) - len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(host_index)[id][lun])); + len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(shpnt)[id][lun])); len += sprintf(buffer + len, "\n"); } @@ -2488,20 +2354,31 @@ __setup("ibmmcascsi=", option_setup); -static struct scsi_host_template driver_template = { - .proc_name = "ibmmca", - .proc_info = ibmmca_proc_info, - .name = "IBM SCSI-Subsystem", - .detect = ibmmca_detect, - .release = ibmmca_release, - .queuecommand = ibmmca_queuecommand, - .eh_abort_handler = ibmmca_abort, - .eh_host_reset_handler = ibmmca_host_reset, - .bios_param = ibmmca_biosparam, - .can_queue = 16, - .this_id = 7, - .sg_tablesize = 16, - .cmd_per_lun = 1, - .use_clustering = ENABLE_CLUSTERING, +static struct mca_driver 
ibmmca_driver = { + .id_table = ibmmca_id_table, + .driver = { + .name = "ibmmca", + .bus = &mca_bus_type, + .probe = ibmmca_probe, + .remove = __devexit_p(ibmmca_remove), + }, }; -#include "scsi_module.c" + +static int __init ibmmca_init(void) +{ +#ifdef MODULE + /* If the driver is run as module, read from conf.modules or cmd-line */ + if (boot_options) + option_setup(boot_options); +#endif + + return mca_register_driver_integrated(&ibmmca_driver, MCA_INTEGSCSI); +} + +static void __exit ibmmca_exit(void) +{ + mca_unregister_driver(&ibmmca_driver); +} + +module_init(ibmmca_init); +module_exit(ibmmca_exit); diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.h linux-2.6.22-591/drivers/scsi/ibmmca.h --- linux-2.6.22-570/drivers/scsi/ibmmca.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ibmmca.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,21 +0,0 @@ -/* - * Low Level Driver for the IBM Microchannel SCSI Subsystem - * (Headerfile, see Documentation/scsi/ibmmca.txt for description of the - * IBM MCA SCSI-driver. - * For use under the GNU General Public License within the Linux-kernel project. - * This include file works only correctly with kernel 2.4.0 or higher!!! 
*/ - -#ifndef _IBMMCA_H -#define _IBMMCA_H - -/* Common forward declarations for all Linux-versions: */ - -/* Interfaces to the midlevel Linux SCSI driver */ -static int ibmmca_detect (struct scsi_host_template *); -static int ibmmca_release (struct Scsi_Host *); -static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *)); -static int ibmmca_abort (Scsi_Cmnd *); -static int ibmmca_host_reset (Scsi_Cmnd *); -static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *); - -#endif /* _IBMMCA_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.c --- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -173,8 +173,7 @@ } } if (in_use) - printk(KERN_WARNING - "ibmvscsi: releasing event pool with %d " + dev_warn(hostdata->dev, "releasing event pool with %d " "events still in use?\n", in_use); kfree(pool->events); dma_free_coherent(hostdata->dev, @@ -210,14 +209,12 @@ struct srp_event_struct *evt) { if (!valid_event_struct(pool, evt)) { - printk(KERN_ERR - "ibmvscsi: Freeing invalid event_struct %p " + dev_err(evt->hostdata->dev, "Freeing invalid event_struct %p " "(not in pool %p)\n", evt, pool->events); return; } if (atomic_inc_return(&evt->free) != 1) { - printk(KERN_ERR - "ibmvscsi: Freeing event_struct %p " + dev_err(evt->hostdata->dev, "Freeing event_struct %p " "which is not in use!\n", evt); return; } @@ -408,13 +405,6 @@ return 1; } - if (sg_mapped > SG_ALL) { - printk(KERN_ERR - "ibmvscsi: More than %d mapped sg entries, got %d\n", - SG_ALL, sg_mapped); - return 0; - } - indirect->table_desc.va = 0; indirect->table_desc.len = sg_mapped * sizeof(struct srp_direct_buf); indirect->table_desc.key = 0; @@ -433,10 +423,9 @@ SG_ALL * sizeof(struct srp_direct_buf), &evt_struct->ext_list_token, 0); if (!evt_struct->ext_list) { - printk(KERN_ERR 
- "ibmvscsi: Can't allocate memory for indirect table\n"); + sdev_printk(KERN_ERR, cmd->device, + "Can't allocate memory for indirect table\n"); return 0; - } } @@ -471,8 +460,8 @@ cmd->request_bufflen, DMA_BIDIRECTIONAL); if (dma_mapping_error(data->va)) { - printk(KERN_ERR - "ibmvscsi: Unable to map request_buffer for command!\n"); + sdev_printk(KERN_ERR, cmd->device, + "Unable to map request_buffer for command!\n"); return 0; } data->len = cmd->request_bufflen; @@ -503,12 +492,12 @@ case DMA_NONE: return 1; case DMA_BIDIRECTIONAL: - printk(KERN_ERR - "ibmvscsi: Can't map DMA_BIDIRECTIONAL to read/write\n"); + sdev_printk(KERN_ERR, cmd->device, + "Can't map DMA_BIDIRECTIONAL to read/write\n"); return 0; default: - printk(KERN_ERR - "ibmvscsi: Unknown data direction 0x%02x; can't map!\n", + sdev_printk(KERN_ERR, cmd->device, + "Unknown data direction 0x%02x; can't map!\n", cmd->sc_data_direction); return 0; } @@ -520,6 +509,70 @@ return map_single_data(cmd, srp_cmd, dev); } +/** + * purge_requests: Our virtual adapter just shut down. 
purge any sent requests + * @hostdata: the adapter + */ +static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) +{ + struct srp_event_struct *tmp_evt, *pos; + unsigned long flags; + + spin_lock_irqsave(hostdata->host->host_lock, flags); + list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) { + list_del(&tmp_evt->list); + del_timer(&tmp_evt->timer); + if (tmp_evt->cmnd) { + tmp_evt->cmnd->result = (error_code << 16); + unmap_cmd_data(&tmp_evt->iu.srp.cmd, + tmp_evt, + tmp_evt->hostdata->dev); + if (tmp_evt->cmnd_done) + tmp_evt->cmnd_done(tmp_evt->cmnd); + } else if (tmp_evt->done) + tmp_evt->done(tmp_evt); + free_event_struct(&tmp_evt->hostdata->pool, tmp_evt); + } + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +} + +/** + * ibmvscsi_reset_host - Reset the connection to the server + * @hostdata: struct ibmvscsi_host_data to reset +*/ +static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata) +{ + scsi_block_requests(hostdata->host); + atomic_set(&hostdata->request_limit, 0); + + purge_requests(hostdata, DID_ERROR); + if ((ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata)) || + (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0)) || + (vio_enable_interrupts(to_vio_dev(hostdata->dev)))) { + atomic_set(&hostdata->request_limit, -1); + dev_err(hostdata->dev, "error after reset\n"); + } + + scsi_unblock_requests(hostdata->host); +} + +/** + * ibmvscsi_timeout - Internal command timeout handler + * @evt_struct: struct srp_event_struct that timed out + * + * Called when an internally generated command times out +*/ +static void ibmvscsi_timeout(struct srp_event_struct *evt_struct) +{ + struct ibmvscsi_host_data *hostdata = evt_struct->hostdata; + + dev_err(hostdata->dev, "Command timed out (%x). 
Resetting connection\n", + evt_struct->iu.srp.cmd.opcode); + + ibmvscsi_reset_host(hostdata); +} + + /* ------------------------------------------------------------ * Routines for sending and receiving SRPs */ @@ -527,12 +580,14 @@ * ibmvscsi_send_srp_event: - Transforms event to u64 array and calls send_crq() * @evt_struct: evt_struct to be sent * @hostdata: ibmvscsi_host_data of host + * @timeout: timeout in seconds - 0 means do not time command * * Returns the value returned from ibmvscsi_send_crq(). (Zero for success) * Note that this routine assumes that host_lock is held for synchronization */ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct, - struct ibmvscsi_host_data *hostdata) + struct ibmvscsi_host_data *hostdata, + unsigned long timeout) { u64 *crq_as_u64 = (u64 *) &evt_struct->crq; int request_status; @@ -588,12 +643,20 @@ */ list_add_tail(&evt_struct->list, &hostdata->sent); + init_timer(&evt_struct->timer); + if (timeout) { + evt_struct->timer.data = (unsigned long) evt_struct; + evt_struct->timer.expires = jiffies + (timeout * HZ); + evt_struct->timer.function = (void (*)(unsigned long))ibmvscsi_timeout; + add_timer(&evt_struct->timer); + } + if ((rc = ibmvscsi_send_crq(hostdata, crq_as_u64[0], crq_as_u64[1])) != 0) { list_del(&evt_struct->list); + del_timer(&evt_struct->timer); - printk(KERN_ERR "ibmvscsi: send error %d\n", - rc); + dev_err(hostdata->dev, "send error %d\n", rc); atomic_inc(&hostdata->request_limit); goto send_error; } @@ -634,9 +697,8 @@ if (unlikely(rsp->opcode != SRP_RSP)) { if (printk_ratelimit()) - printk(KERN_WARNING - "ibmvscsi: bad SRP RSP type %d\n", - rsp->opcode); + dev_warn(evt_struct->hostdata->dev, + "bad SRP RSP type %d\n", rsp->opcode); } if (cmnd) { @@ -697,7 +759,7 @@ srp_cmd->lun = ((u64) lun) << 48; if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { - printk(KERN_ERR "ibmvscsi: couldn't convert cmd to srp_cmd\n"); + sdev_printk(KERN_ERR, cmnd->device, "couldn't convert 
cmd to srp_cmd\n"); free_event_struct(&hostdata->pool, evt_struct); return SCSI_MLQUEUE_HOST_BUSY; } @@ -722,7 +784,7 @@ offsetof(struct srp_indirect_buf, desc_list); } - return ibmvscsi_send_srp_event(evt_struct, hostdata); + return ibmvscsi_send_srp_event(evt_struct, hostdata, 0); } /* ------------------------------------------------------------ @@ -744,10 +806,10 @@ DMA_BIDIRECTIONAL); if (evt_struct->xfer_iu->mad.adapter_info.common.status) { - printk("ibmvscsi: error %d getting adapter info\n", + dev_err(hostdata->dev, "error %d getting adapter info\n", evt_struct->xfer_iu->mad.adapter_info.common.status); } else { - printk("ibmvscsi: host srp version: %s, " + dev_info(hostdata->dev, "host srp version: %s, " "host partition %s (%d), OS %d, max io %u\n", hostdata->madapter_info.srp_version, hostdata->madapter_info.partition_name, @@ -761,10 +823,9 @@ if (hostdata->madapter_info.os_type == 3 && strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) { - printk("ibmvscsi: host (Ver. %s) doesn't support large" - "transfers\n", + dev_err(hostdata->dev, "host (Ver. 
%s) doesn't support large transfers\n", hostdata->madapter_info.srp_version); - printk("ibmvscsi: limiting scatterlists to %d\n", + dev_err(hostdata->dev, "limiting scatterlists to %d\n", MAX_INDIRECT_BUFS); hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; } @@ -784,12 +845,13 @@ { struct viosrp_adapter_info *req; struct srp_event_struct *evt_struct; + unsigned long flags; dma_addr_t addr; evt_struct = get_event_struct(&hostdata->pool); if (!evt_struct) { - printk(KERN_ERR "ibmvscsi: couldn't allocate an event " - "for ADAPTER_INFO_REQ!\n"); + dev_err(hostdata->dev, + "couldn't allocate an event for ADAPTER_INFO_REQ!\n"); return; } @@ -809,20 +871,20 @@ DMA_BIDIRECTIONAL); if (dma_mapping_error(req->buffer)) { - printk(KERN_ERR - "ibmvscsi: Unable to map request_buffer " - "for adapter_info!\n"); + dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n"); free_event_struct(&hostdata->pool, evt_struct); return; } - if (ibmvscsi_send_srp_event(evt_struct, hostdata)) { - printk(KERN_ERR "ibmvscsi: couldn't send ADAPTER_INFO_REQ!\n"); + spin_lock_irqsave(hostdata->host->host_lock, flags); + if (ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2)) { + dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n"); dma_unmap_single(hostdata->dev, addr, sizeof(hostdata->madapter_info), DMA_BIDIRECTIONAL); } + spin_unlock_irqrestore(hostdata->host->host_lock, flags); }; /** @@ -839,24 +901,23 @@ case SRP_LOGIN_RSP: /* it worked! */ break; case SRP_LOGIN_REJ: /* refused! */ - printk(KERN_INFO "ibmvscsi: SRP_LOGIN_REJ reason %u\n", + dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n", evt_struct->xfer_iu->srp.login_rej.reason); /* Login failed. */ atomic_set(&hostdata->request_limit, -1); return; default: - printk(KERN_ERR - "ibmvscsi: Invalid login response typecode 0x%02x!\n", + dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n", evt_struct->xfer_iu->srp.login_rsp.opcode); /* Login failed. 
*/ atomic_set(&hostdata->request_limit, -1); return; } - printk(KERN_INFO "ibmvscsi: SRP_LOGIN succeeded\n"); + dev_info(hostdata->dev, "SRP_LOGIN succeeded\n"); if (evt_struct->xfer_iu->srp.login_rsp.req_lim_delta < 0) - printk(KERN_ERR "ibmvscsi: Invalid request_limit.\n"); + dev_err(hostdata->dev, "Invalid request_limit.\n"); /* Now we know what the real request-limit is. * This value is set rather than added to request_limit because @@ -885,8 +946,7 @@ struct srp_login_req *login; struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool); if (!evt_struct) { - printk(KERN_ERR - "ibmvscsi: couldn't allocate an event for login req!\n"); + dev_err(hostdata->dev, "couldn't allocate an event for login req!\n"); return FAILED; } @@ -907,9 +967,9 @@ */ atomic_set(&hostdata->request_limit, 1); - rc = ibmvscsi_send_srp_event(evt_struct, hostdata); + rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2); spin_unlock_irqrestore(hostdata->host->host_lock, flags); - printk("ibmvscsic: sent SRP login\n"); + dev_info(hostdata->dev, "sent SRP login\n"); return rc; }; @@ -958,13 +1018,13 @@ if (!found_evt) { spin_unlock_irqrestore(hostdata->host->host_lock, flags); - return FAILED; + return SUCCESS; } evt = get_event_struct(&hostdata->pool); if (evt == NULL) { spin_unlock_irqrestore(hostdata->host->host_lock, flags); - printk(KERN_ERR "ibmvscsi: failed to allocate abort event\n"); + sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n"); return FAILED; } @@ -982,15 +1042,16 @@ tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; tsk_mgmt->task_tag = (u64) found_evt; - printk(KERN_INFO "ibmvscsi: aborting command. lun 0x%lx, tag 0x%lx\n", + sdev_printk(KERN_INFO, cmd->device, "aborting command. 
lun 0x%lx, tag 0x%lx\n", tsk_mgmt->lun, tsk_mgmt->task_tag); evt->sync_srp = &srp_rsp; init_completion(&evt->comp); - rsp_rc = ibmvscsi_send_srp_event(evt, hostdata); + rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (rsp_rc != 0) { - printk(KERN_ERR "ibmvscsi: failed to send abort() event\n"); + sdev_printk(KERN_ERR, cmd->device, + "failed to send abort() event. rc=%d\n", rsp_rc); return FAILED; } @@ -999,8 +1060,7 @@ /* make sure we got a good response */ if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) { if (printk_ratelimit()) - printk(KERN_WARNING - "ibmvscsi: abort bad SRP RSP type %d\n", + sdev_printk(KERN_WARNING, cmd->device, "abort bad SRP RSP type %d\n", srp_rsp.srp.rsp.opcode); return FAILED; } @@ -1012,10 +1072,9 @@ if (rsp_rc) { if (printk_ratelimit()) - printk(KERN_WARNING - "ibmvscsi: abort code %d for task tag 0x%lx\n", - rsp_rc, - tsk_mgmt->task_tag); + sdev_printk(KERN_WARNING, cmd->device, + "abort code %d for task tag 0x%lx\n", + rsp_rc, tsk_mgmt->task_tag); return FAILED; } @@ -1034,14 +1093,12 @@ if (found_evt == NULL) { spin_unlock_irqrestore(hostdata->host->host_lock, flags); - printk(KERN_INFO - "ibmvscsi: aborted task tag 0x%lx completed\n", + sdev_printk(KERN_INFO, cmd->device, "aborted task tag 0x%lx completed\n", tsk_mgmt->task_tag); return SUCCESS; } - printk(KERN_INFO - "ibmvscsi: successfully aborted task tag 0x%lx\n", + sdev_printk(KERN_INFO, cmd->device, "successfully aborted task tag 0x%lx\n", tsk_mgmt->task_tag); cmd->result = (DID_ABORT << 16); @@ -1076,7 +1133,7 @@ evt = get_event_struct(&hostdata->pool); if (evt == NULL) { spin_unlock_irqrestore(hostdata->host->host_lock, flags); - printk(KERN_ERR "ibmvscsi: failed to allocate reset event\n"); + sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n"); return FAILED; } @@ -1093,15 +1150,16 @@ tsk_mgmt->lun = ((u64) lun) << 48; tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; - 
printk(KERN_INFO "ibmvscsi: resetting device. lun 0x%lx\n", + sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n", tsk_mgmt->lun); evt->sync_srp = &srp_rsp; init_completion(&evt->comp); - rsp_rc = ibmvscsi_send_srp_event(evt, hostdata); + rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (rsp_rc != 0) { - printk(KERN_ERR "ibmvscsi: failed to send reset event\n"); + sdev_printk(KERN_ERR, cmd->device, + "failed to send reset event. rc=%d\n", rsp_rc); return FAILED; } @@ -1110,8 +1168,7 @@ /* make sure we got a good response */ if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) { if (printk_ratelimit()) - printk(KERN_WARNING - "ibmvscsi: reset bad SRP RSP type %d\n", + sdev_printk(KERN_WARNING, cmd->device, "reset bad SRP RSP type %d\n", srp_rsp.srp.rsp.opcode); return FAILED; } @@ -1123,8 +1180,8 @@ if (rsp_rc) { if (printk_ratelimit()) - printk(KERN_WARNING - "ibmvscsi: reset code %d for task tag 0x%lx\n", + sdev_printk(KERN_WARNING, cmd->device, + "reset code %d for task tag 0x%lx\n", rsp_rc, tsk_mgmt->task_tag); return FAILED; } @@ -1154,32 +1211,30 @@ } /** - * purge_requests: Our virtual adapter just shut down. 
purge any sent requests - * @hostdata: the adapter - */ -static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) + * ibmvscsi_eh_host_reset_handler - Reset the connection to the server + * @cmd: struct scsi_cmnd having problems +*/ +static int ibmvscsi_eh_host_reset_handler(struct scsi_cmnd *cmd) { - struct srp_event_struct *tmp_evt, *pos; - unsigned long flags; + unsigned long wait_switch = 0; + struct ibmvscsi_host_data *hostdata = + (struct ibmvscsi_host_data *)cmd->device->host->hostdata; - spin_lock_irqsave(hostdata->host->host_lock, flags); - list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) { - list_del(&tmp_evt->list); - if (tmp_evt->cmnd) { - tmp_evt->cmnd->result = (error_code << 16); - unmap_cmd_data(&tmp_evt->iu.srp.cmd, - tmp_evt, - tmp_evt->hostdata->dev); - if (tmp_evt->cmnd_done) - tmp_evt->cmnd_done(tmp_evt->cmnd); - } else { - if (tmp_evt->done) { - tmp_evt->done(tmp_evt); - } - } - free_event_struct(&tmp_evt->hostdata->pool, tmp_evt); + dev_err(hostdata->dev, "Resetting connection due to error recovery\n"); + + ibmvscsi_reset_host(hostdata); + + for (wait_switch = jiffies + (init_timeout * HZ); + time_before(jiffies, wait_switch) && + atomic_read(&hostdata->request_limit) < 2;) { + + msleep(10); } - spin_unlock_irqrestore(hostdata->host->host_lock, flags); + + if (atomic_read(&hostdata->request_limit) <= 0) + return FAILED; + + return SUCCESS; } /** @@ -1191,6 +1246,7 @@ void ibmvscsi_handle_crq(struct viosrp_crq *crq, struct ibmvscsi_host_data *hostdata) { + long rc; unsigned long flags; struct srp_event_struct *evt_struct = (struct srp_event_struct *)crq->IU_data_ptr; @@ -1198,27 +1254,25 @@ case 0xC0: /* initialization */ switch (crq->format) { case 0x01: /* Initialization message */ - printk(KERN_INFO "ibmvscsi: partner initialized\n"); + dev_info(hostdata->dev, "partner initialized\n"); /* Send back a response */ - if (ibmvscsi_send_crq(hostdata, - 0xC002000000000000LL, 0) == 0) { + if ((rc = 
ibmvscsi_send_crq(hostdata, + 0xC002000000000000LL, 0)) == 0) { /* Now login */ send_srp_login(hostdata); } else { - printk(KERN_ERR - "ibmvscsi: Unable to send init rsp\n"); + dev_err(hostdata->dev, "Unable to send init rsp. rc=%ld\n", rc); } break; case 0x02: /* Initialization response */ - printk(KERN_INFO - "ibmvscsi: partner initialization complete\n"); + dev_info(hostdata->dev, "partner initialization complete\n"); /* Now login */ send_srp_login(hostdata); break; default: - printk(KERN_ERR "ibmvscsi: unknown crq message type\n"); + dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format); } return; case 0xFF: /* Hypervisor telling us the connection is closed */ @@ -1226,8 +1280,7 @@ atomic_set(&hostdata->request_limit, 0); if (crq->format == 0x06) { /* We need to re-setup the interpartition connection */ - printk(KERN_INFO - "ibmvscsi: Re-enabling adapter!\n"); + dev_info(hostdata->dev, "Re-enabling adapter!\n"); purge_requests(hostdata, DID_REQUEUE); if ((ibmvscsi_reenable_crq_queue(&hostdata->queue, hostdata)) || @@ -1235,13 +1288,10 @@ 0xC001000000000000LL, 0))) { atomic_set(&hostdata->request_limit, -1); - printk(KERN_ERR - "ibmvscsi: error after" - " enable\n"); + dev_err(hostdata->dev, "error after enable\n"); } } else { - printk(KERN_INFO - "ibmvscsi: Virtual adapter failed rc %d!\n", + dev_err(hostdata->dev, "Virtual adapter failed rc %d!\n", crq->format); purge_requests(hostdata, DID_ERROR); @@ -1251,8 +1301,7 @@ 0xC001000000000000LL, 0))) { atomic_set(&hostdata->request_limit, -1); - printk(KERN_ERR - "ibmvscsi: error after reset\n"); + dev_err(hostdata->dev, "error after reset\n"); } } scsi_unblock_requests(hostdata->host); @@ -1260,8 +1309,7 @@ case 0x80: /* real payload */ break; default: - printk(KERN_ERR - "ibmvscsi: got an invalid message type 0x%02x\n", + dev_err(hostdata->dev, "got an invalid message type 0x%02x\n", crq->valid); return; } @@ -1271,15 +1319,13 @@ * actually sent */ if (!valid_event_struct(&hostdata->pool, 
evt_struct)) { - printk(KERN_ERR - "ibmvscsi: returned correlation_token 0x%p is invalid!\n", + dev_err(hostdata->dev, "returned correlation_token 0x%p is invalid!\n", (void *)crq->IU_data_ptr); return; } if (atomic_read(&evt_struct->free)) { - printk(KERN_ERR - "ibmvscsi: received duplicate correlation_token 0x%p!\n", + dev_err(hostdata->dev, "received duplicate correlation_token 0x%p!\n", (void *)crq->IU_data_ptr); return; } @@ -1288,11 +1334,12 @@ atomic_add(evt_struct->xfer_iu->srp.rsp.req_lim_delta, &hostdata->request_limit); + del_timer(&evt_struct->timer); + if (evt_struct->done) evt_struct->done(evt_struct); else - printk(KERN_ERR - "ibmvscsi: returned done() is NULL; not running it!\n"); + dev_err(hostdata->dev, "returned done() is NULL; not running it!\n"); /* * Lock the host_lock before messing with these structures, since we @@ -1313,13 +1360,13 @@ { struct viosrp_host_config *host_config; struct srp_event_struct *evt_struct; + unsigned long flags; dma_addr_t addr; int rc; evt_struct = get_event_struct(&hostdata->pool); if (!evt_struct) { - printk(KERN_ERR - "ibmvscsi: could't allocate event for HOST_CONFIG!\n"); + dev_err(hostdata->dev, "couldn't allocate event for HOST_CONFIG!\n"); return -1; } @@ -1339,14 +1386,15 @@ DMA_BIDIRECTIONAL); if (dma_mapping_error(host_config->buffer)) { - printk(KERN_ERR - "ibmvscsi: dma_mapping error " "getting host config\n"); + dev_err(hostdata->dev, "dma_mapping error getting host config\n"); free_event_struct(&hostdata->pool, evt_struct); return -1; } init_completion(&evt_struct->comp); - rc = ibmvscsi_send_srp_event(evt_struct, hostdata); + spin_lock_irqsave(hostdata->host->host_lock, flags); + rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (rc == 0) wait_for_completion(&evt_struct->comp); dma_unmap_single(hostdata->dev, addr, length, DMA_BIDIRECTIONAL); @@ -1375,6 +1423,23 @@ return 0; } +/** + * ibmvscsi_change_queue_depth - 
Change the device's queue depth + * @sdev: scsi device struct + * @qdepth: depth to set + * + * Return value: + * actual depth set + **/ +static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth) +{ + if (qdepth > IBMVSCSI_MAX_CMDS_PER_LUN) + qdepth = IBMVSCSI_MAX_CMDS_PER_LUN; + + scsi_adjust_queue_depth(sdev, 0, qdepth); + return sdev->queue_depth; +} + /* ------------------------------------------------------------ * sysfs attributes */ @@ -1520,7 +1585,9 @@ .queuecommand = ibmvscsi_queuecommand, .eh_abort_handler = ibmvscsi_eh_abort_handler, .eh_device_reset_handler = ibmvscsi_eh_device_reset_handler, + .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler, .slave_configure = ibmvscsi_slave_configure, + .change_queue_depth = ibmvscsi_change_queue_depth, .cmd_per_lun = 16, .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT, .this_id = -1, @@ -1545,7 +1612,7 @@ driver_template.can_queue = max_requests; host = scsi_host_alloc(&driver_template, sizeof(*hostdata)); if (!host) { - printk(KERN_ERR "ibmvscsi: couldn't allocate host data\n"); + dev_err(&vdev->dev, "couldn't allocate host data\n"); goto scsi_host_alloc_failed; } @@ -1559,11 +1626,11 @@ rc = ibmvscsi_init_crq_queue(&hostdata->queue, hostdata, max_requests); if (rc != 0 && rc != H_RESOURCE) { - printk(KERN_ERR "ibmvscsi: couldn't initialize crq\n"); + dev_err(&vdev->dev, "couldn't initialize crq. 
rc=%d\n", rc); goto init_crq_failed; } if (initialize_event_pool(&hostdata->pool, max_requests, hostdata) != 0) { - printk(KERN_ERR "ibmvscsi: couldn't initialize event pool\n"); + dev_err(&vdev->dev, "couldn't initialize event pool\n"); goto init_pool_failed; } diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.h --- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.h 2007-12-21 15:36:12.000000000 -0500 @@ -45,6 +45,7 @@ #define MAX_INDIRECT_BUFS 10 #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100 +#define IBMVSCSI_MAX_CMDS_PER_LUN 64 /* ------------------------------------------------------------ * Data Structures @@ -69,6 +70,7 @@ union viosrp_iu iu; void (*cmnd_done) (struct scsi_cmnd *); struct completion comp; + struct timer_list timer; union viosrp_iu *sync_srp; struct srp_direct_buf *ext_list; dma_addr_t ext_list_token; diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c linux-2.6.22-591/drivers/scsi/ibmvscsi/rpa_vscsi.c --- linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ibmvscsi/rpa_vscsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -177,7 +177,7 @@ memset(&hostdata->madapter_info, 0x00, sizeof(hostdata->madapter_info)); - printk(KERN_INFO "rpa_vscsi: SPR_VERSION: %s\n", SRP_VERSION); + dev_info(hostdata->dev, "SRP_VERSION: %s\n", SRP_VERSION); strcpy(hostdata->madapter_info.srp_version, SRP_VERSION); strncpy(hostdata->madapter_info.partition_name, partition_name, @@ -232,25 +232,24 @@ if (rc == 2) { /* Adapter is good, but other end is not ready */ - printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n"); + dev_warn(hostdata->dev, "Partner adapter not ready\n"); retrc = 0; } else if (rc != 0) { - printk(KERN_WARNING "ibmvscsi: Error %d opening adapter\n", rc); + dev_warn(hostdata->dev, "Error %d opening adapter\n", rc); goto 
reg_crq_failed; } if (request_irq(vdev->irq, ibmvscsi_handle_event, 0, "ibmvscsi", (void *)hostdata) != 0) { - printk(KERN_ERR "ibmvscsi: couldn't register irq 0x%x\n", + dev_err(hostdata->dev, "couldn't register irq 0x%x\n", vdev->irq); goto req_irq_failed; } rc = vio_enable_interrupts(vdev); if (rc != 0) { - printk(KERN_ERR "ibmvscsi: Error %d enabling interrupts!!!\n", - rc); + dev_err(hostdata->dev, "Error %d enabling interrupts!!!\n", rc); goto req_irq_failed; } @@ -294,7 +293,7 @@ } while ((rc == H_IN_PROGRESS) || (rc == H_BUSY) || (H_IS_LONG_BUSY(rc))); if (rc) - printk(KERN_ERR "ibmvscsi: Error %d enabling adapter\n", rc); + dev_err(hostdata->dev, "Error %d enabling adapter\n", rc); return rc; } @@ -327,10 +326,9 @@ queue->msg_token, PAGE_SIZE); if (rc == 2) { /* Adapter is good, but other end is not ready */ - printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n"); + dev_warn(hostdata->dev, "Partner adapter not ready\n"); } else if (rc != 0) { - printk(KERN_WARNING - "ibmvscsi: couldn't register crq--rc 0x%x\n", rc); + dev_warn(hostdata->dev, "couldn't register crq--rc 0x%x\n", rc); } return rc; } diff -Nurb linux-2.6.22-570/drivers/scsi/initio.c linux-2.6.22-591/drivers/scsi/initio.c --- linux-2.6.22-570/drivers/scsi/initio.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/initio.c 2007-12-21 15:36:12.000000000 -0500 @@ -3,7 +3,8 @@ * * Copyright (c) 1994-1998 Initio Corporation * Copyright (c) 1998 Bas Vermeulen - * All rights reserved. + * Copyright (c) 2004 Christoph Hellwig + * Copyright (c) 2007 Red Hat * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,38 +20,6 @@ * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
* - * -------------------------------------------------------------------------- - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Where this Software is combined with software released under the terms of - * the GNU General Public License ("GPL") and the terms of the GPL would require the - * combined work to also be released under the terms of the GPL, the terms - * and conditions of this License will apply in addition to those of the - * GPL with the exception of any terms or conditions of this License that - * conflict with, or are expressly prohibited by, the GPL. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. * ************************************************************************* * @@ -70,14 +39,14 @@ * - Fix memory allocation problem * 03/04/98 hc - v1.01l * - Fix tape rewind which will hang the system problem - * - Set can_queue to tul_num_scb + * - Set can_queue to initio_num_scb * 06/25/98 hc - v1.01m * - Get it work for kernel version >= 2.1.75 - * - Dynamic assign SCSI bus reset holding time in init_tulip() + * - Dynamic assign SCSI bus reset holding time in initio_init() * 07/02/98 hc - v1.01n * - Support 0002134A * 08/07/98 hc - v1.01o - * - Change the tul_abort_srb routine to use scsi_done. <01> + * - Change the initio_abort_srb routine to use scsi_done. 
<01> * 09/07/98 hl - v1.02 * - Change the INI9100U define and proc_dir_entry to * reflect the newer Kernel 2.1.118, but the v1.o1o @@ -150,23 +119,13 @@ static unsigned int i91u_debug = DEBUG_DEFAULT; #endif -#define TUL_RDWORD(x,y) (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) )) - -typedef struct PCI_ID_Struc { - unsigned short vendor_id; - unsigned short device_id; -} PCI_ID; - -static int tul_num_ch = 4; /* Maximum 4 adapters */ -static int tul_num_scb; -static int tul_tag_enable = 1; -static SCB *tul_scb; +static int initio_tag_enable = 1; #ifdef DEBUG_i91u static int setup_debug = 0; #endif -static void i91uSCBPost(BYTE * pHcb, BYTE * pScb); +static void i91uSCBPost(u8 * pHcb, u8 * pScb); /* PCI Devices supported by this driver */ static struct pci_device_id i91u_pci_devices[] = { @@ -184,74 +143,66 @@ #define DEBUG_STATE 0 #define INT_DISC 0 -/*--- external functions --*/ -static void tul_se2_wait(void); +/*--- forward references ---*/ +static struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun); +static struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host); + +static int tulip_main(struct initio_host * host); + +static int initio_next_state(struct initio_host * host); +static int initio_state_1(struct initio_host * host); +static int initio_state_2(struct initio_host * host); +static int initio_state_3(struct initio_host * host); +static int initio_state_4(struct initio_host * host); +static int initio_state_5(struct initio_host * host); +static int initio_state_6(struct initio_host * host); +static int initio_state_7(struct initio_host * host); +static int initio_xfer_data_in(struct initio_host * host); +static int initio_xfer_data_out(struct initio_host * host); +static int initio_xpad_in(struct initio_host * host); +static int initio_xpad_out(struct initio_host * host); +static int initio_status_msg(struct initio_host * host); + +static int initio_msgin(struct initio_host * host); +static int 
initio_msgin_sync(struct initio_host * host); +static int initio_msgin_accept(struct initio_host * host); +static int initio_msgout_reject(struct initio_host * host); +static int initio_msgin_extend(struct initio_host * host); + +static int initio_msgout_ide(struct initio_host * host); +static int initio_msgout_abort_targ(struct initio_host * host); +static int initio_msgout_abort_tag(struct initio_host * host); + +static int initio_bus_device_reset(struct initio_host * host); +static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb); +static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb); +static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb); +static int int_initio_busfree(struct initio_host * host); +static int int_initio_scsi_rst(struct initio_host * host); +static int int_initio_bad_seq(struct initio_host * host); +static int int_initio_resel(struct initio_host * host); +static int initio_sync_done(struct initio_host * host); +static int wdtr_done(struct initio_host * host); +static int wait_tulip(struct initio_host * host); +static int initio_wait_done_disc(struct initio_host * host); +static int initio_wait_disc(struct initio_host * host); +static void tulip_scsi(struct initio_host * host); +static int initio_post_scsi_rst(struct initio_host * host); + +static void initio_se2_ew_en(unsigned long base); +static void initio_se2_ew_ds(unsigned long base); +static int initio_se2_rd_all(unsigned long base); +static void initio_se2_update_all(unsigned long base); /* setup default pattern */ +static void initio_read_eeprom(unsigned long base); -/*--- forward refrence ---*/ -static SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun); -static SCB *tul_find_done_scb(HCS * pCurHcb); - -static int tulip_main(HCS * pCurHcb); - -static int tul_next_state(HCS * pCurHcb); -static int tul_state_1(HCS * pCurHcb); -static int tul_state_2(HCS * pCurHcb); -static int tul_state_3(HCS * 
pCurHcb); -static int tul_state_4(HCS * pCurHcb); -static int tul_state_5(HCS * pCurHcb); -static int tul_state_6(HCS * pCurHcb); -static int tul_state_7(HCS * pCurHcb); -static int tul_xfer_data_in(HCS * pCurHcb); -static int tul_xfer_data_out(HCS * pCurHcb); -static int tul_xpad_in(HCS * pCurHcb); -static int tul_xpad_out(HCS * pCurHcb); -static int tul_status_msg(HCS * pCurHcb); - -static int tul_msgin(HCS * pCurHcb); -static int tul_msgin_sync(HCS * pCurHcb); -static int tul_msgin_accept(HCS * pCurHcb); -static int tul_msgout_reject(HCS * pCurHcb); -static int tul_msgin_extend(HCS * pCurHcb); - -static int tul_msgout_ide(HCS * pCurHcb); -static int tul_msgout_abort_targ(HCS * pCurHcb); -static int tul_msgout_abort_tag(HCS * pCurHcb); - -static int tul_bus_device_reset(HCS * pCurHcb); -static void tul_select_atn(HCS * pCurHcb, SCB * pCurScb); -static void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb); -static void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb); -static int int_tul_busfree(HCS * pCurHcb); -static int int_tul_scsi_rst(HCS * pCurHcb); -static int int_tul_bad_seq(HCS * pCurHcb); -static int int_tul_resel(HCS * pCurHcb); -static int tul_sync_done(HCS * pCurHcb); -static int wdtr_done(HCS * pCurHcb); -static int wait_tulip(HCS * pCurHcb); -static int tul_wait_done_disc(HCS * pCurHcb); -static int tul_wait_disc(HCS * pCurHcb); -static void tulip_scsi(HCS * pCurHcb); -static int tul_post_scsi_rst(HCS * pCurHcb); - -static void tul_se2_ew_en(WORD CurBase); -static void tul_se2_ew_ds(WORD CurBase); -static int tul_se2_rd_all(WORD CurBase); -static void tul_se2_update_all(WORD CurBase); /* setup default pattern */ -static void tul_read_eeprom(WORD CurBase); - - /* ---- INTERNAL VARIABLES ---- */ -static HCS tul_hcs[MAX_SUPPORTED_ADAPTERS]; -static INI_ADPT_STRUCT i91u_adpt[MAX_SUPPORTED_ADAPTERS]; +/* ---- INTERNAL VARIABLES ---- */ -/*NVRAM nvram, *nvramp = &nvram; */ static NVRAM i91unvram; static NVRAM *i91unvramp; - - -static UCHAR i91udftNvRam[64] 
= +static u8 i91udftNvRam[64] = { -/*----------- header -----------*/ + /*----------- header -----------*/ 0x25, 0xc9, /* Signature */ 0x40, /* Size */ 0x01, /* Revision */ @@ -289,7 +240,7 @@ 0, 0}; /* - CheckSum - */ -static UCHAR tul_rate_tbl[8] = /* fast 20 */ +static u8 initio_rate_tbl[8] = /* fast 20 */ { /* nanosecond devide by 4 */ 12, /* 50ns, 20M */ @@ -302,53 +253,17 @@ 62 /* 250ns, 4M */ }; -static void tul_do_pause(unsigned amount) -{ /* Pause for amount jiffies */ +static void initio_do_pause(unsigned amount) +{ + /* Pause for amount jiffies */ unsigned long the_time = jiffies + amount; - while (time_before_eq(jiffies, the_time)); + while (time_before_eq(jiffies, the_time)) + cpu_relax(); } /*-- forward reference --*/ -/******************************************************************* - Use memeory refresh time ~ 15us * 2 -********************************************************************/ -void tul_se2_wait(void) -{ -#if 1 - udelay(30); -#else - UCHAR readByte; - - readByte = TUL_RD(0, 0x61); - if ((readByte & 0x10) == 0x10) { - for (;;) { - readByte = TUL_RD(0, 0x61); - if ((readByte & 0x10) == 0x10) - break; - } - for (;;) { - readByte = TUL_RD(0, 0x61); - if ((readByte & 0x10) != 0x10) - break; - } - } else { - for (;;) { - readByte = TUL_RD(0, 0x61); - if ((readByte & 0x10) == 0x10) - break; - } - for (;;) { - readByte = TUL_RD(0, 0x61); - if ((readByte & 0x10) != 0x10) - break; - } - } -#endif -} - - /****************************************************************** Input: instruction for Serial E2PROM @@ -379,1174 +294,1019 @@ ******************************************************************/ -static void tul_se2_instr(WORD CurBase, UCHAR instr) + +/** + * initio_se2_instr - bitbang an instruction + * @base: Base of InitIO controller + * @instr: Instruction for serial E2PROM + * + * Bitbang an instruction out to the serial E2Prom + */ + +static void initio_se2_instr(unsigned long base, u8 instr) { int i; - UCHAR b; + u8 b; - 
TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO); /* cs+start bit */ - tul_se2_wait(); - TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK | SE2DO); /* +CLK */ - tul_se2_wait(); + outb(SE2CS | SE2DO, base + TUL_NVRAM); /* cs+start bit */ + udelay(30); + outb(SE2CS | SE2CLK | SE2DO, base + TUL_NVRAM); /* +CLK */ + udelay(30); for (i = 0; i < 8; i++) { if (instr & 0x80) b = SE2CS | SE2DO; /* -CLK+dataBit */ else b = SE2CS; /* -CLK */ - TUL_WR(CurBase + TUL_NVRAM, b); - tul_se2_wait(); - TUL_WR(CurBase + TUL_NVRAM, b | SE2CLK); /* +CLK */ - tul_se2_wait(); + outb(b, base + TUL_NVRAM); + udelay(30); + outb(b | SE2CLK, base + TUL_NVRAM); /* +CLK */ + udelay(30); instr <<= 1; } - TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */ - tul_se2_wait(); - return; + outb(SE2CS, base + TUL_NVRAM); /* -CLK */ + udelay(30); } -/****************************************************************** - Function name : tul_se2_ew_en - Description : Enable erase/write state of serial EEPROM -******************************************************************/ -void tul_se2_ew_en(WORD CurBase) +/** + * initio_se2_ew_en - Enable erase/write + * @base: Base address of InitIO controller + * + * Enable erase/write state of serial EEPROM + */ +void initio_se2_ew_en(unsigned long base) { - tul_se2_instr(CurBase, 0x30); /* EWEN */ - TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */ - tul_se2_wait(); - return; + initio_se2_instr(base, 0x30); /* EWEN */ + outb(0, base + TUL_NVRAM); /* -CS */ + udelay(30); } -/************************************************************************ - Disable erase/write state of serial EEPROM -*************************************************************************/ -void tul_se2_ew_ds(WORD CurBase) -{ - tul_se2_instr(CurBase, 0); /* EWDS */ - TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */ - tul_se2_wait(); - return; +/** + * initio_se2_ew_ds - Disable erase/write + * @base: Base address of InitIO controller + * + * Disable erase/write state of serial EEPROM + */ +void initio_se2_ew_ds(unsigned 
long base) +{ + initio_se2_instr(base, 0); /* EWDS */ + outb(0, base + TUL_NVRAM); /* -CS */ + udelay(30); } -/****************************************************************** - Input :address of Serial E2PROM - Output :value stored in Serial E2PROM -*******************************************************************/ -static USHORT tul_se2_rd(WORD CurBase, ULONG adr) +/** + * initio_se2_rd - read E2PROM word + * @base: Base of InitIO controller + * @addr: Address of word in E2PROM + * + * Read a word from the NV E2PROM device + */ +static u16 initio_se2_rd(unsigned long base, u8 addr) { - UCHAR instr, readByte; - USHORT readWord; + u8 instr, rb; + u16 val = 0; int i; - instr = (UCHAR) (adr | 0x80); - tul_se2_instr(CurBase, instr); /* READ INSTR */ - readWord = 0; + instr = (u8) (addr | 0x80); + initio_se2_instr(base, instr); /* READ INSTR */ for (i = 15; i >= 0; i--) { - TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */ - tul_se2_wait(); - TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */ + outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */ + udelay(30); + outb(SE2CS, base + TUL_NVRAM); /* -CLK */ /* sample data after the following edge of clock */ - readByte = TUL_RD(CurBase, TUL_NVRAM); - readByte &= SE2DI; - readWord += (readByte << i); - tul_se2_wait(); /* 6/20/95 */ + rb = inb(base + TUL_NVRAM); + rb &= SE2DI; + val += (rb << i); + udelay(30); /* 6/20/95 */ } - TUL_WR(CurBase + TUL_NVRAM, 0); /* no chip select */ - tul_se2_wait(); - return readWord; + outb(0, base + TUL_NVRAM); /* no chip select */ + udelay(30); + return val; } - -/****************************************************************** - Input: new value in Serial E2PROM, address of Serial E2PROM -*******************************************************************/ -static void tul_se2_wr(WORD CurBase, UCHAR adr, USHORT writeWord) +/** + * initio_se2_wr - write E2PROM word + * @base: Base of InitIO controller + * @addr: Address of word in E2PROM + * @val: Value to write + * + * Write a word to 
the NV E2PROM device. Used when recovering from + * a problem with the NV. + */ +static void initio_se2_wr(unsigned long base, u8 addr, u16 val) { - UCHAR readByte; - UCHAR instr; + u8 rb; + u8 instr; int i; - instr = (UCHAR) (adr | 0x40); - tul_se2_instr(CurBase, instr); /* WRITE INSTR */ + instr = (u8) (addr | 0x40); + initio_se2_instr(base, instr); /* WRITE INSTR */ for (i = 15; i >= 0; i--) { - if (writeWord & 0x8000) - TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO); /* -CLK+dataBit 1 */ + if (val & 0x8000) + outb(SE2CS | SE2DO, base + TUL_NVRAM); /* -CLK+dataBit 1 */ else - TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK+dataBit 0 */ - tul_se2_wait(); - TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */ - tul_se2_wait(); - writeWord <<= 1; - } - TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */ - tul_se2_wait(); - TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */ - tul_se2_wait(); + outb(SE2CS, base + TUL_NVRAM); /* -CLK+dataBit 0 */ + udelay(30); + outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */ + udelay(30); + val <<= 1; + } + outb(SE2CS, base + TUL_NVRAM); /* -CLK */ + udelay(30); + outb(0, base + TUL_NVRAM); /* -CS */ + udelay(30); - TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* +CS */ - tul_se2_wait(); + outb(SE2CS, base + TUL_NVRAM); /* +CS */ + udelay(30); for (;;) { - TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */ - tul_se2_wait(); - TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */ - tul_se2_wait(); - if ((readByte = TUL_RD(CurBase, TUL_NVRAM)) & SE2DI) + outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */ + udelay(30); + outb(SE2CS, base + TUL_NVRAM); /* -CLK */ + udelay(30); + if ((rb = inb(base + TUL_NVRAM)) & SE2DI) break; /* write complete */ } - TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */ - return; + outb(0, base + TUL_NVRAM); /* -CS */ } +/** + * initio_se2_rd_all - read hostadapter NV configuration + * @base: Base address of InitIO controller + * + * Reads the E2PROM data into main memory. Ensures that the checksum + * and header marker are valid. 
Returns 1 on success -1 on error. + */ -/*********************************************************************** - Read SCSI H/A configuration parameters from serial EEPROM -************************************************************************/ -int tul_se2_rd_all(WORD CurBase) +static int initio_se2_rd_all(unsigned long base) { int i; - ULONG chksum = 0; - USHORT *np; + u16 chksum = 0; + u16 *np; i91unvramp = &i91unvram; - np = (USHORT *) i91unvramp; - for (i = 0; i < 32; i++) { - *np++ = tul_se2_rd(CurBase, i); - } + np = (u16 *) i91unvramp; + for (i = 0; i < 32; i++) + *np++ = initio_se2_rd(base, i); -/*--------------------Is signature "ini" ok ? ----------------*/ + /* Is signature "ini" ok ? */ if (i91unvramp->NVM_Signature != INI_SIGNATURE) return -1; -/*---------------------- Is ckecksum ok ? ----------------------*/ - np = (USHORT *) i91unvramp; + /* Is checksum ok ? */ + np = (u16 *) i91unvramp; for (i = 0; i < 31; i++) chksum += *np++; - if (i91unvramp->NVM_CheckSum != (USHORT) chksum) + if (i91unvramp->NVM_CheckSum != chksum) return -1; return 1; } - -/*********************************************************************** - Update SCSI H/A configuration parameters from serial EEPROM -************************************************************************/ -void tul_se2_update_all(WORD CurBase) +/** + * initio_se2_update_all - Update E2PROM + * @base: Base of InitIO controller + * + * Update the E2PROM by writing any changes into the E2PROM + * chip, rewriting the checksum. 
+ */ +static void initio_se2_update_all(unsigned long base) { /* setup default pattern */ int i; - ULONG chksum = 0; - USHORT *np, *np1; + u16 chksum = 0; + u16 *np, *np1; i91unvramp = &i91unvram; /* Calculate checksum first */ - np = (USHORT *) i91udftNvRam; + np = (u16 *) i91udftNvRam; for (i = 0; i < 31; i++) chksum += *np++; - *np = (USHORT) chksum; - tul_se2_ew_en(CurBase); /* Enable write */ + *np = chksum; + initio_se2_ew_en(base); /* Enable write */ - np = (USHORT *) i91udftNvRam; - np1 = (USHORT *) i91unvramp; + np = (u16 *) i91udftNvRam; + np1 = (u16 *) i91unvramp; for (i = 0; i < 32; i++, np++, np1++) { - if (*np != *np1) { - tul_se2_wr(CurBase, i, *np); - } + if (*np != *np1) + initio_se2_wr(base, i, *np); } - - tul_se2_ew_ds(CurBase); /* Disable write */ - return; + initio_se2_ew_ds(base); /* Disable write */ } -/************************************************************************* - Function name : read_eeprom -**************************************************************************/ -void tul_read_eeprom(WORD CurBase) -{ - UCHAR gctrl; - - i91unvramp = &i91unvram; -/*------Enable EEProm programming ---*/ - gctrl = TUL_RD(CurBase, TUL_GCTRL); - TUL_WR(CurBase + TUL_GCTRL, gctrl | TUL_GCTRL_EEPROM_BIT); - if (tul_se2_rd_all(CurBase) != 1) { - tul_se2_update_all(CurBase); /* setup default pattern */ - tul_se2_rd_all(CurBase); /* load again */ - } -/*------ Disable EEProm programming ---*/ - gctrl = TUL_RD(CurBase, TUL_GCTRL); - TUL_WR(CurBase + TUL_GCTRL, gctrl & ~TUL_GCTRL_EEPROM_BIT); -} /* read_eeprom */ +/** + * initio_read_eeprom - Retrieve configuration + * @base: Base of InitIO Host Adapter + * + * Retrieve the host adapter configuration data from E2Prom. If the + * data is invalid then the defaults are used and are also restored + * into the E2PROM. This forms the access point for the SCSI driver + * into the E2PROM layer, the other functions for the E2PROM are all + * internal use. 
+ * + * Must be called single threaded, uses a shared global area. + */ -static int Addi91u_into_Adapter_table(WORD wBIOS, WORD wBASE, BYTE bInterrupt, - BYTE bBus, BYTE bDevice) +static void initio_read_eeprom(unsigned long base) { - int i, j; + u8 gctrl; - for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) { - if (i91u_adpt[i].ADPT_BIOS < wBIOS) - continue; - if (i91u_adpt[i].ADPT_BIOS == wBIOS) { - if (i91u_adpt[i].ADPT_BASE == wBASE) { - if (i91u_adpt[i].ADPT_Bus != 0xFF) - return 1; - } else if (i91u_adpt[i].ADPT_BASE < wBASE) - continue; - } - for (j = MAX_SUPPORTED_ADAPTERS - 1; j > i; j--) { - i91u_adpt[j].ADPT_BASE = i91u_adpt[j - 1].ADPT_BASE; - i91u_adpt[j].ADPT_INTR = i91u_adpt[j - 1].ADPT_INTR; - i91u_adpt[j].ADPT_BIOS = i91u_adpt[j - 1].ADPT_BIOS; - i91u_adpt[j].ADPT_Bus = i91u_adpt[j - 1].ADPT_Bus; - i91u_adpt[j].ADPT_Device = i91u_adpt[j - 1].ADPT_Device; - } - i91u_adpt[i].ADPT_BASE = wBASE; - i91u_adpt[i].ADPT_INTR = bInterrupt; - i91u_adpt[i].ADPT_BIOS = wBIOS; - i91u_adpt[i].ADPT_Bus = bBus; - i91u_adpt[i].ADPT_Device = bDevice; - return 0; - } - return 1; + i91unvramp = &i91unvram; + /* Enable EEProm programming */ + gctrl = inb(base + TUL_GCTRL); + outb(gctrl | TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL); + if (initio_se2_rd_all(base) != 1) { + initio_se2_update_all(base); /* setup default pattern */ + initio_se2_rd_all(base); /* load again */ + } + /* Disable EEProm programming */ + gctrl = inb(base + TUL_GCTRL); + outb(gctrl & ~TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL); } -static void init_i91uAdapter_table(void) -{ - int i; - - for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) { /* Initialize adapter structure */ - i91u_adpt[i].ADPT_BIOS = 0xffff; - i91u_adpt[i].ADPT_BASE = 0xffff; - i91u_adpt[i].ADPT_INTR = 0xff; - i91u_adpt[i].ADPT_Bus = 0xff; - i91u_adpt[i].ADPT_Device = 0xff; - } - return; -} +/** + * initio_stop_bm - stop bus master + * @host: InitIO we are stopping + * + * Stop any pending DMA operation, aborting the DMA if necessary + */ -static 
void tul_stop_bm(HCS * pCurHcb) +static void initio_stop_bm(struct initio_host * host) { - if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */ - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO); + if (inb(host->addr + TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */ + outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd); /* wait Abort DMA xfer done */ - while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0); + while ((inb(host->addr + TUL_Int) & XABT) == 0) + cpu_relax(); } - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); } -/***************************************************************************/ -static void get_tulipPCIConfig(HCS * pCurHcb, int ch_idx) -{ - pCurHcb->HCS_Base = i91u_adpt[ch_idx].ADPT_BASE; /* Supply base address */ - pCurHcb->HCS_BIOS = i91u_adpt[ch_idx].ADPT_BIOS; /* Supply BIOS address */ - pCurHcb->HCS_Intr = i91u_adpt[ch_idx].ADPT_INTR; /* Supply interrupt line */ - return; -} +/** + * initio_reset_scsi - Reset SCSI host controller + * @host: InitIO host to reset + * @seconds: Recovery time + * + * Perform a full reset of the SCSI subsystem. + */ -/***************************************************************************/ -static int tul_reset_scsi(HCS * pCurHcb, int seconds) +static int initio_reset_scsi(struct initio_host * host, int seconds) { - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_BUS); + outb(TSC_RST_BUS, host->addr + TUL_SCtrl0); - while (!((pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt)) & TSS_SCSIRST_INT)); - /* reset tulip chip */ + while (!((host->jsint = inb(host->addr + TUL_SInt)) & TSS_SCSIRST_INT)) + cpu_relax(); - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, 0); + /* reset tulip chip */ + outb(0, host->addr + TUL_SSignal); /* Stall for a while, wait for target's firmware ready,make it 2 sec ! 
*/ /* SONY 5200 tape drive won't work if only stall for 1 sec */ - tul_do_pause(seconds * HZ); - - TUL_RD(pCurHcb->HCS_Base, TUL_SInt); + /* FIXME: this is a very long busy wait right now */ + initio_do_pause(seconds * HZ); - return (SCSI_RESET_SUCCESS); + inb(host->addr + TUL_SInt); + return SCSI_RESET_SUCCESS; } -/***************************************************************************/ -static int init_tulip(HCS * pCurHcb, SCB * scbp, int tul_num_scb, - BYTE * pbBiosAdr, int seconds) +/** + * initio_init - set up an InitIO host adapter + * @host: InitIO host adapter + * @num_scbs: Number of SCBS + * @bios_addr: BIOS address + * + * Set up the host adapter and devices according to the configuration + * retrieved from the E2PROM. + * + * Locking: Calls E2PROM layer code which is not re-enterable so must + * run single threaded for now. + */ + +static void initio_init(struct initio_host * host, u8 *bios_addr) { int i; - BYTE *pwFlags; - BYTE *pbHeads; - SCB *pTmpScb, *pPrevScb = NULL; - - pCurHcb->HCS_NumScbs = tul_num_scb; - pCurHcb->HCS_Semaph = 1; - spin_lock_init(&pCurHcb->HCS_SemaphLock); - pCurHcb->HCS_JSStatus0 = 0; - pCurHcb->HCS_Scb = scbp; - pCurHcb->HCS_NxtPend = scbp; - pCurHcb->HCS_NxtAvail = scbp; - for (i = 0, pTmpScb = scbp; i < tul_num_scb; i++, pTmpScb++) { - pTmpScb->SCB_TagId = i; - if (i != 0) - pPrevScb->SCB_NxtScb = pTmpScb; - pPrevScb = pTmpScb; - } - pPrevScb->SCB_NxtScb = NULL; - pCurHcb->HCS_ScbEnd = pTmpScb; - pCurHcb->HCS_FirstAvail = scbp; - pCurHcb->HCS_LastAvail = pPrevScb; - spin_lock_init(&pCurHcb->HCS_AvailLock); - pCurHcb->HCS_FirstPend = NULL; - pCurHcb->HCS_LastPend = NULL; - pCurHcb->HCS_FirstBusy = NULL; - pCurHcb->HCS_LastBusy = NULL; - pCurHcb->HCS_FirstDone = NULL; - pCurHcb->HCS_LastDone = NULL; - pCurHcb->HCS_ActScb = NULL; - pCurHcb->HCS_ActTcs = NULL; + u8 *flags; + u8 *heads; - tul_read_eeprom(pCurHcb->HCS_Base); -/*---------- get H/A configuration -------------*/ + /* Get E2Prom configuration */ + 
initio_read_eeprom(host->addr); if (i91unvramp->NVM_SCSIInfo[0].NVM_NumOfTarg == 8) - pCurHcb->HCS_MaxTar = 8; + host->max_tar = 8; else - pCurHcb->HCS_MaxTar = 16; + host->max_tar = 16; - pCurHcb->HCS_Config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1; + host->config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1; - pCurHcb->HCS_SCSI_ID = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID; - pCurHcb->HCS_IdMask = ~(1 << pCurHcb->HCS_SCSI_ID); + host->scsi_id = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID; + host->idmask = ~(1 << host->scsi_id); #ifdef CHK_PARITY /* Enable parity error response */ - TUL_WR(pCurHcb->HCS_Base + TUL_PCMD, TUL_RD(pCurHcb->HCS_Base, TUL_PCMD) | 0x40); + outb(inb(host->addr + TUL_PCMD) | 0x40, host->addr + TUL_PCMD); #endif /* Mask all the interrupt */ - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); + outb(0x1F, host->addr + TUL_Mask); - tul_stop_bm(pCurHcb); + initio_stop_bm(host); /* --- Initialize the tulip --- */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_CHIP); + outb(TSC_RST_CHIP, host->addr + TUL_SCtrl0); /* program HBA's SCSI ID */ - TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId, pCurHcb->HCS_SCSI_ID << 4); + outb(host->scsi_id << 4, host->addr + TUL_SScsiId); /* Enable Initiator Mode ,phase latch,alternate sync period mode, disable SCSI reset */ - if (pCurHcb->HCS_Config & HCC_EN_PAR) - pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR); + if (host->config & HCC_EN_PAR) + host->sconf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR); else - pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT); - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_SConf1); + host->sconf1 = (TSC_INITDEFAULT); + outb(host->sconf1, host->addr + TUL_SConfig); /* Enable HW reselect */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); - TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, 0); + outb(0, host->addr + TUL_SPeriod); /* selection time out = 250 ms */ - TUL_WR(pCurHcb->HCS_Base + TUL_STimeOut, 153); + outb(153, host->addr + 
TUL_STimeOut); -/*--------- Enable SCSI terminator -----*/ - TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, (pCurHcb->HCS_Config & (HCC_ACT_TERM1 | HCC_ACT_TERM2))); - TUL_WR(pCurHcb->HCS_Base + TUL_GCTRL1, - ((pCurHcb->HCS_Config & HCC_AUTO_TERM) >> 4) | (TUL_RD(pCurHcb->HCS_Base, TUL_GCTRL1) & 0xFE)); + /* Enable SCSI terminator */ + outb((host->config & (HCC_ACT_TERM1 | HCC_ACT_TERM2)), + host->addr + TUL_XCtrl); + outb(((host->config & HCC_AUTO_TERM) >> 4) | + (inb(host->addr + TUL_GCTRL1) & 0xFE), + host->addr + TUL_GCTRL1); for (i = 0, - pwFlags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config), - pbHeads = pbBiosAdr + 0x180; - i < pCurHcb->HCS_MaxTar; - i++, pwFlags++) { - pCurHcb->HCS_Tcs[i].TCS_Flags = *pwFlags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE); - if (pCurHcb->HCS_Tcs[i].TCS_Flags & TCF_EN_255) - pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63; + flags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config), + heads = bios_addr + 0x180; + i < host->max_tar; + i++, flags++) { + host->targets[i].flags = *flags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE); + if (host->targets[i].flags & TCF_EN_255) + host->targets[i].drv_flags = TCF_DRV_255_63; else - pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0; - pCurHcb->HCS_Tcs[i].TCS_JS_Period = 0; - pCurHcb->HCS_Tcs[i].TCS_SConfig0 = pCurHcb->HCS_SConf1; - pCurHcb->HCS_Tcs[i].TCS_DrvHead = *pbHeads++; - if (pCurHcb->HCS_Tcs[i].TCS_DrvHead == 255) - pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63; + host->targets[i].drv_flags = 0; + host->targets[i].js_period = 0; + host->targets[i].sconfig0 = host->sconf1; + host->targets[i].heads = *heads++; + if (host->targets[i].heads == 255) + host->targets[i].drv_flags = TCF_DRV_255_63; else - pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0; - pCurHcb->HCS_Tcs[i].TCS_DrvSector = *pbHeads++; - pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY; - pCurHcb->HCS_ActTags[i] = 0; - pCurHcb->HCS_MaxTags[i] = 0xFF; + host->targets[i].drv_flags = 0; + host->targets[i].sectors = *heads++; + host->targets[i].flags &= ~TCF_BUSY; + 
host->act_tags[i] = 0; + host->max_tags[i] = 0xFF; } /* for */ printk("i91u: PCI Base=0x%04X, IRQ=%d, BIOS=0x%04X0, SCSI ID=%d\n", - pCurHcb->HCS_Base, pCurHcb->HCS_Intr, - pCurHcb->HCS_BIOS, pCurHcb->HCS_SCSI_ID); -/*------------------- reset SCSI Bus ---------------------------*/ - if (pCurHcb->HCS_Config & HCC_SCSI_RESET) { - printk("i91u: Reset SCSI Bus ... \n"); - tul_reset_scsi(pCurHcb, seconds); - } - TUL_WR(pCurHcb->HCS_Base + TUL_SCFG1, 0x17); - TUL_WR(pCurHcb->HCS_Base + TUL_SIntEnable, 0xE9); - return (0); + host->addr, host->irq, + host->bios_addr, host->scsi_id); + /* Reset SCSI Bus */ + if (host->config & HCC_SCSI_RESET) { + printk(KERN_INFO "i91u: Reset SCSI Bus ... \n"); + initio_reset_scsi(host, 10); + } + outb(0x17, host->addr + TUL_SCFG1); + outb(0xE9, host->addr + TUL_SIntEnable); } -/***************************************************************************/ -static SCB *tul_alloc_scb(HCS * hcsp) +/** + * initio_alloc_scb - Allocate an SCB + * @host: InitIO host we are allocating for + * + * Walk the SCB list for the controller and allocate a free SCB if + * one exists. 
+ */ +static struct scsi_ctrl_blk *initio_alloc_scb(struct initio_host *host) { - SCB *pTmpScb; - ULONG flags; - spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags); - if ((pTmpScb = hcsp->HCS_FirstAvail) != NULL) { + struct scsi_ctrl_blk *scb; + unsigned long flags; + + spin_lock_irqsave(&host->avail_lock, flags); + if ((scb = host->first_avail) != NULL) { #if DEBUG_QUEUE - printk("find scb at %08lx\n", (ULONG) pTmpScb); + printk("find scb at %p\n", scb); #endif - if ((hcsp->HCS_FirstAvail = pTmpScb->SCB_NxtScb) == NULL) - hcsp->HCS_LastAvail = NULL; - pTmpScb->SCB_NxtScb = NULL; - pTmpScb->SCB_Status = SCB_RENT; + if ((host->first_avail = scb->next) == NULL) + host->last_avail = NULL; + scb->next = NULL; + scb->status = SCB_RENT; } - spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags); - return (pTmpScb); + spin_unlock_irqrestore(&host->avail_lock, flags); + return scb; } -/***************************************************************************/ -static void tul_release_scb(HCS * hcsp, SCB * scbp) +/** + * initio_release_scb - Release an SCB + * @host: InitIO host that owns the SCB + * @cmnd: SCB command block being returned + * + * Return an allocated SCB to the host free list + */ + +static void initio_release_scb(struct initio_host * host, struct scsi_ctrl_blk * cmnd) { - ULONG flags; + unsigned long flags; #if DEBUG_QUEUE - printk("Release SCB %lx; ", (ULONG) scbp); + printk("Release SCB %p; ", cmnd); #endif - spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags); - scbp->SCB_Srb = NULL; - scbp->SCB_Status = 0; - scbp->SCB_NxtScb = NULL; - if (hcsp->HCS_LastAvail != NULL) { - hcsp->HCS_LastAvail->SCB_NxtScb = scbp; - hcsp->HCS_LastAvail = scbp; + spin_lock_irqsave(&(host->avail_lock), flags); + cmnd->srb = NULL; + cmnd->status = 0; + cmnd->next = NULL; + if (host->last_avail != NULL) { + host->last_avail->next = cmnd; + host->last_avail = cmnd; } else { - hcsp->HCS_FirstAvail = scbp; - hcsp->HCS_LastAvail = scbp; + host->first_avail = cmnd; + host->last_avail = 
cmnd; } - spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags); + spin_unlock_irqrestore(&(host->avail_lock), flags); } /***************************************************************************/ -static void tul_append_pend_scb(HCS * pCurHcb, SCB * scbp) +static void initio_append_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) { #if DEBUG_QUEUE - printk("Append pend SCB %lx; ", (ULONG) scbp); + printk("Append pend SCB %p; ", scbp); #endif - scbp->SCB_Status = SCB_PEND; - scbp->SCB_NxtScb = NULL; - if (pCurHcb->HCS_LastPend != NULL) { - pCurHcb->HCS_LastPend->SCB_NxtScb = scbp; - pCurHcb->HCS_LastPend = scbp; + scbp->status = SCB_PEND; + scbp->next = NULL; + if (host->last_pending != NULL) { + host->last_pending->next = scbp; + host->last_pending = scbp; } else { - pCurHcb->HCS_FirstPend = scbp; - pCurHcb->HCS_LastPend = scbp; + host->first_pending = scbp; + host->last_pending = scbp; } } /***************************************************************************/ -static void tul_push_pend_scb(HCS * pCurHcb, SCB * scbp) +static void initio_push_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) { #if DEBUG_QUEUE - printk("Push pend SCB %lx; ", (ULONG) scbp); + printk("Push pend SCB %p; ", scbp); #endif - scbp->SCB_Status = SCB_PEND; - if ((scbp->SCB_NxtScb = pCurHcb->HCS_FirstPend) != NULL) { - pCurHcb->HCS_FirstPend = scbp; + scbp->status = SCB_PEND; + if ((scbp->next = host->first_pending) != NULL) { + host->first_pending = scbp; } else { - pCurHcb->HCS_FirstPend = scbp; - pCurHcb->HCS_LastPend = scbp; + host->first_pending = scbp; + host->last_pending = scbp; } } -/***************************************************************************/ -static SCB *tul_find_first_pend_scb(HCS * pCurHcb) +static struct scsi_ctrl_blk *initio_find_first_pend_scb(struct initio_host * host) { - SCB *pFirstPend; + struct scsi_ctrl_blk *first; - pFirstPend = pCurHcb->HCS_FirstPend; - while (pFirstPend != NULL) { - if (pFirstPend->SCB_Opcode != 
ExecSCSI) { - return (pFirstPend); - } - if (pFirstPend->SCB_TagMsg == 0) { - if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] == 0) && - !(pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) { - return (pFirstPend); - } + first = host->first_pending; + while (first != NULL) { + if (first->opcode != ExecSCSI) + return first; + if (first->tagmsg == 0) { + if ((host->act_tags[first->target] == 0) && + !(host->targets[first->target].flags & TCF_BUSY)) + return first; } else { - if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] >= - pCurHcb->HCS_MaxTags[pFirstPend->SCB_Target]) | - (pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) { - pFirstPend = pFirstPend->SCB_NxtScb; + if ((host->act_tags[first->target] >= + host->max_tags[first->target]) | + (host->targets[first->target].flags & TCF_BUSY)) { + first = first->next; continue; } - return (pFirstPend); + return first; } - pFirstPend = pFirstPend->SCB_NxtScb; + first = first->next; } - - - return (pFirstPend); + return first; } -/***************************************************************************/ -static void tul_unlink_pend_scb(HCS * pCurHcb, SCB * pCurScb) + +static void initio_unlink_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scb) { - SCB *pTmpScb, *pPrevScb; + struct scsi_ctrl_blk *tmp, *prev; #if DEBUG_QUEUE - printk("unlink pend SCB %lx; ", (ULONG) pCurScb); + printk("unlink pend SCB %p; ", scb); #endif - pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend; - while (pTmpScb != NULL) { - if (pCurScb == pTmpScb) { /* Unlink this SCB */ - if (pTmpScb == pCurHcb->HCS_FirstPend) { - if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastPend = NULL; + prev = tmp = host->first_pending; + while (tmp != NULL) { + if (scb == tmp) { /* Unlink this SCB */ + if (tmp == host->first_pending) { + if ((host->first_pending = tmp->next) == NULL) + host->last_pending = NULL; } else { - pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; - if (pTmpScb == 
pCurHcb->HCS_LastPend) - pCurHcb->HCS_LastPend = pPrevScb; + prev->next = tmp->next; + if (tmp == host->last_pending) + host->last_pending = prev; } - pTmpScb->SCB_NxtScb = NULL; + tmp->next = NULL; break; } - pPrevScb = pTmpScb; - pTmpScb = pTmpScb->SCB_NxtScb; + prev = tmp; + tmp = tmp->next; } - return; } -/***************************************************************************/ -static void tul_append_busy_scb(HCS * pCurHcb, SCB * scbp) + +static void initio_append_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) { #if DEBUG_QUEUE - printk("append busy SCB %lx; ", (ULONG) scbp); + printk("append busy SCB %o; ", scbp); #endif - if (scbp->SCB_TagMsg) - pCurHcb->HCS_ActTags[scbp->SCB_Target]++; + if (scbp->tagmsg) + host->act_tags[scbp->target]++; else - pCurHcb->HCS_Tcs[scbp->SCB_Target].TCS_Flags |= TCF_BUSY; - scbp->SCB_Status = SCB_BUSY; - scbp->SCB_NxtScb = NULL; - if (pCurHcb->HCS_LastBusy != NULL) { - pCurHcb->HCS_LastBusy->SCB_NxtScb = scbp; - pCurHcb->HCS_LastBusy = scbp; + host->targets[scbp->target].flags |= TCF_BUSY; + scbp->status = SCB_BUSY; + scbp->next = NULL; + if (host->last_busy != NULL) { + host->last_busy->next = scbp; + host->last_busy = scbp; } else { - pCurHcb->HCS_FirstBusy = scbp; - pCurHcb->HCS_LastBusy = scbp; + host->first_busy = scbp; + host->last_busy = scbp; } } /***************************************************************************/ -static SCB *tul_pop_busy_scb(HCS * pCurHcb) +static struct scsi_ctrl_blk *initio_pop_busy_scb(struct initio_host * host) { - SCB *pTmpScb; + struct scsi_ctrl_blk *tmp; - if ((pTmpScb = pCurHcb->HCS_FirstBusy) != NULL) { - if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastBusy = NULL; - pTmpScb->SCB_NxtScb = NULL; - if (pTmpScb->SCB_TagMsg) - pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--; + if ((tmp = host->first_busy) != NULL) { + if ((host->first_busy = tmp->next) == NULL) + host->last_busy = NULL; + tmp->next = NULL; + if (tmp->tagmsg) + 
host->act_tags[tmp->target]--; else - pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY; + host->targets[tmp->target].flags &= ~TCF_BUSY; } #if DEBUG_QUEUE - printk("Pop busy SCB %lx; ", (ULONG) pTmpScb); + printk("Pop busy SCB %p; ", tmp); #endif - return (pTmpScb); + return tmp; } /***************************************************************************/ -static void tul_unlink_busy_scb(HCS * pCurHcb, SCB * pCurScb) +static void initio_unlink_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scb) { - SCB *pTmpScb, *pPrevScb; + struct scsi_ctrl_blk *tmp, *prev; #if DEBUG_QUEUE - printk("unlink busy SCB %lx; ", (ULONG) pCurScb); + printk("unlink busy SCB %p; ", scb); #endif - pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; - while (pTmpScb != NULL) { - if (pCurScb == pTmpScb) { /* Unlink this SCB */ - if (pTmpScb == pCurHcb->HCS_FirstBusy) { - if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastBusy = NULL; - } else { - pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; - if (pTmpScb == pCurHcb->HCS_LastBusy) - pCurHcb->HCS_LastBusy = pPrevScb; - } - pTmpScb->SCB_NxtScb = NULL; - if (pTmpScb->SCB_TagMsg) - pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--; + prev = tmp = host->first_busy; + while (tmp != NULL) { + if (scb == tmp) { /* Unlink this SCB */ + if (tmp == host->first_busy) { + if ((host->first_busy = tmp->next) == NULL) + host->last_busy = NULL; + } else { + prev->next = tmp->next; + if (tmp == host->last_busy) + host->last_busy = prev; + } + tmp->next = NULL; + if (tmp->tagmsg) + host->act_tags[tmp->target]--; else - pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY; + host->targets[tmp->target].flags &= ~TCF_BUSY; break; } - pPrevScb = pTmpScb; - pTmpScb = pTmpScb->SCB_NxtScb; + prev = tmp; + tmp = tmp->next; } return; } -/***************************************************************************/ -SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun) +struct scsi_ctrl_blk *initio_find_busy_scb(struct 
initio_host * host, u16 tarlun) { - SCB *pTmpScb, *pPrevScb; - WORD scbp_tarlun; + struct scsi_ctrl_blk *tmp, *prev; + u16 scbp_tarlun; - pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; - while (pTmpScb != NULL) { - scbp_tarlun = (pTmpScb->SCB_Lun << 8) | (pTmpScb->SCB_Target); + prev = tmp = host->first_busy; + while (tmp != NULL) { + scbp_tarlun = (tmp->lun << 8) | (tmp->target); if (scbp_tarlun == tarlun) { /* Unlink this SCB */ break; } - pPrevScb = pTmpScb; - pTmpScb = pTmpScb->SCB_NxtScb; + prev = tmp; + tmp = tmp->next; } #if DEBUG_QUEUE - printk("find busy SCB %lx; ", (ULONG) pTmpScb); + printk("find busy SCB %p; ", tmp); #endif - return (pTmpScb); + return tmp; } -/***************************************************************************/ -static void tul_append_done_scb(HCS * pCurHcb, SCB * scbp) +static void initio_append_done_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) { - #if DEBUG_QUEUE - printk("append done SCB %lx; ", (ULONG) scbp); + printk("append done SCB %p; ", scbp); #endif - scbp->SCB_Status = SCB_DONE; - scbp->SCB_NxtScb = NULL; - if (pCurHcb->HCS_LastDone != NULL) { - pCurHcb->HCS_LastDone->SCB_NxtScb = scbp; - pCurHcb->HCS_LastDone = scbp; + scbp->status = SCB_DONE; + scbp->next = NULL; + if (host->last_done != NULL) { + host->last_done->next = scbp; + host->last_done = scbp; } else { - pCurHcb->HCS_FirstDone = scbp; - pCurHcb->HCS_LastDone = scbp; + host->first_done = scbp; + host->last_done = scbp; } } -/***************************************************************************/ -SCB *tul_find_done_scb(HCS * pCurHcb) +struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host) { - SCB *pTmpScb; - + struct scsi_ctrl_blk *tmp; - if ((pTmpScb = pCurHcb->HCS_FirstDone) != NULL) { - if ((pCurHcb->HCS_FirstDone = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastDone = NULL; - pTmpScb->SCB_NxtScb = NULL; + if ((tmp = host->first_done) != NULL) { + if ((host->first_done = tmp->next) == NULL) + host->last_done = NULL; + 
tmp->next = NULL; } #if DEBUG_QUEUE - printk("find done SCB %lx; ", (ULONG) pTmpScb); + printk("find done SCB %p; ",tmp); #endif - return (pTmpScb); + return tmp; } -/***************************************************************************/ -static int tul_abort_srb(HCS * pCurHcb, struct scsi_cmnd *srbp) +static int initio_abort_srb(struct initio_host * host, struct scsi_cmnd *srbp) { - ULONG flags; - SCB *pTmpScb, *pPrevScb; + unsigned long flags; + struct scsi_ctrl_blk *tmp, *prev; - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); + spin_lock_irqsave(&host->semaph_lock, flags); - if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) { - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); + if ((host->semaph == 0) && (host->active == NULL)) { /* disable Jasmin SCSI Int */ - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - tulip_main(pCurHcb); - - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); - - pCurHcb->HCS_Semaph = 1; - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - + outb(0x1F, host->addr + TUL_Mask); + spin_unlock_irqrestore(&host->semaph_lock, flags); + /* FIXME: synchronize_irq needed ? 
*/ + tulip_main(host); + spin_lock_irqsave(&host->semaph_lock, flags); + host->semaph = 1; + outb(0x0F, host->addr + TUL_Mask); + spin_unlock_irqrestore(&host->semaph_lock, flags); return SCSI_ABORT_SNOOZE; } - pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend; /* Check Pend queue */ - while (pTmpScb != NULL) { + prev = tmp = host->first_pending; /* Check Pend queue */ + while (tmp != NULL) { /* 07/27/98 */ - if (pTmpScb->SCB_Srb == srbp) { - if (pTmpScb == pCurHcb->HCS_ActScb) { - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); + if (tmp->srb == srbp) { + if (tmp == host->active) { + spin_unlock_irqrestore(&host->semaph_lock, flags); return SCSI_ABORT_BUSY; - } else if (pTmpScb == pCurHcb->HCS_FirstPend) { - if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastPend = NULL; - } else { - pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; - if (pTmpScb == pCurHcb->HCS_LastPend) - pCurHcb->HCS_LastPend = pPrevScb; - } - pTmpScb->SCB_HaStat = HOST_ABORTED; - pTmpScb->SCB_Flags |= SCF_DONE; - if (pTmpScb->SCB_Flags & SCF_POST) - (*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb); - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); + } else if (tmp == host->first_pending) { + if ((host->first_pending = tmp->next) == NULL) + host->last_pending = NULL; + } else { + prev->next = tmp->next; + if (tmp == host->last_pending) + host->last_pending = prev; + } + tmp->hastat = HOST_ABORTED; + tmp->flags |= SCF_DONE; + if (tmp->flags & SCF_POST) + (*tmp->post) ((u8 *) host, (u8 *) tmp); + spin_unlock_irqrestore(&host->semaph_lock, flags); return SCSI_ABORT_SUCCESS; } - pPrevScb = pTmpScb; - pTmpScb = pTmpScb->SCB_NxtScb; + prev = tmp; + tmp = tmp->next; } - pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */ - while (pTmpScb != NULL) { - - if (pTmpScb->SCB_Srb == srbp) { - - if (pTmpScb == pCurHcb->HCS_ActScb) { - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); + prev = tmp = host->first_busy; /* Check Busy queue */ 
+ while (tmp != NULL) { + if (tmp->srb == srbp) { + if (tmp == host->active) { + spin_unlock_irqrestore(&host->semaph_lock, flags); return SCSI_ABORT_BUSY; - } else if (pTmpScb->SCB_TagMsg == 0) { - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); + } else if (tmp->tagmsg == 0) { + spin_unlock_irqrestore(&host->semaph_lock, flags); return SCSI_ABORT_BUSY; } else { - pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--; - if (pTmpScb == pCurHcb->HCS_FirstBusy) { - if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastBusy = NULL; - } else { - pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; - if (pTmpScb == pCurHcb->HCS_LastBusy) - pCurHcb->HCS_LastBusy = pPrevScb; - } - pTmpScb->SCB_NxtScb = NULL; - - - pTmpScb->SCB_HaStat = HOST_ABORTED; - pTmpScb->SCB_Flags |= SCF_DONE; - if (pTmpScb->SCB_Flags & SCF_POST) - (*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb); - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); + host->act_tags[tmp->target]--; + if (tmp == host->first_busy) { + if ((host->first_busy = tmp->next) == NULL) + host->last_busy = NULL; + } else { + prev->next = tmp->next; + if (tmp == host->last_busy) + host->last_busy = prev; + } + tmp->next = NULL; + + + tmp->hastat = HOST_ABORTED; + tmp->flags |= SCF_DONE; + if (tmp->flags & SCF_POST) + (*tmp->post) ((u8 *) host, (u8 *) tmp); + spin_unlock_irqrestore(&host->semaph_lock, flags); return SCSI_ABORT_SUCCESS; } } - pPrevScb = pTmpScb; - pTmpScb = pTmpScb->SCB_NxtScb; + prev = tmp; + tmp = tmp->next; } - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - return (SCSI_ABORT_NOT_RUNNING); + spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_NOT_RUNNING; } /***************************************************************************/ -static int tul_bad_seq(HCS * pCurHcb) -{ - SCB *pCurScb; - - printk("tul_bad_seg c=%d\n", pCurHcb->HCS_Index); - - if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) { - tul_unlink_busy_scb(pCurHcb, pCurScb); - 
pCurScb->SCB_HaStat = HOST_BAD_PHAS; - pCurScb->SCB_TaStat = 0; - tul_append_done_scb(pCurHcb, pCurScb); - } - tul_stop_bm(pCurHcb); - - tul_reset_scsi(pCurHcb, 8); /* 7/29/98 */ - - return (tul_post_scsi_rst(pCurHcb)); -} - -#if 0 - -/************************************************************************/ -static int tul_device_reset(HCS * pCurHcb, struct scsi_cmnd *pSrb, - unsigned int target, unsigned int ResetFlags) +static int initio_bad_seq(struct initio_host * host) { - ULONG flags; - SCB *pScb; - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); - - if (ResetFlags & SCSI_RESET_ASYNCHRONOUS) { - - if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) { - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); - /* disable Jasmin SCSI Int */ - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - tulip_main(pCurHcb); - - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); - - pCurHcb->HCS_Semaph = 1; - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - return SCSI_RESET_SNOOZE; - } - pScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */ - while (pScb != NULL) { - if (pScb->SCB_Srb == pSrb) - break; - pScb = pScb->SCB_NxtScb; - } - if (pScb == NULL) { - printk("Unable to Reset - No SCB Found\n"); - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - return SCSI_RESET_NOT_RUNNING; - } - } - if ((pScb = tul_alloc_scb(pCurHcb)) == NULL) { - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - return SCSI_RESET_NOT_RUNNING; - } - pScb->SCB_Opcode = BusDevRst; - pScb->SCB_Flags = SCF_POST; - pScb->SCB_Target = target; - pScb->SCB_Mode = 0; - - pScb->SCB_Srb = NULL; - if (ResetFlags & SCSI_RESET_SYNCHRONOUS) { - pScb->SCB_Srb = pSrb; - } - tul_push_pend_scb(pCurHcb, pScb); /* push this SCB to Pending queue */ + struct scsi_ctrl_blk *scb; - if (pCurHcb->HCS_Semaph == 1) { - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); - /* disable Jasmin SCSI Int */ - pCurHcb->HCS_Semaph = 
0; - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - tulip_main(pCurHcb); - - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); + printk("initio_bad_seg c=%d\n", host->index); - pCurHcb->HCS_Semaph = 1; - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); + if ((scb = host->active) != NULL) { + initio_unlink_busy_scb(host, scb); + scb->hastat = HOST_BAD_PHAS; + scb->tastat = 0; + initio_append_done_scb(host, scb); } - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - return SCSI_RESET_PENDING; -} - -static int tul_reset_scsi_bus(HCS * pCurHcb) -{ - ULONG flags; - - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); - pCurHcb->HCS_Semaph = 0; - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - tul_stop_bm(pCurHcb); - - tul_reset_scsi(pCurHcb, 2); /* 7/29/98 */ - - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); - tul_post_scsi_rst(pCurHcb); - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - tulip_main(pCurHcb); - - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); - - pCurHcb->HCS_Semaph = 1; - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - return (SCSI_RESET_SUCCESS | SCSI_RESET_HOST_RESET); + initio_stop_bm(host); + initio_reset_scsi(host, 8); /* 7/29/98 */ + return initio_post_scsi_rst(host); } -#endif /* 0 */ /************************************************************************/ -static void tul_exec_scb(HCS * pCurHcb, SCB * pCurScb) +static void initio_exec_scb(struct initio_host * host, struct scsi_ctrl_blk * scb) { - ULONG flags; + unsigned long flags; - pCurScb->SCB_Mode = 0; + scb->mode = 0; - pCurScb->SCB_SGIdx = 0; - pCurScb->SCB_SGMax = pCurScb->SCB_SGLen; + scb->sgidx = 0; + scb->sgmax = scb->sglen; - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); + spin_lock_irqsave(&host->semaph_lock, flags); - tul_append_pend_scb(pCurHcb, pCurScb); /* Append this SCB to Pending queue 
*/ + initio_append_pend_scb(host, scb); /* Append this SCB to Pending queue */ /* VVVVV 07/21/98 */ - if (pCurHcb->HCS_Semaph == 1) { - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); - /* disable Jasmin SCSI Int */ - pCurHcb->HCS_Semaph = 0; - - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); - - tulip_main(pCurHcb); + if (host->semaph == 1) { + /* Disable Jasmin SCSI Int */ + outb(0x1F, host->addr + TUL_Mask); + host->semaph = 0; + spin_unlock_irqrestore(&host->semaph_lock, flags); - spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); + tulip_main(host); - pCurHcb->HCS_Semaph = 1; - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); + spin_lock_irqsave(&host->semaph_lock, flags); + host->semaph = 1; + outb(0x0F, host->addr + TUL_Mask); } - spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); + spin_unlock_irqrestore(&host->semaph_lock, flags); return; } /***************************************************************************/ -static int tul_isr(HCS * pCurHcb) +static int initio_isr(struct initio_host * host) { - /* Enter critical section */ - - if (TUL_RD(pCurHcb->HCS_Base, TUL_Int) & TSS_INT_PENDING) { - if (pCurHcb->HCS_Semaph == 1) { - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); + if (inb(host->addr + TUL_Int) & TSS_INT_PENDING) { + if (host->semaph == 1) { + outb(0x1F, host->addr + TUL_Mask); /* Disable Tulip SCSI Int */ - pCurHcb->HCS_Semaph = 0; + host->semaph = 0; - tulip_main(pCurHcb); + tulip_main(host); - pCurHcb->HCS_Semaph = 1; - TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); - return (1); + host->semaph = 1; + outb(0x0F, host->addr + TUL_Mask); + return 1; } } - return (0); + return 0; } -/***************************************************************************/ -int tulip_main(HCS * pCurHcb) +static int tulip_main(struct initio_host * host) { - SCB *pCurScb; + struct scsi_ctrl_blk *scb; for (;;) { + tulip_scsi(host); /* Call tulip_scsi */ - tulip_scsi(pCurHcb); /* Call tulip_scsi */ - - while ((pCurScb = tul_find_done_scb(pCurHcb)) != 
NULL) { /* find done entry */ - if (pCurScb->SCB_TaStat == INI_QUEUE_FULL) { - pCurHcb->HCS_MaxTags[pCurScb->SCB_Target] = - pCurHcb->HCS_ActTags[pCurScb->SCB_Target] - 1; - pCurScb->SCB_TaStat = 0; - tul_append_pend_scb(pCurHcb, pCurScb); + /* Walk the list of completed SCBs */ + while ((scb = initio_find_done_scb(host)) != NULL) { /* find done entry */ + if (scb->tastat == INI_QUEUE_FULL) { + host->max_tags[scb->target] = + host->act_tags[scb->target] - 1; + scb->tastat = 0; + initio_append_pend_scb(host, scb); continue; } - if (!(pCurScb->SCB_Mode & SCM_RSENS)) { /* not in auto req. sense mode */ - if (pCurScb->SCB_TaStat == 2) { + if (!(scb->mode & SCM_RSENS)) { /* not in auto req. sense mode */ + if (scb->tastat == 2) { /* clr sync. nego flag */ - if (pCurScb->SCB_Flags & SCF_SENSE) { - BYTE len; - len = pCurScb->SCB_SenseLen; + if (scb->flags & SCF_SENSE) { + u8 len; + len = scb->senselen; if (len == 0) len = 1; - pCurScb->SCB_BufLen = pCurScb->SCB_SenseLen; - pCurScb->SCB_BufPtr = pCurScb->SCB_SensePtr; - pCurScb->SCB_Flags &= ~(SCF_SG | SCF_DIR); /* for xfer_data_in */ -/* pCurScb->SCB_Flags |= SCF_NO_DCHK; */ - /* so, we won't report worng direction in xfer_data_in, + scb->buflen = scb->senselen; + scb->bufptr = scb->senseptr; + scb->flags &= ~(SCF_SG | SCF_DIR); /* for xfer_data_in */ + /* so, we won't report wrong direction in xfer_data_in, and won't report HOST_DO_DU in state_6 */ - pCurScb->SCB_Mode = SCM_RSENS; - pCurScb->SCB_Ident &= 0xBF; /* Disable Disconnect */ - pCurScb->SCB_TagMsg = 0; - pCurScb->SCB_TaStat = 0; - pCurScb->SCB_CDBLen = 6; - pCurScb->SCB_CDB[0] = SCSICMD_RequestSense; - pCurScb->SCB_CDB[1] = 0; - pCurScb->SCB_CDB[2] = 0; - pCurScb->SCB_CDB[3] = 0; - pCurScb->SCB_CDB[4] = len; - pCurScb->SCB_CDB[5] = 0; - tul_push_pend_scb(pCurHcb, pCurScb); + scb->mode = SCM_RSENS; + scb->ident &= 0xBF; /* Disable Disconnect */ + scb->tagmsg = 0; + scb->tastat = 0; + scb->cdblen = 6; + scb->cdb[0] = SCSICMD_RequestSense; + scb->cdb[1] = 0; + 
scb->cdb[2] = 0; + scb->cdb[3] = 0; + scb->cdb[4] = len; + scb->cdb[5] = 0; + initio_push_pend_scb(host, scb); break; } } } else { /* in request sense mode */ - if (pCurScb->SCB_TaStat == 2) { /* check contition status again after sending + if (scb->tastat == 2) { /* check contition status again after sending requset sense cmd 0x3 */ - pCurScb->SCB_HaStat = HOST_BAD_PHAS; + scb->hastat = HOST_BAD_PHAS; } - pCurScb->SCB_TaStat = 2; + scb->tastat = 2; } - pCurScb->SCB_Flags |= SCF_DONE; - if (pCurScb->SCB_Flags & SCF_POST) { - (*pCurScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pCurScb); + scb->flags |= SCF_DONE; + if (scb->flags & SCF_POST) { + /* FIXME: only one post method and lose casts */ + (*scb->post) ((u8 *) host, (u8 *) scb); } } /* while */ - /* find_active: */ - if (TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0) & TSS_INT_PENDING) + if (inb(host->addr + TUL_SStatus0) & TSS_INT_PENDING) continue; - - if (pCurHcb->HCS_ActScb) { /* return to OS and wait for xfer_done_ISR/Selected_ISR */ + if (host->active) /* return to OS and wait for xfer_done_ISR/Selected_ISR */ return 1; /* return to OS, enable interrupt */ - } /* Check pending SCB */ - if (tul_find_first_pend_scb(pCurHcb) == NULL) { + if (initio_find_first_pend_scb(host) == NULL) return 1; /* return to OS, enable interrupt */ - } } /* End of for loop */ /* statement won't reach here */ } - - - -/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ -/***************************************************************************/ -/***************************************************************************/ -/***************************************************************************/ -/***************************************************************************/ - -/***************************************************************************/ -void tulip_scsi(HCS * pCurHcb) +static void tulip_scsi(struct initio_host * host) { - SCB *pCurScb; - TCS *pCurTcb; + struct scsi_ctrl_blk *scb; + struct 
target_control *active_tc; /* make sure to service interrupt asap */ - - if ((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) & TSS_INT_PENDING) { - - pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK; - pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1); - pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); - if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* SCSI bus reset detected */ - int_tul_scsi_rst(pCurHcb); + if ((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING) { + host->phase = host->jsstatus0 & TSS_PH_MASK; + host->jsstatus1 = inb(host->addr + TUL_SStatus1); + host->jsint = inb(host->addr + TUL_SInt); + if (host->jsint & TSS_SCSIRST_INT) { /* SCSI bus reset detected */ + int_initio_scsi_rst(host); return; } - if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) { /* if selected/reselected interrupt */ - if (int_tul_resel(pCurHcb) == 0) - tul_next_state(pCurHcb); + if (host->jsint & TSS_RESEL_INT) { /* if selected/reselected interrupt */ + if (int_initio_resel(host) == 0) + initio_next_state(host); return; } - if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) { - int_tul_busfree(pCurHcb); + if (host->jsint & TSS_SEL_TIMEOUT) { + int_initio_busfree(host); return; } - if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ - int_tul_busfree(pCurHcb); /* unexpected bus free or sel timeout */ + if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ + int_initio_busfree(host); /* unexpected bus free or sel timeout */ return; } - if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) { /* func complete or Bus service */ - if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) - tul_next_state(pCurHcb); + if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV)) { /* func complete or Bus service */ + if ((scb = host->active) != NULL) + initio_next_state(host); return; } } - if (pCurHcb->HCS_ActScb != NULL) + if (host->active != NULL) return; - if ((pCurScb = tul_find_first_pend_scb(pCurHcb)) == NULL) + if ((scb = 
initio_find_first_pend_scb(host)) == NULL) return; /* program HBA's SCSI ID & target SCSI ID */ - TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId, - (pCurHcb->HCS_SCSI_ID << 4) | (pCurScb->SCB_Target & 0x0F)); - if (pCurScb->SCB_Opcode == ExecSCSI) { - pCurTcb = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; + outb((host->scsi_id << 4) | (scb->target & 0x0F), + host->addr + TUL_SScsiId); + if (scb->opcode == ExecSCSI) { + active_tc = &host->targets[scb->target]; - if (pCurScb->SCB_TagMsg) - pCurTcb->TCS_DrvFlags |= TCF_DRV_EN_TAG; + if (scb->tagmsg) + active_tc->drv_flags |= TCF_DRV_EN_TAG; else - pCurTcb->TCS_DrvFlags &= ~TCF_DRV_EN_TAG; + active_tc->drv_flags &= ~TCF_DRV_EN_TAG; - TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period); - if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { /* do wdtr negotiation */ - tul_select_atn_stop(pCurHcb, pCurScb); + outb(active_tc->js_period, host->addr + TUL_SPeriod); + if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { /* do wdtr negotiation */ + initio_select_atn_stop(host, scb); } else { - if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync negotiation */ - tul_select_atn_stop(pCurHcb, pCurScb); + if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync negotiation */ + initio_select_atn_stop(host, scb); } else { - if (pCurScb->SCB_TagMsg) - tul_select_atn3(pCurHcb, pCurScb); + if (scb->tagmsg) + initio_select_atn3(host, scb); else - tul_select_atn(pCurHcb, pCurScb); + initio_select_atn(host, scb); } } - if (pCurScb->SCB_Flags & SCF_POLL) { - while (wait_tulip(pCurHcb) != -1) { - if (tul_next_state(pCurHcb) == -1) + if (scb->flags & SCF_POLL) { + while (wait_tulip(host) != -1) { + if (initio_next_state(host) == -1) break; } } - } else if (pCurScb->SCB_Opcode == BusDevRst) { - tul_select_atn_stop(pCurHcb, pCurScb); - pCurScb->SCB_NxtStat = 8; - if (pCurScb->SCB_Flags & SCF_POLL) { - while (wait_tulip(pCurHcb) != -1) { - if (tul_next_state(pCurHcb) == 
-1) + } else if (scb->opcode == BusDevRst) { + initio_select_atn_stop(host, scb); + scb->next_state = 8; + if (scb->flags & SCF_POLL) { + while (wait_tulip(host) != -1) { + if (initio_next_state(host) == -1) break; } } - } else if (pCurScb->SCB_Opcode == AbortCmd) { - if (tul_abort_srb(pCurHcb, pCurScb->SCB_Srb) != 0) { - - - tul_unlink_pend_scb(pCurHcb, pCurScb); - - tul_release_scb(pCurHcb, pCurScb); + } else if (scb->opcode == AbortCmd) { + if (initio_abort_srb(host, scb->srb) != 0) { + initio_unlink_pend_scb(host, scb); + initio_release_scb(host, scb); } else { - pCurScb->SCB_Opcode = BusDevRst; - tul_select_atn_stop(pCurHcb, pCurScb); - pCurScb->SCB_NxtStat = 8; + scb->opcode = BusDevRst; + initio_select_atn_stop(host, scb); + scb->next_state = 8; } - -/* 08/03/98 */ } else { - tul_unlink_pend_scb(pCurHcb, pCurScb); - pCurScb->SCB_HaStat = 0x16; /* bad command */ - tul_append_done_scb(pCurHcb, pCurScb); + initio_unlink_pend_scb(host, scb); + scb->hastat = 0x16; /* bad command */ + initio_append_done_scb(host, scb); } return; } +/** + * initio_next_state - Next SCSI state + * @host: InitIO host we are processing + * + * Progress the active command block along the state machine + * until we hit a state which we must wait for activity to occur. + * + * Returns zero or a negative code. 
+ */ -/***************************************************************************/ -int tul_next_state(HCS * pCurHcb) +static int initio_next_state(struct initio_host * host) { int next; - next = pCurHcb->HCS_ActScb->SCB_NxtStat; + next = host->active->next_state; for (;;) { switch (next) { case 1: - next = tul_state_1(pCurHcb); + next = initio_state_1(host); break; case 2: - next = tul_state_2(pCurHcb); + next = initio_state_2(host); break; case 3: - next = tul_state_3(pCurHcb); + next = initio_state_3(host); break; case 4: - next = tul_state_4(pCurHcb); + next = initio_state_4(host); break; case 5: - next = tul_state_5(pCurHcb); + next = initio_state_5(host); break; case 6: - next = tul_state_6(pCurHcb); + next = initio_state_6(host); break; case 7: - next = tul_state_7(pCurHcb); + next = initio_state_7(host); break; case 8: - return (tul_bus_device_reset(pCurHcb)); + return initio_bus_device_reset(host); default: - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } if (next <= 0) return next; @@ -1554,338 +1314,363 @@ } -/***************************************************************************/ -/* sTate after selection with attention & stop */ -int tul_state_1(HCS * pCurHcb) +/** + * initio_state_1 - SCSI state machine + * @host: InitIO host we are controlling + * + * Perform SCSI state processing for Select/Attention/Stop + */ + +static int initio_state_1(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; - TCS *pCurTcb = pCurHcb->HCS_ActTcs; + struct scsi_ctrl_blk *scb = host->active; + struct target_control *active_tc = host->active_tc; #if DEBUG_STATE printk("-s1-"); #endif - tul_unlink_pend_scb(pCurHcb, pCurScb); - tul_append_busy_scb(pCurHcb, pCurScb); + /* Move the SCB from pending to busy */ + initio_unlink_pend_scb(host, scb); + initio_append_busy_scb(host, scb); - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0); + outb(active_tc->sconfig0, host->addr + TUL_SConfig ); /* ATN on */ - if (pCurHcb->HCS_Phase 
== MSG_OUT) { - - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, (TSC_EN_BUS_IN | TSC_HW_RESELECT)); - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident); - - if (pCurScb->SCB_TagMsg) { - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId); - } - if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { - - pCurTcb->TCS_Flags |= TCF_WDTR_DONE; - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2); /* Extended msg length */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* Sync request */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* Start from 16 bits */ - } else if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { - - pCurTcb->TCS_Flags |= TCF_SYNC_DONE; - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* extended msg length */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* sync request */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET); /* REQ/ACK offset */ + if (host->phase == MSG_OUT) { + outb(TSC_EN_BUS_IN | TSC_HW_RESELECT, host->addr + TUL_SCtrl1); + outb(scb->ident, host->addr + TUL_SFifo); + + if (scb->tagmsg) { + outb(scb->tagmsg, host->addr + TUL_SFifo); + outb(scb->tagid, host->addr + TUL_SFifo); + } + if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { + active_tc->flags |= TCF_WDTR_DONE; + outb(MSG_EXTEND, host->addr + TUL_SFifo); + outb(2, host->addr + TUL_SFifo); /* Extended msg length */ + outb(3, host->addr + TUL_SFifo); /* Sync request */ + outb(1, host->addr + TUL_SFifo); /* Start from 16 bits */ + } else if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { + active_tc->flags |= TCF_SYNC_DONE; + outb(MSG_EXTEND, host->addr + TUL_SFifo); + outb(3, host->addr + TUL_SFifo); /* extended msg length */ + outb(1, host->addr + TUL_SFifo); /* sync 
request */ + outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo); + outb(MAX_OFFSET, host->addr + TUL_SFifo); /* REQ/ACK offset */ } - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) - return (-1); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; } - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7))); - return (3); + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal); + /* Into before CDB xfer */ + return 3; } -/***************************************************************************/ -/* state after selection with attention */ -/* state after selection with attention3 */ -int tul_state_2(HCS * pCurHcb) +/** + * initio_state_2 - SCSI state machine + * @host: InitIO host we are controlling + * + * state after selection with attention + * state after selection with attention3 + */ + +static int initio_state_2(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; - TCS *pCurTcb = pCurHcb->HCS_ActTcs; + struct scsi_ctrl_blk *scb = host->active; + struct target_control *active_tc = host->active_tc; #if DEBUG_STATE printk("-s2-"); #endif - tul_unlink_pend_scb(pCurHcb, pCurScb); - tul_append_busy_scb(pCurHcb, pCurScb); + initio_unlink_pend_scb(host, scb); + initio_append_busy_scb(host, scb); - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0); + outb(active_tc->sconfig0, host->addr + TUL_SConfig); - if (pCurHcb->HCS_JSStatus1 & TSS_CMD_PH_CMP) { - return (4); - } - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7))); - return (3); + if (host->jsstatus1 & TSS_CMD_PH_CMP) + return 4; + + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + 
outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal); + /* Into before CDB xfer */ + return 3; } -/***************************************************************************/ -/* state before CDB xfer is done */ -int tul_state_3(HCS * pCurHcb) +/** + * initio_state_3 - SCSI state machine + * @host: InitIO host we are controlling + * + * state before CDB xfer is done + */ + +static int initio_state_3(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; - TCS *pCurTcb = pCurHcb->HCS_ActTcs; + struct scsi_ctrl_blk *scb = host->active; + struct target_control *active_tc = host->active_tc; int i; #if DEBUG_STATE printk("-s3-"); #endif for (;;) { - switch (pCurHcb->HCS_Phase) { + switch (host->phase) { case CMD_OUT: /* Command out phase */ - for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++) - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) - return (-1); - if (pCurHcb->HCS_Phase == CMD_OUT) { - return (tul_bad_seq(pCurHcb)); - } - return (4); + for (i = 0; i < (int) scb->cdblen; i++) + outb(scb->cdb[i], host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + if (host->phase == CMD_OUT) + return initio_bad_seq(host); + return 4; case MSG_IN: /* Message in phase */ - pCurScb->SCB_NxtStat = 3; - if (tul_msgin(pCurHcb) == -1) - return (-1); + scb->next_state = 3; + if (initio_msgin(host) == -1) + return -1; break; case STATUS_IN: /* Status phase */ - if (tul_status_msg(pCurHcb) == -1) - return (-1); + if (initio_status_msg(host) == -1) + return -1; break; case MSG_OUT: /* Message out phase */ - if (pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) { + if (active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) { + outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */ + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + } else 
{ + active_tc->flags |= TCF_SYNC_DONE; - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) - return (-1); - - } else { - pCurTcb->TCS_Flags |= TCF_SYNC_DONE; - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* ext. msg len */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* sync request */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET); /* REQ/ACK offset */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) - return (-1); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)); + outb(MSG_EXTEND, host->addr + TUL_SFifo); + outb(3, host->addr + TUL_SFifo); /* ext. msg len */ + outb(1, host->addr + TUL_SFifo); /* sync request */ + outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo); + outb(MAX_OFFSET, host->addr + TUL_SFifo); /* REQ/ACK offset */ + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + outb(inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7), host->addr + TUL_SSignal); } break; - default: - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } } } - -/***************************************************************************/ -int tul_state_4(HCS * pCurHcb) +/** + * initio_state_4 - SCSI state machine + * @host: InitIO host we are controlling + * + * SCSI state machine. 
State 4 + */ + +static int initio_state_4(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; + struct scsi_ctrl_blk *scb = host->active; #if DEBUG_STATE printk("-s4-"); #endif - if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_NO_XF) { - return (6); /* Go to state 6 */ + if ((scb->flags & SCF_DIR) == SCF_NO_XF) { + return 6; /* Go to state 6 (After data) */ } for (;;) { - if (pCurScb->SCB_BufLen == 0) - return (6); /* Go to state 6 */ + if (scb->buflen == 0) + return 6; - switch (pCurHcb->HCS_Phase) { + switch (host->phase) { case STATUS_IN: /* Status phase */ - if ((pCurScb->SCB_Flags & SCF_DIR) != 0) { /* if direction bit set then report data underrun */ - pCurScb->SCB_HaStat = HOST_DO_DU; - } - if ((tul_status_msg(pCurHcb)) == -1) - return (-1); + if ((scb->flags & SCF_DIR) != 0) /* if direction bit set then report data underrun */ + scb->hastat = HOST_DO_DU; + if ((initio_status_msg(host)) == -1) + return -1; break; case MSG_IN: /* Message in phase */ - pCurScb->SCB_NxtStat = 0x4; - if (tul_msgin(pCurHcb) == -1) - return (-1); + scb->next_state = 0x4; + if (initio_msgin(host) == -1) + return -1; break; case MSG_OUT: /* Message out phase */ - if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { - pCurScb->SCB_BufLen = 0; - pCurScb->SCB_HaStat = HOST_DO_DU; - if (tul_msgout_ide(pCurHcb) == -1) - return (-1); - return (6); /* Go to state 6 */ - } else { - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) - return (-1); + if (host->jsstatus0 & TSS_PAR_ERROR) { + scb->buflen = 0; + scb->hastat = HOST_DO_DU; + if (initio_msgout_ide(host) == -1) + return -1; + return 6; + } else { + outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */ + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; } break; case DATA_IN: /* Data in phase */ - return (tul_xfer_data_in(pCurHcb)); + return initio_xfer_data_in(host); case DATA_OUT: /* Data out 
phase */ - return (tul_xfer_data_out(pCurHcb)); + return initio_xfer_data_out(host); default: - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } } } -/***************************************************************************/ -/* state after dma xfer done or phase change before xfer done */ -int tul_state_5(HCS * pCurHcb) +/** + * initio_state_5 - SCSI state machine + * @host: InitIO host we are controlling + * + * State after dma xfer done or phase change before xfer done + */ + +static int initio_state_5(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; + struct scsi_ctrl_blk *scb = host->active; long cnt, xcnt; /* cannot use unsigned !! code: if (xcnt < 0) */ #if DEBUG_STATE printk("-s5-"); #endif -/*------ get remaining count -------*/ + /*------ get remaining count -------*/ + cnt = inl(host->addr + TUL_SCnt0) & 0x0FFFFFF; - cnt = TUL_RDLONG(pCurHcb->HCS_Base, TUL_SCnt0) & 0x0FFFFFF; - - if (TUL_RD(pCurHcb->HCS_Base, TUL_XCmd) & 0x20) { + if (inb(host->addr + TUL_XCmd) & 0x20) { /* ----------------------- DATA_IN ----------------------------- */ /* check scsi parity error */ - if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { - pCurScb->SCB_HaStat = HOST_DO_DU; - } - if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* DMA xfer pending, Send STOP */ + if (host->jsstatus0 & TSS_PAR_ERROR) + scb->hastat = HOST_DO_DU; + if (inb(host->addr + TUL_XStatus) & XPEND) { /* DMA xfer pending, Send STOP */ /* tell Hardware scsi xfer has been terminated */ - TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, TUL_RD(pCurHcb->HCS_Base, TUL_XCtrl) | 0x80); + outb(inb(host->addr + TUL_XCtrl) | 0x80, host->addr + TUL_XCtrl); /* wait until DMA xfer not pending */ - while (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND); + while (inb(host->addr + TUL_XStatus) & XPEND) + cpu_relax(); } } else { -/*-------- DATA OUT -----------*/ - if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0) { - if (pCurHcb->HCS_ActTcs->TCS_JS_Period & TSC_WIDE_SCSI) - 
cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F) << 1; + /*-------- DATA OUT -----------*/ + if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0) { + if (host->active_tc->js_period & TSC_WIDE_SCSI) + cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F) << 1; else - cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F); + cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F); } - if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */ - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT); + if (inb(host->addr + TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */ + outb(TAX_X_ABT, host->addr + TUL_XCmd); /* wait Abort DMA xfer done */ - while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0); - } - if ((cnt == 1) && (pCurHcb->HCS_Phase == DATA_OUT)) { - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) { - return (-1); + while ((inb(host->addr + TUL_Int) & XABT) == 0) + cpu_relax(); } + if ((cnt == 1) && (host->phase == DATA_OUT)) { + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; cnt = 0; } else { - if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0) - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); + if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0) + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); } } - if (cnt == 0) { - pCurScb->SCB_BufLen = 0; - return (6); /* Go to state 6 */ + scb->buflen = 0; + return 6; /* After Data */ } /* Update active data pointer */ - xcnt = (long) pCurScb->SCB_BufLen - cnt; /* xcnt== bytes already xferred */ - pCurScb->SCB_BufLen = (U32) cnt; /* cnt == bytes left to be xferred */ - if (pCurScb->SCB_Flags & SCF_SG) { - register SG *sgp; - ULONG i; - - sgp = &pCurScb->SCB_SGList[pCurScb->SCB_SGIdx]; - for (i = pCurScb->SCB_SGIdx; i < pCurScb->SCB_SGMax; sgp++, i++) { - xcnt -= (long) sgp->SG_Len; + xcnt = (long) scb->buflen - cnt; /* xcnt== bytes already xferred */ + 
scb->buflen = (u32) cnt; /* cnt == bytes left to be xferred */ + if (scb->flags & SCF_SG) { + struct sg_entry *sgp; + unsigned long i; + + sgp = &scb->sglist[scb->sgidx]; + for (i = scb->sgidx; i < scb->sgmax; sgp++, i++) { + xcnt -= (long) sgp->len; if (xcnt < 0) { /* this sgp xfer half done */ - xcnt += (long) sgp->SG_Len; /* xcnt == bytes xferred in this sgp */ - sgp->SG_Ptr += (U32) xcnt; /* new ptr to be xfer */ - sgp->SG_Len -= (U32) xcnt; /* new len to be xfer */ - pCurScb->SCB_BufPtr += ((U32) (i - pCurScb->SCB_SGIdx) << 3); + xcnt += (long) sgp->len; /* xcnt == bytes xferred in this sgp */ + sgp->data += (u32) xcnt; /* new ptr to be xfer */ + sgp->len -= (u32) xcnt; /* new len to be xfer */ + scb->bufptr += ((u32) (i - scb->sgidx) << 3); /* new SG table ptr */ - pCurScb->SCB_SGLen = (BYTE) (pCurScb->SCB_SGMax - i); + scb->sglen = (u8) (scb->sgmax - i); /* new SG table len */ - pCurScb->SCB_SGIdx = (WORD) i; + scb->sgidx = (u16) i; /* for next disc and come in this loop */ - return (4); /* Go to state 4 */ + return 4; /* Go to state 4 */ } /* else (xcnt >= 0 , i.e. 
this sgp already xferred */ } /* for */ - return (6); /* Go to state 6 */ + return 6; /* Go to state 6 */ } else { - pCurScb->SCB_BufPtr += (U32) xcnt; + scb->bufptr += (u32) xcnt; } - return (4); /* Go to state 4 */ + return 4; /* Go to state 4 */ } -/***************************************************************************/ -/* state after Data phase */ -int tul_state_6(HCS * pCurHcb) +/** + * initio_state_6 - SCSI state machine + * @host: InitIO host we are controlling + * + * State after Data phase + */ + +static int initio_state_6(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; + struct scsi_ctrl_blk *scb = host->active; #if DEBUG_STATE printk("-s6-"); #endif for (;;) { - switch (pCurHcb->HCS_Phase) { + switch (host->phase) { case STATUS_IN: /* Status phase */ - if ((tul_status_msg(pCurHcb)) == -1) - return (-1); + if ((initio_status_msg(host)) == -1) + return -1; break; case MSG_IN: /* Message in phase */ - pCurScb->SCB_NxtStat = 6; - if ((tul_msgin(pCurHcb)) == -1) - return (-1); + scb->next_state = 6; + if ((initio_msgin(host)) == -1) + return -1; break; case MSG_OUT: /* Message out phase */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if (wait_tulip(pCurHcb) == -1) - return (-1); + outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */ + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; break; case DATA_IN: /* Data in phase */ - return (tul_xpad_in(pCurHcb)); + return initio_xpad_in(host); case DATA_OUT: /* Data out phase */ - return (tul_xpad_out(pCurHcb)); + return initio_xpad_out(host); default: - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } } } -/***************************************************************************/ -int tul_state_7(HCS * pCurHcb) +/** + * initio_state_7 - SCSI state machine + * @host: InitIO host we are controlling + * + */ + +int initio_state_7(struct initio_host * host) { 
int cnt, i; @@ -1893,1139 +1678,1029 @@ printk("-s7-"); #endif /* flush SCSI FIFO */ - cnt = TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F; + cnt = inb(host->addr + TUL_SFifoCnt) & 0x1F; if (cnt) { for (i = 0; i < cnt; i++) - TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); + inb(host->addr + TUL_SFifo); } - switch (pCurHcb->HCS_Phase) { + switch (host->phase) { case DATA_IN: /* Data in phase */ case DATA_OUT: /* Data out phase */ - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); default: - return (6); /* Go to state 6 */ + return 6; /* Go to state 6 */ } } -/***************************************************************************/ -int tul_xfer_data_in(HCS * pCurHcb) +/** + * initio_xfer_data_in - Commence data input + * @host: InitIO host in use + * + * Commence a block of data transfer. The transfer itself will + * be managed by the controller and we will get a completion (or + * failure) interrupt. + */ +static int initio_xfer_data_in(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; + struct scsi_ctrl_blk *scb = host->active; - if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DOUT) { - return (6); /* wrong direction */ - } - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen); + if ((scb->flags & SCF_DIR) == SCF_DOUT) + return 6; /* wrong direction */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_IN); /* 7/25/95 */ + outl(scb->buflen, host->addr + TUL_SCnt0); + outb(TSC_XF_DMA_IN, host->addr + TUL_SCmd); /* 7/25/95 */ - if (pCurScb->SCB_Flags & SCF_SG) { /* S/G xfer */ - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3); - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_IN); + if (scb->flags & SCF_SG) { /* S/G xfer */ + outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH); + outl(scb->bufptr, host->addr + TUL_XAddH); + outb(TAX_SG_IN, host->addr + TUL_XCmd); } else { - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen); - 
TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_IN); + outl(scb->buflen, host->addr + TUL_XCntH); + outl(scb->bufptr, host->addr + TUL_XAddH); + outb(TAX_X_IN, host->addr + TUL_XCmd); } - pCurScb->SCB_NxtStat = 0x5; - return (0); /* return to OS, wait xfer done , let jas_isr come in */ + scb->next_state = 0x5; + return 0; /* return to OS, wait xfer done , let jas_isr come in */ } +/** + * initio_xfer_data_out - Commence data output + * @host: InitIO host in use + * + * Commence a block of data transfer. The transfer itself will + * be managed by the controller and we will get a completion (or + * failure) interrupt. + */ -/***************************************************************************/ -int tul_xfer_data_out(HCS * pCurHcb) +static int initio_xfer_data_out(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; + struct scsi_ctrl_blk *scb = host->active; - if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DIN) { - return (6); /* wrong direction */ - } - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_OUT); + if ((scb->flags & SCF_DIR) == SCF_DIN) + return 6; /* wrong direction */ - if (pCurScb->SCB_Flags & SCF_SG) { /* S/G xfer */ - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3); - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_OUT); + outl(scb->buflen, host->addr + TUL_SCnt0); + outb(TSC_XF_DMA_OUT, host->addr + TUL_SCmd); + + if (scb->flags & SCF_SG) { /* S/G xfer */ + outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH); + outl(scb->bufptr, host->addr + TUL_XAddH); + outb(TAX_SG_OUT, host->addr + TUL_XCmd); } else { - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen); - TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_OUT); + outl(scb->buflen, host->addr + 
TUL_XCntH); + outl(scb->bufptr, host->addr + TUL_XAddH); + outb(TAX_X_OUT, host->addr + TUL_XCmd); } - pCurScb->SCB_NxtStat = 0x5; - return (0); /* return to OS, wait xfer done , let jas_isr come in */ + scb->next_state = 0x5; + return 0; /* return to OS, wait xfer done , let jas_isr come in */ } - -/***************************************************************************/ -int tul_xpad_in(HCS * pCurHcb) +int initio_xpad_in(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; - TCS *pCurTcb = pCurHcb->HCS_ActTcs; + struct scsi_ctrl_blk *scb = host->active; + struct target_control *active_tc = host->active_tc; - if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) { - pCurScb->SCB_HaStat = HOST_DO_DU; /* over run */ - } + if ((scb->flags & SCF_DIR) != SCF_NO_DCHK) + scb->hastat = HOST_DO_DU; /* over run */ for (;;) { - if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI) - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2); + if (active_tc->js_period & TSC_WIDE_SCSI) + outl(2, host->addr + TUL_SCnt0); else - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); + outl(1, host->addr + TUL_SCnt0); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if ((wait_tulip(pCurHcb)) == -1) { - return (-1); - } - if (pCurHcb->HCS_Phase != DATA_IN) { - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - return (6); + outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + if (host->phase != DATA_IN) { + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + return 6; } - TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); + inb(host->addr + TUL_SFifo); } } -int tul_xpad_out(HCS * pCurHcb) +int initio_xpad_out(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; - TCS *pCurTcb = pCurHcb->HCS_ActTcs; + struct scsi_ctrl_blk *scb = host->active; + struct target_control *active_tc = host->active_tc; - if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) { - pCurScb->SCB_HaStat = HOST_DO_DU; /* over run */ - } + if ((scb->flags & SCF_DIR) != SCF_NO_DCHK) 
+ scb->hastat = HOST_DO_DU; /* over run */ for (;;) { - if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI) - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2); + if (active_tc->js_period & TSC_WIDE_SCSI) + outl(2, host->addr + TUL_SCnt0); else - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); + outl(1, host->addr + TUL_SCnt0); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - if ((wait_tulip(pCurHcb)) == -1) { - return (-1); - } - if (pCurHcb->HCS_Phase != DATA_OUT) { /* Disable wide CPU to allow read 16 bits */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - return (6); + outb(0, host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + if ((wait_tulip(host)) == -1) + return -1; + if (host->phase != DATA_OUT) { /* Disable wide CPU to allow read 16 bits */ + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + return 6; } } } - -/***************************************************************************/ -int tul_status_msg(HCS * pCurHcb) +int initio_status_msg(struct initio_host * host) { /* status & MSG_IN */ - SCB *pCurScb = pCurHcb->HCS_ActScb; - BYTE msg; + struct scsi_ctrl_blk *scb = host->active; + u8 msg; + + outb(TSC_CMD_COMP, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_CMD_COMP); - if ((wait_tulip(pCurHcb)) == -1) { - return (-1); - } /* get status */ - pCurScb->SCB_TaStat = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); + scb->tastat = inb(host->addr + TUL_SFifo); - if (pCurHcb->HCS_Phase == MSG_OUT) { - if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY); - } else { - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); - } - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - return (wait_tulip(pCurHcb)); - } - if (pCurHcb->HCS_Phase == MSG_IN) { - msg = TUL_RD(pCurHcb->HCS_Base, 
TUL_SFifo); - if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { /* Parity error */ - if ((tul_msgin_accept(pCurHcb)) == -1) - return (-1); - if (pCurHcb->HCS_Phase != MSG_OUT) - return (tul_bad_seq(pCurHcb)); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - return (wait_tulip(pCurHcb)); + if (host->phase == MSG_OUT) { + if (host->jsstatus0 & TSS_PAR_ERROR) + outb(MSG_PARITY, host->addr + TUL_SFifo); + else + outb(MSG_NOP, host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return wait_tulip(host); + } + if (host->phase == MSG_IN) { + msg = inb(host->addr + TUL_SFifo); + if (host->jsstatus0 & TSS_PAR_ERROR) { /* Parity error */ + if ((initio_msgin_accept(host)) == -1) + return -1; + if (host->phase != MSG_OUT) + return initio_bad_seq(host); + outb(MSG_PARITY, host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return wait_tulip(host); } if (msg == 0) { /* Command complete */ - if ((pCurScb->SCB_TaStat & 0x18) == 0x10) { /* No link support */ - return (tul_bad_seq(pCurHcb)); - } - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); - return tul_wait_done_disc(pCurHcb); + if ((scb->tastat & 0x18) == 0x10) /* No link support */ + return initio_bad_seq(host); + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); + return initio_wait_done_disc(host); } - if ((msg == MSG_LINK_COMP) || (msg == MSG_LINK_FLAG)) { - if ((pCurScb->SCB_TaStat & 0x18) == 0x10) - return (tul_msgin_accept(pCurHcb)); + if (msg == MSG_LINK_COMP || msg == MSG_LINK_FLAG) { + if ((scb->tastat & 0x18) == 0x10) + return initio_msgin_accept(host); } } - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } -/***************************************************************************/ /* scsi bus free */ -int int_tul_busfree(HCS * pCurHcb) +int int_initio_busfree(struct initio_host * host) { - 
SCB *pCurScb = pCurHcb->HCS_ActScb; + struct scsi_ctrl_blk *scb = host->active; - if (pCurScb != NULL) { - if (pCurScb->SCB_Status & SCB_SELECT) { /* selection timeout */ - tul_unlink_pend_scb(pCurHcb, pCurScb); - pCurScb->SCB_HaStat = HOST_SEL_TOUT; - tul_append_done_scb(pCurHcb, pCurScb); + if (scb != NULL) { + if (scb->status & SCB_SELECT) { /* selection timeout */ + initio_unlink_pend_scb(host, scb); + scb->hastat = HOST_SEL_TOUT; + initio_append_done_scb(host, scb); } else { /* Unexpected bus free */ - tul_unlink_busy_scb(pCurHcb, pCurScb); - pCurScb->SCB_HaStat = HOST_BUS_FREE; - tul_append_done_scb(pCurHcb, pCurScb); - } - pCurHcb->HCS_ActScb = NULL; - pCurHcb->HCS_ActTcs = NULL; - } - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ - return (-1); + initio_unlink_busy_scb(host, scb); + scb->hastat = HOST_BUS_FREE; + initio_append_done_scb(host, scb); + } + host->active = NULL; + host->active_tc = NULL; + } + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ + outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ + return -1; } -/***************************************************************************/ -/* scsi bus reset */ -static int int_tul_scsi_rst(HCS * pCurHcb) +/** + * int_initio_scsi_rst - SCSI reset occurred + * @host: Host seeing the reset + * + * A SCSI bus reset has occurred. Clean up any pending transfer + * the hardware is doing by DMA and then abort all active and + * disconnected commands. 
The mid layer should sort the rest out + * for us + */ + +static int int_initio_scsi_rst(struct initio_host * host) { - SCB *pCurScb; + struct scsi_ctrl_blk *scb; int i; /* if DMA xfer is pending, abort DMA xfer */ - if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & 0x01) { - TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO); + if (inb(host->addr + TUL_XStatus) & 0x01) { + outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd); /* wait Abort DMA xfer done */ - while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & 0x04) == 0); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); + while ((inb(host->addr + TUL_Int) & 0x04) == 0) + cpu_relax(); + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); } /* Abort all active & disconnected scb */ - while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) { - pCurScb->SCB_HaStat = HOST_BAD_PHAS; - tul_append_done_scb(pCurHcb, pCurScb); + while ((scb = initio_pop_busy_scb(host)) != NULL) { + scb->hastat = HOST_BAD_PHAS; + initio_append_done_scb(host, scb); } - pCurHcb->HCS_ActScb = NULL; - pCurHcb->HCS_ActTcs = NULL; + host->active = NULL; + host->active_tc = NULL; /* clr sync nego. done flag */ - for (i = 0; i < pCurHcb->HCS_MaxTar; i++) { - pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); - } - return (-1); + for (i = 0; i < host->max_tar; i++) + host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); + return -1; } +/** + * int_initio_scsi_resel - Reselection occured + * @host: InitIO host adapter + * + * A SCSI reselection event has been signalled and the interrupt + * is now being processed. Work out which command block needs attention + * and continue processing that command. 
+ */ -/***************************************************************************/ -/* scsi reselection */ -int int_tul_resel(HCS * pCurHcb) +int int_initio_resel(struct initio_host * host) { - SCB *pCurScb; - TCS *pCurTcb; - BYTE tag, msg = 0; - BYTE tar, lun; - - if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) { - if (pCurScb->SCB_Status & SCB_SELECT) { /* if waiting for selection complete */ - pCurScb->SCB_Status &= ~SCB_SELECT; - } - pCurHcb->HCS_ActScb = NULL; + struct scsi_ctrl_blk *scb; + struct target_control *active_tc; + u8 tag, msg = 0; + u8 tar, lun; + + if ((scb = host->active) != NULL) { + /* FIXME: Why check and not just clear ? */ + if (scb->status & SCB_SELECT) /* if waiting for selection complete */ + scb->status &= ~SCB_SELECT; + host->active = NULL; } /* --------- get target id---------------------- */ - tar = TUL_RD(pCurHcb->HCS_Base, TUL_SBusId); + tar = inb(host->addr + TUL_SBusId); /* ------ get LUN from Identify message----------- */ - lun = TUL_RD(pCurHcb->HCS_Base, TUL_SIdent) & 0x0F; + lun = inb(host->addr + TUL_SIdent) & 0x0F; /* 07/22/98 from 0x1F -> 0x0F */ - pCurTcb = &pCurHcb->HCS_Tcs[tar]; - pCurHcb->HCS_ActTcs = pCurTcb; - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0); - TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period); - + active_tc = &host->targets[tar]; + host->active_tc = active_tc; + outb(active_tc->sconfig0, host->addr + TUL_SConfig); + outb(active_tc->js_period, host->addr + TUL_SPeriod); /* ------------- tag queueing ? 
------------------- */ - if (pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG) { - if ((tul_msgin_accept(pCurHcb)) == -1) - return (-1); - if (pCurHcb->HCS_Phase != MSG_IN) + if (active_tc->drv_flags & TCF_DRV_EN_TAG) { + if ((initio_msgin_accept(host)) == -1) + return -1; + if (host->phase != MSG_IN) goto no_tag; - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if ((wait_tulip(pCurHcb)) == -1) - return (-1); - msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* Read Tag Message */ + outl(1, host->addr + TUL_SCnt0); + outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + msg = inb(host->addr + TUL_SFifo); /* Read Tag Message */ - if ((msg < MSG_STAG) || (msg > MSG_OTAG)) /* Is simple Tag */ + if (msg < MSG_STAG || msg > MSG_OTAG) /* Is simple Tag */ goto no_tag; - if ((tul_msgin_accept(pCurHcb)) == -1) - return (-1); + if (initio_msgin_accept(host) == -1) + return -1; - if (pCurHcb->HCS_Phase != MSG_IN) + if (host->phase != MSG_IN) goto no_tag; - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if ((wait_tulip(pCurHcb)) == -1) - return (-1); - tag = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* Read Tag ID */ - pCurScb = pCurHcb->HCS_Scb + tag; - if ((pCurScb->SCB_Target != tar) || (pCurScb->SCB_Lun != lun)) { - return tul_msgout_abort_tag(pCurHcb); - } - if (pCurScb->SCB_Status != SCB_BUSY) { /* 03/24/95 */ - return tul_msgout_abort_tag(pCurHcb); - } - pCurHcb->HCS_ActScb = pCurScb; - if ((tul_msgin_accept(pCurHcb)) == -1) - return (-1); + outl(1, host->addr + TUL_SCnt0); + outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + tag = inb(host->addr + TUL_SFifo); /* Read Tag ID */ + scb = host->scb + tag; + if (scb->target != tar || scb->lun != lun) { + return initio_msgout_abort_tag(host); + } + if (scb->status != SCB_BUSY) { /* 03/24/95 */ + return initio_msgout_abort_tag(host); + } + host->active = 
scb; + if ((initio_msgin_accept(host)) == -1) + return -1; } else { /* No tag */ no_tag: - if ((pCurScb = tul_find_busy_scb(pCurHcb, tar | (lun << 8))) == NULL) { - return tul_msgout_abort_targ(pCurHcb); + if ((scb = initio_find_busy_scb(host, tar | (lun << 8))) == NULL) { + return initio_msgout_abort_targ(host); } - pCurHcb->HCS_ActScb = pCurScb; - if (!(pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG)) { - if ((tul_msgin_accept(pCurHcb)) == -1) - return (-1); + host->active = scb; + if (!(active_tc->drv_flags & TCF_DRV_EN_TAG)) { + if ((initio_msgin_accept(host)) == -1) + return -1; } } return 0; } +/** + * int_initio_bad_seq - out of phase + * @host: InitIO host flagging event + * + * We have ended up out of phase somehow. Reset the host controller + * and throw all our toys out of the pram. Let the midlayer clean up + */ -/***************************************************************************/ -static int int_tul_bad_seq(HCS * pCurHcb) +static int int_initio_bad_seq(struct initio_host * host) { /* target wrong phase */ - SCB *pCurScb; + struct scsi_ctrl_blk *scb; int i; - tul_reset_scsi(pCurHcb, 10); + initio_reset_scsi(host, 10); - while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) { - pCurScb->SCB_HaStat = HOST_BAD_PHAS; - tul_append_done_scb(pCurHcb, pCurScb); + while ((scb = initio_pop_busy_scb(host)) != NULL) { + scb->hastat = HOST_BAD_PHAS; + initio_append_done_scb(host, scb); } - for (i = 0; i < pCurHcb->HCS_MaxTar; i++) { - pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); - } - return (-1); + for (i = 0; i < host->max_tar; i++) + host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); + return -1; } -/***************************************************************************/ -int tul_msgout_abort_targ(HCS * pCurHcb) +/** + * initio_msgout_abort_targ - abort a tag + * @host: InitIO host + * + * Abort when the target/lun does not match or when our SCB is not + * busy. Used by untagged commands. 
+ */ + +static int initio_msgout_abort_targ(struct initio_host * host) { - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); - if (tul_msgin_accept(pCurHcb) == -1) - return (-1); - if (pCurHcb->HCS_Phase != MSG_OUT) - return (tul_bad_seq(pCurHcb)); + outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); + if (initio_msgin_accept(host) == -1) + return -1; + if (host->phase != MSG_OUT) + return initio_bad_seq(host); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); + outb(MSG_ABORT, host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); - return tul_wait_disc(pCurHcb); + return initio_wait_disc(host); } -/***************************************************************************/ -int tul_msgout_abort_tag(HCS * pCurHcb) +/** + * initio_msgout_abort_tag - abort a tag + * @host: InitIO host + * + * Abort when the target/lun does not match or when our SCB is not + * busy. Used for tagged commands. 
+ */ + +static int initio_msgout_abort_tag(struct initio_host * host) { - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); - if (tul_msgin_accept(pCurHcb) == -1) - return (-1); - if (pCurHcb->HCS_Phase != MSG_OUT) - return (tul_bad_seq(pCurHcb)); + outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); + if (initio_msgin_accept(host) == -1) + return -1; + if (host->phase != MSG_OUT) + return initio_bad_seq(host); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT_TAG); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); + outb(MSG_ABORT_TAG, host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); - return tul_wait_disc(pCurHcb); + return initio_wait_disc(host); } -/***************************************************************************/ -int tul_msgin(HCS * pCurHcb) +/** + * initio_msgin - Message in + * @host: InitIO Host + * + * Process incoming message + */ +static int initio_msgin(struct initio_host * host) { - TCS *pCurTcb; + struct target_control *active_tc; for (;;) { + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if ((wait_tulip(pCurHcb)) == -1) - return (-1); + outl(1, host->addr + TUL_SCnt0); + outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; - switch (TUL_RD(pCurHcb->HCS_Base, TUL_SFifo)) { + switch (inb(host->addr + TUL_SFifo)) { case MSG_DISC: /* Disconnect msg */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); - - return tul_wait_disc(pCurHcb); - + outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); + return initio_wait_disc(host); case MSG_SDP: case MSG_RESTORE: case MSG_NOP: - tul_msgin_accept(pCurHcb); + initio_msgin_accept(host); break; - case MSG_REJ: /* Clear ATN first */ - TUL_WR(pCurHcb->HCS_Base + 
TUL_SSignal, - (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7))); - pCurTcb = pCurHcb->HCS_ActTcs; - if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync nego */ - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); - } - tul_msgin_accept(pCurHcb); + outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), + host->addr + TUL_SSignal); + active_tc = host->active_tc; + if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) /* do sync nego */ + outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), + host->addr + TUL_SSignal); + initio_msgin_accept(host); break; - case MSG_EXTEND: /* extended msg */ - tul_msgin_extend(pCurHcb); + initio_msgin_extend(host); break; - case MSG_IGNOREWIDE: - tul_msgin_accept(pCurHcb); - break; - - /* get */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if (wait_tulip(pCurHcb) == -1) - return -1; - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0); /* put pad */ - TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* get IGNORE field */ - TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* get pad */ - - tul_msgin_accept(pCurHcb); + initio_msgin_accept(host); break; - case MSG_COMP: - { - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); - return tul_wait_done_disc(pCurHcb); - } + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); + return initio_wait_done_disc(host); default: - tul_msgout_reject(pCurHcb); + initio_msgout_reject(host); break; } - if (pCurHcb->HCS_Phase != MSG_IN) - return (pCurHcb->HCS_Phase); + if (host->phase != MSG_IN) + return host->phase; } /* statement won't reach here */ } - - - -/***************************************************************************/ -int tul_msgout_reject(HCS * pCurHcb) +static int initio_msgout_reject(struct initio_host * host) { + outb(((inb(host->addr + TUL_SSignal) & 
(TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); - - if ((tul_msgin_accept(pCurHcb)) == -1) - return (-1); + if (initio_msgin_accept(host) == -1) + return -1; - if (pCurHcb->HCS_Phase == MSG_OUT) { - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_REJ); /* Msg reject */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - return (wait_tulip(pCurHcb)); + if (host->phase == MSG_OUT) { + outb(MSG_REJ, host->addr + TUL_SFifo); /* Msg reject */ + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return wait_tulip(host); } - return (pCurHcb->HCS_Phase); + return host->phase; } - - -/***************************************************************************/ -int tul_msgout_ide(HCS * pCurHcb) +static int initio_msgout_ide(struct initio_host * host) { - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_IDE); /* Initiator Detected Error */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - return (wait_tulip(pCurHcb)); + outb(MSG_IDE, host->addr + TUL_SFifo); /* Initiator Detected Error */ + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return wait_tulip(host); } - -/***************************************************************************/ -int tul_msgin_extend(HCS * pCurHcb) +static int initio_msgin_extend(struct initio_host * host) { - BYTE len, idx; + u8 len, idx; - if (tul_msgin_accept(pCurHcb) != MSG_IN) - return (pCurHcb->HCS_Phase); + if (initio_msgin_accept(host) != MSG_IN) + return host->phase; /* Get extended msg length */ - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if (wait_tulip(pCurHcb) == -1) - return (-1); + outl(1, host->addr + TUL_SCnt0); + outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; - len = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); - pCurHcb->HCS_Msg[0] = len; + len = inb(host->addr + TUL_SFifo); + host->msg[0] = len; 
for (idx = 1; len != 0; len--) { - if ((tul_msgin_accept(pCurHcb)) != MSG_IN) - return (pCurHcb->HCS_Phase); - TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); - if (wait_tulip(pCurHcb) == -1) - return (-1); - pCurHcb->HCS_Msg[idx++] = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); - } - if (pCurHcb->HCS_Msg[1] == 1) { /* if it's synchronous data transfer request */ - if (pCurHcb->HCS_Msg[0] != 3) /* if length is not right */ - return (tul_msgout_reject(pCurHcb)); - if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_SYNC_NEGO) { /* Set OFFSET=0 to do async, nego back */ - pCurHcb->HCS_Msg[3] = 0; - } else { - if ((tul_msgin_sync(pCurHcb) == 0) && - (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SYNC_DONE)) { - tul_sync_done(pCurHcb); - return (tul_msgin_accept(pCurHcb)); + if ((initio_msgin_accept(host)) != MSG_IN) + return host->phase; + outl(1, host->addr + TUL_SCnt0); + outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); + if (wait_tulip(host) == -1) + return -1; + host->msg[idx++] = inb(host->addr + TUL_SFifo); + } + if (host->msg[1] == 1) { /* if it's synchronous data transfer request */ + u8 r; + if (host->msg[0] != 3) /* if length is not right */ + return initio_msgout_reject(host); + if (host->active_tc->flags & TCF_NO_SYNC_NEGO) { /* Set OFFSET=0 to do async, nego back */ + host->msg[3] = 0; + } else { + if (initio_msgin_sync(host) == 0 && + (host->active_tc->flags & TCF_SYNC_DONE)) { + initio_sync_done(host); + return initio_msgin_accept(host); } } - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); - if ((tul_msgin_accept(pCurHcb)) != MSG_OUT) - return (pCurHcb->HCS_Phase); + r = inb(host->addr + TUL_SSignal); + outb((r & (TSC_SET_ACK | 7)) | TSC_SET_ATN, + host->addr + TUL_SSignal); + if (initio_msgin_accept(host) != MSG_OUT) + return host->phase; /* sync msg out */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); - - tul_sync_done(pCurHcb); + 
outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[3]); + initio_sync_done(host); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - return (wait_tulip(pCurHcb)); + outb(MSG_EXTEND, host->addr + TUL_SFifo); + outb(3, host->addr + TUL_SFifo); + outb(1, host->addr + TUL_SFifo); + outb(host->msg[2], host->addr + TUL_SFifo); + outb(host->msg[3], host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return wait_tulip(host); } - if ((pCurHcb->HCS_Msg[0] != 2) || (pCurHcb->HCS_Msg[1] != 3)) - return (tul_msgout_reject(pCurHcb)); + if (host->msg[0] != 2 || host->msg[1] != 3) + return initio_msgout_reject(host); /* if it's WIDE DATA XFER REQ */ - if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) { - pCurHcb->HCS_Msg[2] = 0; + if (host->active_tc->flags & TCF_NO_WDTR) { + host->msg[2] = 0; } else { - if (pCurHcb->HCS_Msg[2] > 2) /* > 32 bits */ - return (tul_msgout_reject(pCurHcb)); - if (pCurHcb->HCS_Msg[2] == 2) { /* == 32 */ - pCurHcb->HCS_Msg[2] = 1; + if (host->msg[2] > 2) /* > 32 bits */ + return initio_msgout_reject(host); + if (host->msg[2] == 2) { /* == 32 */ + host->msg[2] = 1; } else { - if ((pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) == 0) { - wdtr_done(pCurHcb); - if ((pCurHcb->HCS_ActTcs->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); - return (tul_msgin_accept(pCurHcb)); + if ((host->active_tc->flags & TCF_NO_WDTR) == 0) { + wdtr_done(host); + if ((host->active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) + outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); + return initio_msgin_accept(host); } 
} } - TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); + outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); - if (tul_msgin_accept(pCurHcb) != MSG_OUT) - return (pCurHcb->HCS_Phase); + if (initio_msgin_accept(host) != MSG_OUT) + return host->phase; /* WDTR msg out */ - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - return (wait_tulip(pCurHcb)); + outb(MSG_EXTEND, host->addr + TUL_SFifo); + outb(2, host->addr + TUL_SFifo); + outb(3, host->addr + TUL_SFifo); + outb(host->msg[2], host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return wait_tulip(host); } -/***************************************************************************/ -int tul_msgin_sync(HCS * pCurHcb) +static int initio_msgin_sync(struct initio_host * host) { char default_period; - default_period = tul_rate_tbl[pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SCSI_RATE]; - if (pCurHcb->HCS_Msg[3] > MAX_OFFSET) { - pCurHcb->HCS_Msg[3] = MAX_OFFSET; - if (pCurHcb->HCS_Msg[2] < default_period) { - pCurHcb->HCS_Msg[2] = default_period; + default_period = initio_rate_tbl[host->active_tc->flags & TCF_SCSI_RATE]; + if (host->msg[3] > MAX_OFFSET) { + host->msg[3] = MAX_OFFSET; + if (host->msg[2] < default_period) { + host->msg[2] = default_period; return 1; } - if (pCurHcb->HCS_Msg[2] >= 59) { /* Change to async */ - pCurHcb->HCS_Msg[3] = 0; - } + if (host->msg[2] >= 59) /* Change to async */ + host->msg[3] = 0; return 1; } /* offset requests asynchronous transfers ? 
*/ - if (pCurHcb->HCS_Msg[3] == 0) { + if (host->msg[3] == 0) { return 0; } - if (pCurHcb->HCS_Msg[2] < default_period) { - pCurHcb->HCS_Msg[2] = default_period; + if (host->msg[2] < default_period) { + host->msg[2] = default_period; return 1; } - if (pCurHcb->HCS_Msg[2] >= 59) { - pCurHcb->HCS_Msg[3] = 0; + if (host->msg[2] >= 59) { + host->msg[3] = 0; return 1; } return 0; } - -/***************************************************************************/ -int wdtr_done(HCS * pCurHcb) +static int wdtr_done(struct initio_host * host) { - pCurHcb->HCS_ActTcs->TCS_Flags &= ~TCF_SYNC_DONE; - pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_WDTR_DONE; + host->active_tc->flags &= ~TCF_SYNC_DONE; + host->active_tc->flags |= TCF_WDTR_DONE; - pCurHcb->HCS_ActTcs->TCS_JS_Period = 0; - if (pCurHcb->HCS_Msg[2]) { /* if 16 bit */ - pCurHcb->HCS_ActTcs->TCS_JS_Period |= TSC_WIDE_SCSI; - } - pCurHcb->HCS_ActTcs->TCS_SConfig0 &= ~TSC_ALT_PERIOD; - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0); - TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period); + host->active_tc->js_period = 0; + if (host->msg[2]) /* if 16 bit */ + host->active_tc->js_period |= TSC_WIDE_SCSI; + host->active_tc->sconfig0 &= ~TSC_ALT_PERIOD; + outb(host->active_tc->sconfig0, host->addr + TUL_SConfig); + outb(host->active_tc->js_period, host->addr + TUL_SPeriod); return 1; } -/***************************************************************************/ -int tul_sync_done(HCS * pCurHcb) +static int initio_sync_done(struct initio_host * host) { int i; - pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_SYNC_DONE; + host->active_tc->flags |= TCF_SYNC_DONE; - if (pCurHcb->HCS_Msg[3]) { - pCurHcb->HCS_ActTcs->TCS_JS_Period |= pCurHcb->HCS_Msg[3]; + if (host->msg[3]) { + host->active_tc->js_period |= host->msg[3]; for (i = 0; i < 8; i++) { - if (tul_rate_tbl[i] >= pCurHcb->HCS_Msg[2]) /* pick the big one */ + if (initio_rate_tbl[i] >= host->msg[2]) /* pick the big one */ break; } - 
pCurHcb->HCS_ActTcs->TCS_JS_Period |= (i << 4); - pCurHcb->HCS_ActTcs->TCS_SConfig0 |= TSC_ALT_PERIOD; + host->active_tc->js_period |= (i << 4); + host->active_tc->sconfig0 |= TSC_ALT_PERIOD; } - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0); - TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period); + outb(host->active_tc->sconfig0, host->addr + TUL_SConfig); + outb(host->active_tc->js_period, host->addr + TUL_SPeriod); - return (-1); + return -1; } -int tul_post_scsi_rst(HCS * pCurHcb) +static int initio_post_scsi_rst(struct initio_host * host) { - SCB *pCurScb; - TCS *pCurTcb; + struct scsi_ctrl_blk *scb; + struct target_control *active_tc; int i; - pCurHcb->HCS_ActScb = NULL; - pCurHcb->HCS_ActTcs = NULL; - pCurHcb->HCS_Flags = 0; - - while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) { - pCurScb->SCB_HaStat = HOST_BAD_PHAS; - tul_append_done_scb(pCurHcb, pCurScb); + host->active = NULL; + host->active_tc = NULL; + host->flags = 0; + + while ((scb = initio_pop_busy_scb(host)) != NULL) { + scb->hastat = HOST_BAD_PHAS; + initio_append_done_scb(host, scb); } /* clear sync done flag */ - pCurTcb = &pCurHcb->HCS_Tcs[0]; - for (i = 0; i < pCurHcb->HCS_MaxTar; pCurTcb++, i++) { - pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); + active_tc = &host->targets[0]; + for (i = 0; i < host->max_tar; active_tc++, i++) { + active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); /* Initialize the sync. 
xfer register values to an asyn xfer */ - pCurTcb->TCS_JS_Period = 0; - pCurTcb->TCS_SConfig0 = pCurHcb->HCS_SConf1; - pCurHcb->HCS_ActTags[0] = 0; /* 07/22/98 */ - pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY; /* 07/22/98 */ + active_tc->js_period = 0; + active_tc->sconfig0 = host->sconf1; + host->act_tags[0] = 0; /* 07/22/98 */ + host->targets[i].flags &= ~TCF_BUSY; /* 07/22/98 */ } /* for */ - return (-1); + return -1; } -/***************************************************************************/ -void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb) +static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb) { - pCurScb->SCB_Status |= SCB_SELECT; - pCurScb->SCB_NxtStat = 0x1; - pCurHcb->HCS_ActScb = pCurScb; - pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SELATNSTOP); - return; + scb->status |= SCB_SELECT; + scb->next_state = 0x1; + host->active = scb; + host->active_tc = &host->targets[scb->target]; + outb(TSC_SELATNSTOP, host->addr + TUL_SCmd); } -/***************************************************************************/ -void tul_select_atn(HCS * pCurHcb, SCB * pCurScb) +static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb) { int i; - pCurScb->SCB_Status |= SCB_SELECT; - pCurScb->SCB_NxtStat = 0x2; + scb->status |= SCB_SELECT; + scb->next_state = 0x2; - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident); - for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++) - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]); - pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; - pCurHcb->HCS_ActScb = pCurScb; - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN); - return; + outb(scb->ident, host->addr + TUL_SFifo); + for (i = 0; i < (int) scb->cdblen; i++) + outb(scb->cdb[i], host->addr + TUL_SFifo); + host->active_tc = &host->targets[scb->target]; + host->active = scb; + outb(TSC_SEL_ATN, host->addr + TUL_SCmd); } 
-/***************************************************************************/ -void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb) +static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb) { int i; - pCurScb->SCB_Status |= SCB_SELECT; - pCurScb->SCB_NxtStat = 0x2; - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg); - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId); - for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++) - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]); - pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; - pCurHcb->HCS_ActScb = pCurScb; - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN3); - return; -} + scb->status |= SCB_SELECT; + scb->next_state = 0x2; -/***************************************************************************/ -/* SCSI Bus Device Reset */ -int tul_bus_device_reset(HCS * pCurHcb) + outb(scb->ident, host->addr + TUL_SFifo); + outb(scb->tagmsg, host->addr + TUL_SFifo); + outb(scb->tagid, host->addr + TUL_SFifo); + for (i = 0; i < scb->cdblen; i++) + outb(scb->cdb[i], host->addr + TUL_SFifo); + host->active_tc = &host->targets[scb->target]; + host->active = scb; + outb(TSC_SEL_ATN3, host->addr + TUL_SCmd); +} + +/** + * initio_bus_device_reset - SCSI Bus Device Reset + * @host: InitIO host to reset + * + * Perform a device reset and abort all pending SCBs for the + * victim device + */ +int initio_bus_device_reset(struct initio_host * host) { - SCB *pCurScb = pCurHcb->HCS_ActScb; - TCS *pCurTcb = pCurHcb->HCS_ActTcs; - SCB *pTmpScb, *pPrevScb; - BYTE tar; + struct scsi_ctrl_blk *scb = host->active; + struct target_control *active_tc = host->active_tc; + struct scsi_ctrl_blk *tmp, *prev; + u8 tar; - if (pCurHcb->HCS_Phase != MSG_OUT) { - return (int_tul_bad_seq(pCurHcb)); /* Unexpected phase */ - } - tul_unlink_pend_scb(pCurHcb, pCurScb); - tul_release_scb(pCurHcb, pCurScb); + if (host->phase != MSG_OUT) 
+ return int_initio_bad_seq(host); /* Unexpected phase */ + initio_unlink_pend_scb(host, scb); + initio_release_scb(host, scb); - tar = pCurScb->SCB_Target; /* target */ - pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY); + + tar = scb->target; /* target */ + active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY); /* clr sync. nego & WDTR flags 07/22/98 */ /* abort all SCB with same target */ - pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */ - while (pTmpScb != NULL) { - - if (pTmpScb->SCB_Target == tar) { + prev = tmp = host->first_busy; /* Check Busy queue */ + while (tmp != NULL) { + if (tmp->target == tar) { /* unlink it */ - if (pTmpScb == pCurHcb->HCS_FirstBusy) { - if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL) - pCurHcb->HCS_LastBusy = NULL; - } else { - pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; - if (pTmpScb == pCurHcb->HCS_LastBusy) - pCurHcb->HCS_LastBusy = pPrevScb; + if (tmp == host->first_busy) { + if ((host->first_busy = tmp->next) == NULL) + host->last_busy = NULL; + } else { + prev->next = tmp->next; + if (tmp == host->last_busy) + host->last_busy = prev; } - pTmpScb->SCB_HaStat = HOST_ABORTED; - tul_append_done_scb(pCurHcb, pTmpScb); + tmp->hastat = HOST_ABORTED; + initio_append_done_scb(host, tmp); } /* Previous haven't change */ else { - pPrevScb = pTmpScb; + prev = tmp; } - pTmpScb = pTmpScb->SCB_NxtScb; + tmp = tmp->next; } - - TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_DEVRST); - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); - - return tul_wait_disc(pCurHcb); + outb(MSG_DEVRST, host->addr + TUL_SFifo); + outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + return initio_wait_disc(host); } -/***************************************************************************/ -int tul_msgin_accept(HCS * pCurHcb) +static int initio_msgin_accept(struct initio_host * host) { - TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); - return (wait_tulip(pCurHcb)); + outb(TSC_MSG_ACCEPT, 
host->addr + TUL_SCmd); + return wait_tulip(host); } -/***************************************************************************/ -int wait_tulip(HCS * pCurHcb) +static int wait_tulip(struct initio_host * host) { - while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) - & TSS_INT_PENDING)); + while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) + & TSS_INT_PENDING)) + cpu_relax(); + + host->jsint = inb(host->addr + TUL_SInt); + host->phase = host->jsstatus0 & TSS_PH_MASK; + host->jsstatus1 = inb(host->addr + TUL_SStatus1); - pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); - pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK; - pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1); - - if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) { /* if SCSI bus reset detected */ - return (int_tul_resel(pCurHcb)); - } - if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) { /* if selected/reselected timeout interrupt */ - return (int_tul_busfree(pCurHcb)); - } - if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */ - return (int_tul_scsi_rst(pCurHcb)); - } - if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ - if (pCurHcb->HCS_Flags & HCF_EXPECT_DONE_DISC) { - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ - tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb); - pCurHcb->HCS_ActScb->SCB_HaStat = 0; - tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb); - pCurHcb->HCS_ActScb = NULL; - pCurHcb->HCS_ActTcs = NULL; - pCurHcb->HCS_Flags &= ~HCF_EXPECT_DONE_DISC; - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ - return (-1); - } - if (pCurHcb->HCS_Flags & HCF_EXPECT_DISC) { - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ - pCurHcb->HCS_ActScb = NULL; - pCurHcb->HCS_ActTcs = NULL; - pCurHcb->HCS_Flags &= ~HCF_EXPECT_DISC; - TUL_WR(pCurHcb->HCS_Base + 
TUL_SConfig, TSC_INITDEFAULT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ - return (-1); + if (host->jsint & TSS_RESEL_INT) /* if SCSI bus reset detected */ + return int_initio_resel(host); + if (host->jsint & TSS_SEL_TIMEOUT) /* if selected/reselected timeout interrupt */ + return int_initio_busfree(host); + if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */ + return int_initio_scsi_rst(host); + + if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ + if (host->flags & HCF_EXPECT_DONE_DISC) { + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ + initio_unlink_busy_scb(host, host->active); + host->active->hastat = 0; + initio_append_done_scb(host, host->active); + host->active = NULL; + host->active_tc = NULL; + host->flags &= ~HCF_EXPECT_DONE_DISC; + outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ + return -1; } - return (int_tul_busfree(pCurHcb)); + if (host->flags & HCF_EXPECT_DISC) { + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ + host->active = NULL; + host->active_tc = NULL; + host->flags &= ~HCF_EXPECT_DISC; + outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ + return -1; } - if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) { - return (pCurHcb->HCS_Phase); + return int_initio_busfree(host); } - return (pCurHcb->HCS_Phase); + /* The old code really does the below. 
Can probably be removed */ + if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV)) + return host->phase; + return host->phase; } -/***************************************************************************/ -int tul_wait_disc(HCS * pCurHcb) -{ - - while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) - & TSS_INT_PENDING)); +static int initio_wait_disc(struct initio_host * host) +{ + while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING)) + cpu_relax(); - pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); + host->jsint = inb(host->addr + TUL_SInt); - if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */ - return (int_tul_scsi_rst(pCurHcb)); - } - if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ - pCurHcb->HCS_ActScb = NULL; - return (-1); + if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */ + return int_initio_scsi_rst(host); + if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ + outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ + host->active = NULL; + return -1; } - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } -/***************************************************************************/ -int tul_wait_done_disc(HCS * pCurHcb) +static int initio_wait_done_disc(struct initio_host * host) { + while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) + & TSS_INT_PENDING)) + cpu_relax(); + host->jsint = inb(host->addr + TUL_SInt); - while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) - & TSS_INT_PENDING)); - - pCurHcb->HCS_JSInt = 
TUL_RD(pCurHcb->HCS_Base, TUL_SInt); + if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */ + return int_initio_scsi_rst(host); + if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ + outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ + outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); + outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ + initio_unlink_busy_scb(host, host->active); - - if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */ - return (int_tul_scsi_rst(pCurHcb)); - } - if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ - TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); - TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ - tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb); - - tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb); - pCurHcb->HCS_ActScb = NULL; - return (-1); + initio_append_done_scb(host, host->active); + host->active = NULL; + return -1; } - return (tul_bad_seq(pCurHcb)); + return initio_bad_seq(host); } +/** + * i91u_intr - IRQ handler + * @irqno: IRQ number + * @dev_id: IRQ identifier + * + * Take the relevant locks and then invoke the actual isr processing + * code under the lock. 
+ */ + static irqreturn_t i91u_intr(int irqno, void *dev_id) { struct Scsi_Host *dev = dev_id; unsigned long flags; + int r; spin_lock_irqsave(dev->host_lock, flags); - tul_isr((HCS *)dev->base); + r = initio_isr((struct initio_host *)dev->hostdata); spin_unlock_irqrestore(dev->host_lock, flags); + if (r) return IRQ_HANDLED; + else + return IRQ_NONE; } -static int tul_NewReturnNumberOfAdapters(void) -{ - struct pci_dev *pDev = NULL; /* Start from none */ - int iAdapters = 0; - long dRegValue; - WORD wBIOS; - int i = 0; - - init_i91uAdapter_table(); - - for (i = 0; i < ARRAY_SIZE(i91u_pci_devices); i++) - { - while ((pDev = pci_find_device(i91u_pci_devices[i].vendor, i91u_pci_devices[i].device, pDev)) != NULL) { - if (pci_enable_device(pDev)) - continue; - pci_read_config_dword(pDev, 0x44, (u32 *) & dRegValue); - wBIOS = (UWORD) (dRegValue & 0xFF); - if (((dRegValue & 0xFF00) >> 8) == 0xFF) - dRegValue = 0; - wBIOS = (wBIOS << 8) + ((UWORD) ((dRegValue & 0xFF00) >> 8)); - if (pci_set_dma_mask(pDev, DMA_32BIT_MASK)) { - printk(KERN_WARNING - "i91u: Could not set 32 bit DMA mask\n"); - continue; - } - - if (Addi91u_into_Adapter_table(wBIOS, - (pDev->resource[0].start), - pDev->irq, - pDev->bus->number, - (pDev->devfn >> 3) - ) == 0) - iAdapters++; - } - } - - return (iAdapters); -} - -static int i91u_detect(struct scsi_host_template * tpnt) -{ - HCS *pHCB; - struct Scsi_Host *hreg; - unsigned long i; /* 01/14/98 */ - int ok = 0, iAdapters; - ULONG dBiosAdr; - BYTE *pbBiosAdr; - - /* Get total number of adapters in the motherboard */ - iAdapters = tul_NewReturnNumberOfAdapters(); - if (iAdapters == 0) /* If no tulip founded, return */ - return (0); - - tul_num_ch = (iAdapters > tul_num_ch) ? 
tul_num_ch : iAdapters; - /* Update actually channel number */ - if (tul_tag_enable) { /* 1.01i */ - tul_num_scb = MAX_TARGETS * i91u_MAXQUEUE; - } else { - tul_num_scb = MAX_TARGETS + 3; /* 1-tape, 1-CD_ROM, 1- extra */ - } /* Update actually SCBs per adapter */ - - /* Get total memory needed for HCS */ - i = tul_num_ch * sizeof(HCS); - memset((unsigned char *) &tul_hcs[0], 0, i); /* Initialize tul_hcs 0 */ - /* Get total memory needed for SCB */ - - for (; tul_num_scb >= MAX_TARGETS + 3; tul_num_scb--) { - i = tul_num_ch * tul_num_scb * sizeof(SCB); - if ((tul_scb = kmalloc(i, GFP_ATOMIC | GFP_DMA)) != NULL) - break; - } - if (tul_scb == NULL) { - printk("i91u: SCB memory allocation error\n"); - return (0); - } - memset((unsigned char *) tul_scb, 0, i); - - for (i = 0, pHCB = &tul_hcs[0]; /* Get pointer for control block */ - i < tul_num_ch; - i++, pHCB++) { - get_tulipPCIConfig(pHCB, i); - - dBiosAdr = pHCB->HCS_BIOS; - dBiosAdr = (dBiosAdr << 4); - pbBiosAdr = phys_to_virt(dBiosAdr); - - init_tulip(pHCB, tul_scb + (i * tul_num_scb), tul_num_scb, pbBiosAdr, 10); - request_region(pHCB->HCS_Base, 256, "i91u"); /* Register */ - - pHCB->HCS_Index = i; /* 7/29/98 */ - hreg = scsi_register(tpnt, sizeof(HCS)); - if(hreg == NULL) { - release_region(pHCB->HCS_Base, 256); - return 0; - } - hreg->io_port = pHCB->HCS_Base; - hreg->n_io_port = 0xff; - hreg->can_queue = tul_num_scb; /* 03/05/98 */ - hreg->unique_id = pHCB->HCS_Base; - hreg->max_id = pHCB->HCS_MaxTar; - hreg->max_lun = 32; /* 10/21/97 */ - hreg->irq = pHCB->HCS_Intr; - hreg->this_id = pHCB->HCS_SCSI_ID; /* Assign HCS index */ - hreg->base = (unsigned long)pHCB; - hreg->sg_tablesize = TOTAL_SG_ENTRY; /* Maximun support is 32 */ - - /* Initial tulip chip */ - ok = request_irq(pHCB->HCS_Intr, i91u_intr, IRQF_DISABLED | IRQF_SHARED, "i91u", hreg); - if (ok < 0) { - printk(KERN_WARNING "i91u: unable to request IRQ %d\n\n", pHCB->HCS_Intr); - return 0; - } - } - - tpnt->this_id = -1; - tpnt->can_queue = 1; - - 
return 1; -} +/** + * initio_build_scb - Build the mappings and SCB + * @host: InitIO host taking the command + * @cblk: Firmware command block + * @cmnd: SCSI midlayer command block + * + * Translate the abstract SCSI command into a firmware command block + * suitable for feeding to the InitIO host controller. This also requires + * we build the scatter gather lists and ensure they are mapped properly. + */ -static void i91uBuildSCB(HCS * pHCB, SCB * pSCB, struct scsi_cmnd * SCpnt) +static void initio_build_scb(struct initio_host * host, struct scsi_ctrl_blk * cblk, struct scsi_cmnd * cmnd) { /* Create corresponding SCB */ - struct scatterlist *pSrbSG; - SG *pSG; /* Pointer to SG list */ - int i; - long TotalLen; + struct scatterlist *sglist; + struct sg_entry *sg; /* Pointer to SG list */ + int i, nseg; + long total_len; dma_addr_t dma_addr; - pSCB->SCB_Post = i91uSCBPost; /* i91u's callback routine */ - pSCB->SCB_Srb = SCpnt; - pSCB->SCB_Opcode = ExecSCSI; - pSCB->SCB_Flags = SCF_POST; /* After SCSI done, call post routine */ - pSCB->SCB_Target = SCpnt->device->id; - pSCB->SCB_Lun = SCpnt->device->lun; - pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW; + /* Fill in the command headers */ + cblk->post = i91uSCBPost; /* i91u's callback routine */ + cblk->srb = cmnd; + cblk->opcode = ExecSCSI; + cblk->flags = SCF_POST; /* After SCSI done, call post routine */ + cblk->target = cmnd->device->id; + cblk->lun = cmnd->device->lun; + cblk->ident = cmnd->device->lun | DISC_ALLOW; - pSCB->SCB_Flags |= SCF_SENSE; /* Turn on auto request sense */ - dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->sense_buffer, - SENSE_SIZE, DMA_FROM_DEVICE); - pSCB->SCB_SensePtr = cpu_to_le32((u32)dma_addr); - pSCB->SCB_SenseLen = cpu_to_le32(SENSE_SIZE); - SCpnt->SCp.ptr = (char *)(unsigned long)dma_addr; - - pSCB->SCB_CDBLen = SCpnt->cmd_len; - pSCB->SCB_HaStat = 0; - pSCB->SCB_TaStat = 0; - memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, SCpnt->cmd_len); + cblk->flags |= SCF_SENSE; /* Turn 
on auto request sense */ - if (SCpnt->device->tagged_supported) { /* Tag Support */ - pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ + /* Map the sense buffer into bus memory */ + dma_addr = dma_map_single(&host->pci_dev->dev, cmnd->sense_buffer, + SENSE_SIZE, DMA_FROM_DEVICE); + cblk->senseptr = cpu_to_le32((u32)dma_addr); + cblk->senselen = cpu_to_le32(SENSE_SIZE); + cmnd->SCp.ptr = (char *)(unsigned long)dma_addr; + cblk->cdblen = cmnd->cmd_len; + + /* Clear the returned status */ + cblk->hastat = 0; + cblk->tastat = 0; + /* Command the command */ + memcpy(&cblk->cdb[0], &cmnd->cmnd, cmnd->cmd_len); + + /* Set up tags */ + if (cmnd->device->tagged_supported) { /* Tag Support */ + cblk->tagmsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ } else { - pSCB->SCB_TagMsg = 0; /* No tag support */ + cblk->tagmsg = 0; /* No tag support */ } + /* todo handle map_sg error */ - if (SCpnt->use_sg) { - dma_addr = dma_map_single(&pHCB->pci_dev->dev, &pSCB->SCB_SGList[0], - sizeof(struct SG_Struc) * TOTAL_SG_ENTRY, + nseg = scsi_dma_map(cmnd); + BUG_ON(nseg < 0); + if (nseg) { + dma_addr = dma_map_single(&host->pci_dev->dev, &cblk->sglist[0], + sizeof(struct sg_entry) * TOTAL_SG_ENTRY, DMA_BIDIRECTIONAL); - pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr); - SCpnt->SCp.dma_handle = dma_addr; + cblk->bufptr = cpu_to_le32((u32)dma_addr); + cmnd->SCp.dma_handle = dma_addr; - pSrbSG = (struct scatterlist *) SCpnt->request_buffer; - pSCB->SCB_SGLen = dma_map_sg(&pHCB->pci_dev->dev, pSrbSG, - SCpnt->use_sg, SCpnt->sc_data_direction); - - pSCB->SCB_Flags |= SCF_SG; /* Turn on SG list flag */ - for (i = 0, TotalLen = 0, pSG = &pSCB->SCB_SGList[0]; /* 1.01g */ - i < pSCB->SCB_SGLen; i++, pSG++, pSrbSG++) { - pSG->SG_Ptr = cpu_to_le32((u32)sg_dma_address(pSrbSG)); - TotalLen += pSG->SG_Len = cpu_to_le32((u32)sg_dma_len(pSrbSG)); - } - - pSCB->SCB_BufLen = (SCpnt->request_bufflen > TotalLen) ? 
- TotalLen : SCpnt->request_bufflen; - } else if (SCpnt->request_bufflen) { /* Non SG */ - dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->request_buffer, - SCpnt->request_bufflen, - SCpnt->sc_data_direction); - SCpnt->SCp.dma_handle = dma_addr; - pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr); - pSCB->SCB_BufLen = cpu_to_le32((u32)SCpnt->request_bufflen); - pSCB->SCB_SGLen = 0; - } else { - pSCB->SCB_BufLen = 0; - pSCB->SCB_SGLen = 0; + + cblk->flags |= SCF_SG; /* Turn on SG list flag */ + total_len = 0; + sg = &cblk->sglist[0]; + scsi_for_each_sg(cmnd, sglist, cblk->sglen, i) { + sg->data = cpu_to_le32((u32)sg_dma_address(sglist)); + total_len += sg->len = cpu_to_le32((u32)sg_dma_len(sglist)); + } + + cblk->buflen = (scsi_bufflen(cmnd) > total_len) ? + total_len : scsi_bufflen(cmnd); + } else { /* No data transfer required */ + cblk->buflen = 0; + cblk->sglen = 0; } } +/** + * i91u_queuecommand - Queue a new command if possible + * @cmd: SCSI command block from the mid layer + * @done: Completion handler + * + * Attempts to queue a new command with the host adapter. 
Will return + * zero if successful or indicate a host busy condition if not (which + * will cause the mid layer to call us again later with the command) + */ + static int i91u_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { - HCS *pHCB = (HCS *) cmd->device->host->base; - register SCB *pSCB; + struct initio_host *host = (struct initio_host *) cmd->device->host->hostdata; + struct scsi_ctrl_blk *cmnd; cmd->scsi_done = done; - pSCB = tul_alloc_scb(pHCB); - if (!pSCB) + cmnd = initio_alloc_scb(host); + if (!cmnd) return SCSI_MLQUEUE_HOST_BUSY; - i91uBuildSCB(pHCB, pSCB, cmd); - tul_exec_scb(pHCB, pSCB); + initio_build_scb(host, cmnd, cmd); + initio_exec_scb(host, cmnd); return 0; } -#if 0 /* no new EH yet */ -/* - * Abort a queued command - * (commands that are on the bus can't be aborted easily) - */ -static int i91u_abort(struct scsi_cmnd * SCpnt) -{ - HCS *pHCB; - - pHCB = (HCS *) SCpnt->device->host->base; - return tul_abort_srb(pHCB, SCpnt); -} - -/* - * Reset registers, reset a hanging bus and - * kill active and disconnected commands for target w/o soft reset +/** + * i91u_bus_reset - reset the SCSI bus + * @cmnd: Command block we want to trigger the reset for + * + * Initiate a SCSI bus reset sequence */ -static int i91u_reset(struct scsi_cmnd * SCpnt, unsigned int reset_flags) -{ /* I need Host Control Block Information */ - HCS *pHCB; - - pHCB = (HCS *) SCpnt->device->host->base; - - if (reset_flags & (SCSI_RESET_SUGGEST_BUS_RESET | SCSI_RESET_SUGGEST_HOST_RESET)) - return tul_reset_scsi_bus(pHCB); - else - return tul_device_reset(pHCB, SCpnt, SCpnt->device->id, reset_flags); -} -#endif -static int i91u_bus_reset(struct scsi_cmnd * SCpnt) +static int i91u_bus_reset(struct scsi_cmnd * cmnd) { - HCS *pHCB; + struct initio_host *host; - pHCB = (HCS *) SCpnt->device->host->base; + host = (struct initio_host *) cmnd->device->host->hostdata; - spin_lock_irq(SCpnt->device->host->host_lock); - tul_reset_scsi(pHCB, 0); - 
spin_unlock_irq(SCpnt->device->host->host_lock); + spin_lock_irq(cmnd->device->host->host_lock); + initio_reset_scsi(host, 0); + spin_unlock_irq(cmnd->device->host->host_lock); return SUCCESS; } -/* - * Return the "logical geometry" +/** + * i91u_biospararm - return the "logical geometry + * @sdev: SCSI device + * @dev; Matching block device + * @capacity: Sector size of drive + * @info_array: Return space for BIOS geometry + * + * Map the device geometry in a manner compatible with the host + * controller BIOS behaviour. + * + * FIXME: limited to 2^32 sector devices. */ + static int i91u_biosparam(struct scsi_device *sdev, struct block_device *dev, sector_t capacity, int *info_array) { - HCS *pHcb; /* Point to Host adapter control block */ - TCS *pTcb; + struct initio_host *host; /* Point to Host adapter control block */ + struct target_control *tc; - pHcb = (HCS *) sdev->host->base; - pTcb = &pHcb->HCS_Tcs[sdev->id]; + host = (struct initio_host *) sdev->host->hostdata; + tc = &host->targets[sdev->id]; - if (pTcb->TCS_DrvHead) { - info_array[0] = pTcb->TCS_DrvHead; - info_array[1] = pTcb->TCS_DrvSector; - info_array[2] = (unsigned long)capacity / pTcb->TCS_DrvHead / pTcb->TCS_DrvSector; + if (tc->heads) { + info_array[0] = tc->heads; + info_array[1] = tc->sectors; + info_array[2] = (unsigned long)capacity / tc->heads / tc->sectors; } else { - if (pTcb->TCS_DrvFlags & TCF_DRV_255_63) { + if (tc->drv_flags & TCF_DRV_255_63) { info_array[0] = 255; info_array[1] = 63; info_array[2] = (unsigned long)capacity / 255 / 63; @@ -3047,7 +2722,16 @@ return 0; } -static void i91u_unmap_cmnd(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd) +/** + * i91u_unmap_scb - Unmap a command + * @pci_dev: PCI device the command is for + * @cmnd: The command itself + * + * Unmap any PCI mapping/IOMMU resources allocated when the command + * was mapped originally as part of initio_build_scb + */ + +static void i91u_unmap_scb(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd) { /* auto sense 
buffer */ if (cmnd->SCp.ptr) { @@ -3058,65 +2742,63 @@ } /* request buffer */ - if (cmnd->use_sg) { + if (scsi_sg_count(cmnd)) { dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle, - sizeof(struct SG_Struc) * TOTAL_SG_ENTRY, + sizeof(struct sg_entry) * TOTAL_SG_ENTRY, DMA_BIDIRECTIONAL); - dma_unmap_sg(&pci_dev->dev, cmnd->request_buffer, - cmnd->use_sg, - cmnd->sc_data_direction); - } else if (cmnd->request_bufflen) { - dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle, - cmnd->request_bufflen, - cmnd->sc_data_direction); + scsi_dma_unmap(cmnd); } } -/***************************************************************************** - Function name : i91uSCBPost - Description : This is callback routine be called when tulip finish one - SCSI command. - Input : pHCB - Pointer to host adapter control block. - pSCB - Pointer to SCSI control block. - Output : None. - Return : None. -*****************************************************************************/ -static void i91uSCBPost(BYTE * pHcb, BYTE * pScb) -{ - struct scsi_cmnd *pSRB; /* Pointer to SCSI request block */ - HCS *pHCB; - SCB *pSCB; - - pHCB = (HCS *) pHcb; - pSCB = (SCB *) pScb; - if ((pSRB = pSCB->SCB_Srb) == 0) { - printk("i91uSCBPost: SRB pointer is empty\n"); +/** + * i91uSCBPost - SCSI callback + * @host: Pointer to host adapter control block. + * @cmnd: Pointer to SCSI control block. + * + * This is callback routine be called when tulip finish one + * SCSI command. 
+ */ + +static void i91uSCBPost(u8 * host_mem, u8 * cblk_mem) +{ + struct scsi_cmnd *cmnd; /* Pointer to SCSI request block */ + struct initio_host *host; + struct scsi_ctrl_blk *cblk; - tul_release_scb(pHCB, pSCB); /* Release SCB for current channel */ + host = (struct initio_host *) host_mem; + cblk = (struct scsi_ctrl_blk *) cblk_mem; + if ((cmnd = cblk->srb) == NULL) { + printk(KERN_ERR "i91uSCBPost: SRB pointer is empty\n"); + WARN_ON(1); + initio_release_scb(host, cblk); /* Release SCB for current channel */ return; } - switch (pSCB->SCB_HaStat) { + + /* + * Remap the firmware error status into a mid layer one + */ + switch (cblk->hastat) { case 0x0: case 0xa: /* Linked command complete without error and linked normally */ case 0xb: /* Linked command complete without error interrupt generated */ - pSCB->SCB_HaStat = 0; + cblk->hastat = 0; break; case 0x11: /* Selection time out-The initiator selection or target reselection was not complete within the SCSI Time out period */ - pSCB->SCB_HaStat = DID_TIME_OUT; + cblk->hastat = DID_TIME_OUT; break; case 0x14: /* Target bus phase sequence failure-An invalid bus phase or bus phase sequence was requested by the target. The host adapter will generate a SCSI Reset Condition, notifying the host with a SCRD interrupt */ - pSCB->SCB_HaStat = DID_RESET; + cblk->hastat = DID_RESET; break; case 0x1a: /* SCB Aborted. 07/21/98 */ - pSCB->SCB_HaStat = DID_ABORT; + cblk->hastat = DID_ABORT; break; case 0x12: /* Data overrun/underrun-The target attempted to transfer more data @@ -3126,49 +2808,196 @@ case 0x16: /* Invalid SCB Operation Code. 
*/ default: - printk("ini9100u: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat); - pSCB->SCB_HaStat = DID_ERROR; /* Couldn't find any better */ + printk("ini9100u: %x %x\n", cblk->hastat, cblk->tastat); + cblk->hastat = DID_ERROR; /* Couldn't find any better */ break; } - pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16); - - if (pSRB == NULL) { - printk("pSRB is NULL\n"); - } - - i91u_unmap_cmnd(pHCB->pci_dev, pSRB); - pSRB->scsi_done(pSRB); /* Notify system DONE */ - - tul_release_scb(pHCB, pSCB); /* Release SCB for current channel */ + cmnd->result = cblk->tastat | (cblk->hastat << 16); + WARN_ON(cmnd == NULL); + i91u_unmap_scb(host->pci_dev, cmnd); + cmnd->scsi_done(cmnd); /* Notify system DONE */ + initio_release_scb(host, cblk); /* Release SCB for current channel */ } -/* - * Release ressources - */ -static int i91u_release(struct Scsi_Host *hreg) -{ - free_irq(hreg->irq, hreg); - release_region(hreg->io_port, 256); - return 0; -} -MODULE_LICENSE("Dual BSD/GPL"); - -static struct scsi_host_template driver_template = { +static struct scsi_host_template initio_template = { .proc_name = "INI9100U", - .name = i91u_REVID, - .detect = i91u_detect, - .release = i91u_release, + .name = "Initio INI-9X00U/UW SCSI device driver", .queuecommand = i91u_queuecommand, -// .abort = i91u_abort, -// .reset = i91u_reset, .eh_bus_reset_handler = i91u_bus_reset, .bios_param = i91u_biosparam, - .can_queue = 1, + .can_queue = MAX_TARGETS * i91u_MAXQUEUE, .this_id = 1, .sg_tablesize = SG_ALL, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, }; -#include "scsi_module.c" +static int initio_probe_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct Scsi_Host *shost; + struct initio_host *host; + u32 reg; + u16 bios_seg; + struct scsi_ctrl_blk *scb, *tmp, *prev = NULL /* silence gcc */; + int num_scb, i, error; + + error = pci_enable_device(pdev); + if (error) + return error; + + pci_read_config_dword(pdev, 0x44, (u32 *) & reg); + bios_seg = (u16) (reg & 
0xFF); + if (((reg & 0xFF00) >> 8) == 0xFF) + reg = 0; + bios_seg = (bios_seg << 8) + ((u16) ((reg & 0xFF00) >> 8)); + + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { + printk(KERN_WARNING "i91u: Could not set 32 bit DMA mask\n"); + error = -ENODEV; + goto out_disable_device; + } + shost = scsi_host_alloc(&initio_template, sizeof(struct initio_host)); + if (!shost) { + printk(KERN_WARNING "initio: Could not allocate host structure.\n"); + error = -ENOMEM; + goto out_disable_device; + } + host = (struct initio_host *)shost->hostdata; + memset(host, 0, sizeof(struct initio_host)); + + if (!request_region(host->addr, 256, "i91u")) { + printk(KERN_WARNING "initio: I/O port range 0x%x is busy.\n", host->addr); + error = -ENODEV; + goto out_host_put; + } + + if (initio_tag_enable) /* 1.01i */ + num_scb = MAX_TARGETS * i91u_MAXQUEUE; + else + num_scb = MAX_TARGETS + 3; /* 1-tape, 1-CD_ROM, 1- extra */ + + for (; num_scb >= MAX_TARGETS + 3; num_scb--) { + i = num_scb * sizeof(struct scsi_ctrl_blk); + if ((scb = kzalloc(i, GFP_DMA)) != NULL) + break; + } + + if (!scb) { + printk(KERN_WARNING "initio: Cannot allocate SCB array.\n"); + error = -ENOMEM; + goto out_release_region; + } + + host->num_scbs = num_scb; + host->scb = scb; + host->next_pending = scb; + host->next_avail = scb; + for (i = 0, tmp = scb; i < num_scb; i++, tmp++) { + tmp->tagid = i; + if (i != 0) + prev->next = tmp; + prev = tmp; + } + prev->next = NULL; + host->scb_end = tmp; + host->first_avail = scb; + host->last_avail = prev; + + initio_init(host, phys_to_virt(bios_seg << 4)); + + host->jsstatus0 = 0; + + shost->io_port = host->addr; + shost->n_io_port = 0xff; + shost->can_queue = num_scb; /* 03/05/98 */ + shost->unique_id = host->addr; + shost->max_id = host->max_tar; + shost->max_lun = 32; /* 10/21/97 */ + shost->irq = pdev->irq; + shost->this_id = host->scsi_id; /* Assign HCS index */ + shost->base = host->addr; + shost->sg_tablesize = TOTAL_SG_ENTRY; + + error = request_irq(pdev->irq, i91u_intr, 
IRQF_DISABLED|IRQF_SHARED, "i91u", shost); + if (error < 0) { + printk(KERN_WARNING "initio: Unable to request IRQ %d\n", pdev->irq); + goto out_free_scbs; + } + + pci_set_drvdata(pdev, shost); + host->pci_dev = pdev; + + error = scsi_add_host(shost, &pdev->dev); + if (error) + goto out_free_irq; + scsi_scan_host(shost); + return 0; +out_free_irq: + free_irq(pdev->irq, shost); +out_free_scbs: + kfree(host->scb); +out_release_region: + release_region(host->addr, 256); +out_host_put: + scsi_host_put(shost); +out_disable_device: + pci_disable_device(pdev); + return error; +} + +/** + * initio_remove_one - control shutdown + * @pdev: PCI device being released + * + * Release the resources assigned to this adapter after it has + * finished being used. + */ + +static void initio_remove_one(struct pci_dev *pdev) +{ + struct Scsi_Host *host = pci_get_drvdata(pdev); + struct initio_host *s = (struct initio_host *)host->hostdata; + scsi_remove_host(host); + free_irq(pdev->irq, host); + release_region(s->addr, 256); + scsi_host_put(host); + pci_disable_device(pdev); +} + +MODULE_LICENSE("GPL"); + +static struct pci_device_id initio_pci_tbl[] = { + {PCI_VENDOR_ID_INIT, 0x9500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_INIT, 0x9400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_INIT, 0x9401, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_INIT, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_DOMEX, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0,} +}; +MODULE_DEVICE_TABLE(pci, initio_pci_tbl); + +static struct pci_driver initio_pci_driver = { + .name = "initio", + .id_table = initio_pci_tbl, + .probe = initio_probe_one, + .remove = __devexit_p(initio_remove_one), +}; + +static int __init initio_init_driver(void) +{ + return pci_register_driver(&initio_pci_driver); +} + +static void __exit initio_exit_driver(void) +{ + pci_unregister_driver(&initio_pci_driver); +} + +MODULE_DESCRIPTION("Initio INI-9X00U/UW SCSI device driver"); +MODULE_AUTHOR("Initio 
Corporation"); +MODULE_LICENSE("GPL"); + +module_init(initio_init_driver); +module_exit(initio_exit_driver); diff -Nurb linux-2.6.22-570/drivers/scsi/initio.h linux-2.6.22-591/drivers/scsi/initio.h --- linux-2.6.22-570/drivers/scsi/initio.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/initio.h 2007-12-21 15:36:12.000000000 -0500 @@ -4,6 +4,8 @@ * Copyright (c) 1994-1998 Initio Corporation * All rights reserved. * + * Cleanups (c) Copyright 2007 Red Hat + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) @@ -18,27 +20,6 @@ * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * - * -------------------------------------------------------------------------- - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * Where this Software is combined with software released under the terms of - * the GNU General Public License ("GPL") and the terms of the GPL would require the - * combined work to also be released under the terms of the GPL, the terms - * and conditions of this License will apply in addition to those of the - * GPL with the exception of any terms or conditions of this License that - * conflict with, or are expressly prohibited by, the GPL. - * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -56,17 +37,6 @@ #include -#define ULONG unsigned long -#define USHORT unsigned short -#define UCHAR unsigned char -#define BYTE unsigned char -#define WORD unsigned short -#define DWORD unsigned long -#define UBYTE unsigned char -#define UWORD unsigned short -#define UDWORD unsigned long -#define U32 u32 - #define TOTAL_SG_ENTRY 32 #define MAX_SUPPORTED_ADAPTERS 8 #define MAX_OFFSET 15 @@ -368,55 +338,55 @@ /************************************************************************/ /* Scatter-Gather Element Structure */ /************************************************************************/ -typedef struct SG_Struc { - U32 SG_Ptr; /* Data Pointer */ - U32 SG_Len; /* Data Length */ -} SG; +struct sg_entry { + u32 data; /* Data Pointer */ + u32 len; /* Data Length */ +}; /*********************************************************************** SCSI Control Block ************************************************************************/ -typedef struct Scsi_Ctrl_Blk { - struct Scsi_Ctrl_Blk *SCB_NxtScb; - UBYTE SCB_Status; /*4 */ - UBYTE SCB_NxtStat; /*5 */ - UBYTE SCB_Mode; /*6 */ - UBYTE SCB_Msgin; /*7 SCB_Res0 */ - UWORD SCB_SGIdx; /*8 */ - UWORD SCB_SGMax; /*A */ +struct scsi_ctrl_blk { + struct scsi_ctrl_blk *next; + u8 status; /*4 */ + u8 next_state; /*5 */ + u8 mode; /*6 */ + u8 msgin; /*7 
SCB_Res0 */ + u16 sgidx; /*8 */ + u16 sgmax; /*A */ #ifdef ALPHA - U32 SCB_Reserved[2]; /*C */ + u32 reserved[2]; /*C */ #else - U32 SCB_Reserved[3]; /*C */ + u32 reserved[3]; /*C */ #endif - U32 SCB_XferLen; /*18 Current xfer len */ - U32 SCB_TotXLen; /*1C Total xfer len */ - U32 SCB_PAddr; /*20 SCB phy. Addr. */ - - UBYTE SCB_Opcode; /*24 SCB command code */ - UBYTE SCB_Flags; /*25 SCB Flags */ - UBYTE SCB_Target; /*26 Target Id */ - UBYTE SCB_Lun; /*27 Lun */ - U32 SCB_BufPtr; /*28 Data Buffer Pointer */ - U32 SCB_BufLen; /*2C Data Allocation Length */ - UBYTE SCB_SGLen; /*30 SG list # */ - UBYTE SCB_SenseLen; /*31 Sense Allocation Length */ - UBYTE SCB_HaStat; /*32 */ - UBYTE SCB_TaStat; /*33 */ - UBYTE SCB_CDBLen; /*34 CDB Length */ - UBYTE SCB_Ident; /*35 Identify */ - UBYTE SCB_TagMsg; /*36 Tag Message */ - UBYTE SCB_TagId; /*37 Queue Tag */ - UBYTE SCB_CDB[12]; /*38 */ - U32 SCB_SGPAddr; /*44 SG List/Sense Buf phy. Addr. */ - U32 SCB_SensePtr; /*48 Sense data pointer */ - void (*SCB_Post) (BYTE *, BYTE *); /*4C POST routine */ - struct scsi_cmnd *SCB_Srb; /*50 SRB Pointer */ - SG SCB_SGList[TOTAL_SG_ENTRY]; /*54 Start of SG list */ -} SCB; + u32 xferlen; /*18 Current xfer len */ + u32 totxlen; /*1C Total xfer len */ + u32 paddr; /*20 SCB phy. Addr. */ + + u8 opcode; /*24 SCB command code */ + u8 flags; /*25 SCB Flags */ + u8 target; /*26 Target Id */ + u8 lun; /*27 Lun */ + u32 bufptr; /*28 Data Buffer Pointer */ + u32 buflen; /*2C Data Allocation Length */ + u8 sglen; /*30 SG list # */ + u8 senselen; /*31 Sense Allocation Length */ + u8 hastat; /*32 */ + u8 tastat; /*33 */ + u8 cdblen; /*34 CDB Length */ + u8 ident; /*35 Identify */ + u8 tagmsg; /*36 Tag Message */ + u8 tagid; /*37 Queue Tag */ + u8 cdb[12]; /*38 */ + u32 sgpaddr; /*44 SG List/Sense Buf phy. Addr. 
*/ + u32 senseptr; /*48 Sense data pointer */ + void (*post) (u8 *, u8 *); /*4C POST routine */ + struct scsi_cmnd *srb; /*50 SRB Pointer */ + struct sg_entry sglist[TOTAL_SG_ENTRY]; /*54 Start of SG list */ +}; -/* Bit Definition for SCB_Status */ +/* Bit Definition for status */ #define SCB_RENT 0x01 #define SCB_PEND 0x02 #define SCB_CONTIG 0x04 /* Contigent Allegiance */ @@ -425,17 +395,17 @@ #define SCB_DONE 0x20 -/* Opcodes of SCB_Opcode */ +/* Opcodes for opcode */ #define ExecSCSI 0x1 #define BusDevRst 0x2 #define AbortCmd 0x3 -/* Bit Definition for SCB_Mode */ +/* Bit Definition for mode */ #define SCM_RSENS 0x01 /* request sense mode */ -/* Bit Definition for SCB_Flags */ +/* Bit Definition for flags */ #define SCF_DONE 0x01 #define SCF_POST 0x02 #define SCF_SENSE 0x04 @@ -492,15 +462,14 @@ Target Device Control Structure **********************************************************************/ -typedef struct Tar_Ctrl_Struc { - UWORD TCS_Flags; /* 0 */ - UBYTE TCS_JS_Period; /* 2 */ - UBYTE TCS_SConfig0; /* 3 */ - - UWORD TCS_DrvFlags; /* 4 */ - UBYTE TCS_DrvHead; /* 6 */ - UBYTE TCS_DrvSector; /* 7 */ -} TCS; +struct target_control { + u16 flags; + u8 js_period; + u8 sconfig0; + u16 drv_flags; + u8 heads; + u8 sectors; +}; /*********************************************************************** Target Device Control Structure @@ -523,62 +492,53 @@ #define TCF_DRV_EN_TAG 0x0800 #define TCF_DRV_255_63 0x0400 -typedef struct I91u_Adpt_Struc { - UWORD ADPT_BIOS; /* 0 */ - UWORD ADPT_BASE; /* 1 */ - UBYTE ADPT_Bus; /* 2 */ - UBYTE ADPT_Device; /* 3 */ - UBYTE ADPT_INTR; /* 4 */ -} INI_ADPT_STRUCT; - - /*********************************************************************** Host Adapter Control Structure ************************************************************************/ -typedef struct Ha_Ctrl_Struc { - UWORD HCS_Base; /* 00 */ - UWORD HCS_BIOS; /* 02 */ - UBYTE HCS_Intr; /* 04 */ - UBYTE HCS_SCSI_ID; /* 05 */ - UBYTE HCS_MaxTar; /* 06 */ - UBYTE 
HCS_NumScbs; /* 07 */ - - UBYTE HCS_Flags; /* 08 */ - UBYTE HCS_Index; /* 09 */ - UBYTE HCS_HaId; /* 0A */ - UBYTE HCS_Config; /* 0B */ - UWORD HCS_IdMask; /* 0C */ - UBYTE HCS_Semaph; /* 0E */ - UBYTE HCS_Phase; /* 0F */ - UBYTE HCS_JSStatus0; /* 10 */ - UBYTE HCS_JSInt; /* 11 */ - UBYTE HCS_JSStatus1; /* 12 */ - UBYTE HCS_SConf1; /* 13 */ - - UBYTE HCS_Msg[8]; /* 14 */ - SCB *HCS_NxtAvail; /* 1C */ - SCB *HCS_Scb; /* 20 */ - SCB *HCS_ScbEnd; /* 24 */ - SCB *HCS_NxtPend; /* 28 */ - SCB *HCS_NxtContig; /* 2C */ - SCB *HCS_ActScb; /* 30 */ - TCS *HCS_ActTcs; /* 34 */ - - SCB *HCS_FirstAvail; /* 38 */ - SCB *HCS_LastAvail; /* 3C */ - SCB *HCS_FirstPend; /* 40 */ - SCB *HCS_LastPend; /* 44 */ - SCB *HCS_FirstBusy; /* 48 */ - SCB *HCS_LastBusy; /* 4C */ - SCB *HCS_FirstDone; /* 50 */ - SCB *HCS_LastDone; /* 54 */ - UBYTE HCS_MaxTags[16]; /* 58 */ - UBYTE HCS_ActTags[16]; /* 68 */ - TCS HCS_Tcs[MAX_TARGETS]; /* 78 */ - spinlock_t HCS_AvailLock; - spinlock_t HCS_SemaphLock; +struct initio_host { + u16 addr; /* 00 */ + u16 bios_addr; /* 02 */ + u8 irq; /* 04 */ + u8 scsi_id; /* 05 */ + u8 max_tar; /* 06 */ + u8 num_scbs; /* 07 */ + + u8 flags; /* 08 */ + u8 index; /* 09 */ + u8 ha_id; /* 0A */ + u8 config; /* 0B */ + u16 idmask; /* 0C */ + u8 semaph; /* 0E */ + u8 phase; /* 0F */ + u8 jsstatus0; /* 10 */ + u8 jsint; /* 11 */ + u8 jsstatus1; /* 12 */ + u8 sconf1; /* 13 */ + + u8 msg[8]; /* 14 */ + struct scsi_ctrl_blk *next_avail; /* 1C */ + struct scsi_ctrl_blk *scb; /* 20 */ + struct scsi_ctrl_blk *scb_end; /* 24 */ /*UNUSED*/ + struct scsi_ctrl_blk *next_pending; /* 28 */ + struct scsi_ctrl_blk *next_contig; /* 2C */ /*UNUSED*/ + struct scsi_ctrl_blk *active; /* 30 */ + struct target_control *active_tc; /* 34 */ + + struct scsi_ctrl_blk *first_avail; /* 38 */ + struct scsi_ctrl_blk *last_avail; /* 3C */ + struct scsi_ctrl_blk *first_pending; /* 40 */ + struct scsi_ctrl_blk *last_pending; /* 44 */ + struct scsi_ctrl_blk *first_busy; /* 48 */ + struct scsi_ctrl_blk 
*last_busy; /* 4C */ + struct scsi_ctrl_blk *first_done; /* 50 */ + struct scsi_ctrl_blk *last_done; /* 54 */ + u8 max_tags[16]; /* 58 */ + u8 act_tags[16]; /* 68 */ + struct target_control targets[MAX_TARGETS]; /* 78 */ + spinlock_t avail_lock; + spinlock_t semaph_lock; struct pci_dev *pci_dev; -} HCS; +}; /* Bit Definition for HCB_Config */ #define HCC_SCSI_RESET 0x01 @@ -599,47 +559,47 @@ *******************************************************************/ typedef struct _NVRAM_SCSI { /* SCSI channel configuration */ - UCHAR NVM_ChSCSIID; /* 0Ch -> Channel SCSI ID */ - UCHAR NVM_ChConfig1; /* 0Dh -> Channel config 1 */ - UCHAR NVM_ChConfig2; /* 0Eh -> Channel config 2 */ - UCHAR NVM_NumOfTarg; /* 0Fh -> Number of SCSI target */ + u8 NVM_ChSCSIID; /* 0Ch -> Channel SCSI ID */ + u8 NVM_ChConfig1; /* 0Dh -> Channel config 1 */ + u8 NVM_ChConfig2; /* 0Eh -> Channel config 2 */ + u8 NVM_NumOfTarg; /* 0Fh -> Number of SCSI target */ /* SCSI target configuration */ - UCHAR NVM_Targ0Config; /* 10h -> Target 0 configuration */ - UCHAR NVM_Targ1Config; /* 11h -> Target 1 configuration */ - UCHAR NVM_Targ2Config; /* 12h -> Target 2 configuration */ - UCHAR NVM_Targ3Config; /* 13h -> Target 3 configuration */ - UCHAR NVM_Targ4Config; /* 14h -> Target 4 configuration */ - UCHAR NVM_Targ5Config; /* 15h -> Target 5 configuration */ - UCHAR NVM_Targ6Config; /* 16h -> Target 6 configuration */ - UCHAR NVM_Targ7Config; /* 17h -> Target 7 configuration */ - UCHAR NVM_Targ8Config; /* 18h -> Target 8 configuration */ - UCHAR NVM_Targ9Config; /* 19h -> Target 9 configuration */ - UCHAR NVM_TargAConfig; /* 1Ah -> Target A configuration */ - UCHAR NVM_TargBConfig; /* 1Bh -> Target B configuration */ - UCHAR NVM_TargCConfig; /* 1Ch -> Target C configuration */ - UCHAR NVM_TargDConfig; /* 1Dh -> Target D configuration */ - UCHAR NVM_TargEConfig; /* 1Eh -> Target E configuration */ - UCHAR NVM_TargFConfig; /* 1Fh -> Target F configuration */ + u8 NVM_Targ0Config; /* 10h -> Target 0 
configuration */ + u8 NVM_Targ1Config; /* 11h -> Target 1 configuration */ + u8 NVM_Targ2Config; /* 12h -> Target 2 configuration */ + u8 NVM_Targ3Config; /* 13h -> Target 3 configuration */ + u8 NVM_Targ4Config; /* 14h -> Target 4 configuration */ + u8 NVM_Targ5Config; /* 15h -> Target 5 configuration */ + u8 NVM_Targ6Config; /* 16h -> Target 6 configuration */ + u8 NVM_Targ7Config; /* 17h -> Target 7 configuration */ + u8 NVM_Targ8Config; /* 18h -> Target 8 configuration */ + u8 NVM_Targ9Config; /* 19h -> Target 9 configuration */ + u8 NVM_TargAConfig; /* 1Ah -> Target A configuration */ + u8 NVM_TargBConfig; /* 1Bh -> Target B configuration */ + u8 NVM_TargCConfig; /* 1Ch -> Target C configuration */ + u8 NVM_TargDConfig; /* 1Dh -> Target D configuration */ + u8 NVM_TargEConfig; /* 1Eh -> Target E configuration */ + u8 NVM_TargFConfig; /* 1Fh -> Target F configuration */ } NVRAM_SCSI; typedef struct _NVRAM { /*----------header ---------------*/ - USHORT NVM_Signature; /* 0,1: Signature */ - UCHAR NVM_Size; /* 2: Size of data structure */ - UCHAR NVM_Revision; /* 3: Revision of data structure */ + u16 NVM_Signature; /* 0,1: Signature */ + u8 NVM_Size; /* 2: Size of data structure */ + u8 NVM_Revision; /* 3: Revision of data structure */ /* ----Host Adapter Structure ---- */ - UCHAR NVM_ModelByte0; /* 4: Model number (byte 0) */ - UCHAR NVM_ModelByte1; /* 5: Model number (byte 1) */ - UCHAR NVM_ModelInfo; /* 6: Model information */ - UCHAR NVM_NumOfCh; /* 7: Number of SCSI channel */ - UCHAR NVM_BIOSConfig1; /* 8: BIOS configuration 1 */ - UCHAR NVM_BIOSConfig2; /* 9: BIOS configuration 2 */ - UCHAR NVM_HAConfig1; /* A: Hoat adapter configuration 1 */ - UCHAR NVM_HAConfig2; /* B: Hoat adapter configuration 2 */ + u8 NVM_ModelByte0; /* 4: Model number (byte 0) */ + u8 NVM_ModelByte1; /* 5: Model number (byte 1) */ + u8 NVM_ModelInfo; /* 6: Model information */ + u8 NVM_NumOfCh; /* 7: Number of SCSI channel */ + u8 NVM_BIOSConfig1; /* 8: BIOS configuration 1 */ + u8 
NVM_BIOSConfig2; /* 9: BIOS configuration 2 */ + u8 NVM_HAConfig1; /* A: Hoat adapter configuration 1 */ + u8 NVM_HAConfig2; /* B: Hoat adapter configuration 2 */ NVRAM_SCSI NVM_SCSIInfo[2]; - UCHAR NVM_reserved[10]; + u8 NVM_reserved[10]; /* ---------- CheckSum ---------- */ - USHORT NVM_CheckSum; /* 0x3E, 0x3F: Checksum of NVRam */ + u16 NVM_CheckSum; /* 0x3E, 0x3F: Checksum of NVRam */ } NVRAM, *PNVRAM; /* Bios Configuration for nvram->BIOSConfig1 */ @@ -681,19 +641,6 @@ #define DISC_ALLOW 0xC0 /* Disconnect is allowed */ #define SCSICMD_RequestSense 0x03 -typedef struct _HCSinfo { - ULONG base; - UCHAR vec; - UCHAR bios; /* High byte of BIOS address */ - USHORT BaseAndBios; /* high byte: pHcsInfo->bios,low byte:pHcsInfo->base */ -} HCSINFO; - -#define TUL_RD(x,y) (UCHAR)(inb( (int)((ULONG)(x+y)) )) -#define TUL_RDLONG(x,y) (ULONG)(inl((int)((ULONG)(x+y)) )) -#define TUL_WR( adr,data) outb( (UCHAR)(data), (int)(adr)) -#define TUL_WRSHORT(adr,data) outw( (UWORD)(data), (int)(adr)) -#define TUL_WRLONG( adr,data) outl( (ULONG)(data), (int)(adr)) - #define SCSI_ABORT_SNOOZE 0 #define SCSI_ABORT_SUCCESS 1 #define SCSI_ABORT_PENDING 2 diff -Nurb linux-2.6.22-570/drivers/scsi/ipr.c linux-2.6.22-591/drivers/scsi/ipr.c --- linux-2.6.22-570/drivers/scsi/ipr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ipr.c 2007-12-21 15:36:12.000000000 -0500 @@ -540,32 +540,6 @@ } /** - * ipr_unmap_sglist - Unmap scatterlist if mapped - * @ioa_cfg: ioa config struct - * @ipr_cmd: ipr command struct - * - * Return value: - * nothing - **/ -static void ipr_unmap_sglist(struct ipr_ioa_cfg *ioa_cfg, - struct ipr_cmnd *ipr_cmd) -{ - struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; - - if (ipr_cmd->dma_use_sg) { - if (scsi_cmd->use_sg > 0) { - pci_unmap_sg(ioa_cfg->pdev, scsi_cmd->request_buffer, - scsi_cmd->use_sg, - scsi_cmd->sc_data_direction); - } else { - pci_unmap_single(ioa_cfg->pdev, ipr_cmd->dma_handle, - scsi_cmd->request_bufflen, - 
scsi_cmd->sc_data_direction); - } - } -} - -/** * ipr_mask_and_clear_interrupts - Mask all and clear specified interrupts * @ioa_cfg: ioa config struct * @clr_ints: interrupts to clear @@ -677,7 +651,7 @@ scsi_cmd->result |= (DID_ERROR << 16); - ipr_unmap_sglist(ioa_cfg, ipr_cmd); + scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); } @@ -2465,6 +2439,7 @@ /** * ipr_read_trace - Dump the adapter trace * @kobj: kobject struct + * @bin_attr: bin_attribute struct * @buf: buffer * @off: offset * @count: buffer size @@ -2472,8 +2447,9 @@ * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_read_trace(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t ipr_read_trace(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *shost = class_to_shost(cdev); @@ -3166,6 +3142,7 @@ /** * ipr_read_dump - Dump the adapter * @kobj: kobject struct + * @bin_attr: bin_attribute struct * @buf: buffer * @off: offset * @count: buffer size @@ -3173,8 +3150,9 @@ * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_read_dump(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t ipr_read_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *shost = class_to_shost(cdev); @@ -3327,6 +3305,7 @@ /** * ipr_write_dump - Setup dump state of adapter * @kobj: kobject struct + * @bin_attr: bin_attribute struct * @buf: buffer * @off: offset * @count: buffer size @@ -3334,8 +3313,9 @@ * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_write_dump(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t ipr_write_dump(struct kobject *kobj, 
+ struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct class_device *cdev = container_of(kobj,struct class_device,kobj); struct Scsi_Host *shost = class_to_shost(cdev); @@ -4292,24 +4272,25 @@ static int ipr_build_ioadl(struct ipr_ioa_cfg *ioa_cfg, struct ipr_cmnd *ipr_cmd) { - int i; - struct scatterlist *sglist; + int i, nseg; + struct scatterlist *sg; u32 length; u32 ioadl_flags = 0; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; struct ipr_ioadl_desc *ioadl = ipr_cmd->ioadl; - length = scsi_cmd->request_bufflen; - - if (length == 0) + length = scsi_bufflen(scsi_cmd); + if (!length) return 0; - if (scsi_cmd->use_sg) { - ipr_cmd->dma_use_sg = pci_map_sg(ioa_cfg->pdev, - scsi_cmd->request_buffer, - scsi_cmd->use_sg, - scsi_cmd->sc_data_direction); + nseg = scsi_dma_map(scsi_cmd); + if (nseg < 0) { + dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n"); + return -1; + } + + ipr_cmd->dma_use_sg = nseg; if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) { ioadl_flags = IPR_IOADL_FLAGS_WRITE; @@ -4324,8 +4305,6 @@ cpu_to_be32(sizeof(struct ipr_ioadl_desc) * ipr_cmd->dma_use_sg); } - sglist = scsi_cmd->request_buffer; - if (ipr_cmd->dma_use_sg <= ARRAY_SIZE(ioarcb->add_data.u.ioadl)) { ioadl = ioarcb->add_data.u.ioadl; ioarcb->write_ioadl_addr = @@ -4334,51 +4313,14 @@ ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr; } - for (i = 0; i < ipr_cmd->dma_use_sg; i++) { + scsi_for_each_sg(scsi_cmd, sg, ipr_cmd->dma_use_sg, i) { ioadl[i].flags_and_data_len = - cpu_to_be32(ioadl_flags | sg_dma_len(&sglist[i])); - ioadl[i].address = - cpu_to_be32(sg_dma_address(&sglist[i])); + cpu_to_be32(ioadl_flags | sg_dma_len(sg)); + ioadl[i].address = cpu_to_be32(sg_dma_address(sg)); } - if (likely(ipr_cmd->dma_use_sg)) { - ioadl[i-1].flags_and_data_len |= - cpu_to_be32(IPR_IOADL_FLAGS_LAST); - return 0; - } else - dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n"); - } else { - if (scsi_cmd->sc_data_direction == 
DMA_TO_DEVICE) { - ioadl_flags = IPR_IOADL_FLAGS_WRITE; - ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_WRITE_NOT_READ; - ioarcb->write_data_transfer_length = cpu_to_be32(length); - ioarcb->write_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc)); - } else if (scsi_cmd->sc_data_direction == DMA_FROM_DEVICE) { - ioadl_flags = IPR_IOADL_FLAGS_READ; - ioarcb->read_data_transfer_length = cpu_to_be32(length); - ioarcb->read_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc)); - } - - ipr_cmd->dma_handle = pci_map_single(ioa_cfg->pdev, - scsi_cmd->request_buffer, length, - scsi_cmd->sc_data_direction); - - if (likely(!pci_dma_mapping_error(ipr_cmd->dma_handle))) { - ioadl = ioarcb->add_data.u.ioadl; - ioarcb->write_ioadl_addr = - cpu_to_be32(be32_to_cpu(ioarcb->ioarcb_host_pci_addr) + - offsetof(struct ipr_ioarcb, add_data)); - ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr; - ipr_cmd->dma_use_sg = 1; - ioadl[0].flags_and_data_len = - cpu_to_be32(ioadl_flags | length | IPR_IOADL_FLAGS_LAST); - ioadl[0].address = cpu_to_be32(ipr_cmd->dma_handle); + ioadl[i-1].flags_and_data_len |= cpu_to_be32(IPR_IOADL_FLAGS_LAST); return 0; - } else - dev_err(&ioa_cfg->pdev->dev, "pci_map_single failed!\n"); - } - - return -1; } /** @@ -4441,7 +4383,7 @@ res->needs_sync_complete = 1; res->in_erp = 0; } - ipr_unmap_sglist(ioa_cfg, ipr_cmd); + scsi_dma_unmap(ipr_cmd->scsi_cmd); list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -4819,7 +4761,7 @@ break; } - ipr_unmap_sglist(ioa_cfg, ipr_cmd); + scsi_dma_unmap(ipr_cmd->scsi_cmd); list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -4840,10 +4782,10 @@ struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc); - scsi_cmd->resid = be32_to_cpu(ipr_cmd->ioasa.residual_data_len); + scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->ioasa.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { - ipr_unmap_sglist(ioa_cfg, ipr_cmd); 
+ scsi_dma_unmap(ipr_cmd->scsi_cmd); list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); scsi_cmd->scsi_done(scsi_cmd); } else diff -Nurb linux-2.6.22-570/drivers/scsi/ips.c linux-2.6.22-591/drivers/scsi/ips.c --- linux-2.6.22-570/drivers/scsi/ips.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ips.c 2007-12-21 15:36:12.000000000 -0500 @@ -211,19 +211,6 @@ #warning "This driver has only been tested on the x86/ia64/x86_64 platforms" #endif -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0) -#include -#include "sd.h" -#define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(&io_request_lock,flags) -#define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(&io_request_lock,flags) -#ifndef __devexit_p -#define __devexit_p(x) x -#endif -#else -#define IPS_LOCK_SAVE(lock,flags) do{spin_lock(lock);(void)flags;}while(0) -#define IPS_UNLOCK_RESTORE(lock,flags) do{spin_unlock(lock);(void)flags;}while(0) -#endif - #define IPS_DMA_DIR(scb) ((!scb->scsi_cmd || ips_is_passthru(scb->scsi_cmd) || \ DMA_NONE == scb->scsi_cmd->sc_data_direction) ? 
\ PCI_DMA_BIDIRECTIONAL : \ @@ -381,24 +368,13 @@ .eh_abort_handler = ips_eh_abort, .eh_host_reset_handler = ips_eh_reset, .proc_name = "ips", -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) .proc_info = ips_proc_info, .slave_configure = ips_slave_configure, -#else - .proc_info = ips_proc24_info, - .select_queue_depths = ips_select_queue_depth, -#endif .bios_param = ips_biosparam, .this_id = -1, .sg_tablesize = IPS_MAX_SG, .cmd_per_lun = 3, .use_clustering = ENABLE_CLUSTERING, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - .use_new_eh_code = 1, -#endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - .highmem_io = 1, -#endif }; @@ -731,7 +707,7 @@ /* free IRQ */ free_irq(ha->irq, ha); - IPS_REMOVE_HOST(sh); + scsi_remove_host(sh); scsi_host_put(sh); ips_released_controllers++; @@ -813,7 +789,6 @@ ips_ha_t *ha; ips_copp_wait_item_t *item; int ret; - unsigned long cpu_flags; struct Scsi_Host *host; METHOD_TRACE("ips_eh_abort", 1); @@ -830,7 +805,7 @@ if (!ha->active) return (FAILED); - IPS_LOCK_SAVE(host->host_lock, cpu_flags); + spin_lock(host->host_lock); /* See if the command is on the copp queue */ item = ha->copp_waitlist.head; @@ -851,7 +826,7 @@ ret = (FAILED); } - IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); + spin_unlock(host->host_lock); return ret; } @@ -1129,7 +1104,7 @@ /* A Reset IOCTL is only sent by the boot CD in extreme cases. */ /* There can never be any system activity ( network or disk ), but check */ /* anyway just as a good practice. 
*/ - pt = (ips_passthru_t *) SC->request_buffer; + pt = (ips_passthru_t *) scsi_sglist(SC); if ((pt->CoppCP.cmd.reset.op_code == IPS_CMD_RESET_CHANNEL) && (pt->CoppCP.cmd.reset.adapter_flag == 1)) { if (ha->scb_activelist.count != 0) { @@ -1176,18 +1151,10 @@ /* Set bios geometry for the controller */ /* */ /****************************************************************************/ -static int -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -ips_biosparam(Disk * disk, kdev_t dev, int geom[]) -{ - ips_ha_t *ha = (ips_ha_t *) disk->device->host->hostdata; - unsigned long capacity = disk->capacity; -#else -ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, +static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) { ips_ha_t *ha = (ips_ha_t *) sdev->host->hostdata; -#endif int heads; int sectors; int cylinders; @@ -1225,70 +1192,6 @@ return (0); } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - -/* ips_proc24_info is a wrapper around ips_proc_info * - * for compatibility with the 2.4 scsi parameters */ -static int -ips_proc24_info(char *buffer, char **start, off_t offset, int length, - int hostno, int func) -{ - int i; - - for (i = 0; i < ips_next_controller; i++) { - if (ips_sh[i] && ips_sh[i]->host_no == hostno) { - return ips_proc_info(ips_sh[i], buffer, start, - offset, length, func); - } - } - return -EINVAL; -} - -/****************************************************************************/ -/* */ -/* Routine Name: ips_select_queue_depth */ -/* */ -/* Routine Description: */ -/* */ -/* Select queue depths for the devices on the contoller */ -/* */ -/****************************************************************************/ -static void -ips_select_queue_depth(struct Scsi_Host *host, struct scsi_device * scsi_devs) -{ - struct scsi_device *device; - ips_ha_t *ha; - int count = 0; - int min; - - ha = IPS_HA(host); - min = ha->max_cmds / 4; - - for (device = scsi_devs; device; device = 
device->next) { - if (device->host == host) { - if ((device->channel == 0) && (device->type == 0)) - count++; - } - } - - for (device = scsi_devs; device; device = device->next) { - if (device->host == host) { - if ((device->channel == 0) && (device->type == 0)) { - device->queue_depth = - (ha->max_cmds - 1) / count; - if (device->queue_depth < min) - device->queue_depth = min; - } else { - device->queue_depth = 2; - } - - if (device->queue_depth < 2) - device->queue_depth = 2; - } - } -} - -#else /****************************************************************************/ /* */ /* Routine Name: ips_slave_configure */ @@ -1316,7 +1219,6 @@ SDptr->skip_ms_page_3f = 1; return 0; } -#endif /****************************************************************************/ /* */ @@ -1331,7 +1233,6 @@ do_ipsintr(int irq, void *dev_id) { ips_ha_t *ha; - unsigned long cpu_flags; struct Scsi_Host *host; int irqstatus; @@ -1347,16 +1248,16 @@ return IRQ_HANDLED; } - IPS_LOCK_SAVE(host->host_lock, cpu_flags); + spin_lock(host->host_lock); if (!ha->active) { - IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); + spin_unlock(host->host_lock); return IRQ_HANDLED; } irqstatus = (*ha->func.intr) (ha); - IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); + spin_unlock(host->host_lock); /* start the next command */ ips_next(ha, IPS_INTR_ON); @@ -1606,15 +1507,8 @@ if ((SC->cmnd[0] == IPS_IOCTL_COMMAND) && (SC->device->channel == 0) && (SC->device->id == IPS_ADAPTER_ID) && - (SC->device->lun == 0) && SC->request_buffer) { - if ((!SC->use_sg) && SC->request_bufflen && - (((char *) SC->request_buffer)[0] == 'C') && - (((char *) SC->request_buffer)[1] == 'O') && - (((char *) SC->request_buffer)[2] == 'P') && - (((char *) SC->request_buffer)[3] == 'P')) - return 1; - else if (SC->use_sg) { - struct scatterlist *sg = SC->request_buffer; + (SC->device->lun == 0) && scsi_sglist(SC)) { + struct scatterlist *sg = scsi_sglist(SC); char *buffer; /* kmap_atomic() ensures addressability of the user 
buffer.*/ @@ -1630,7 +1524,6 @@ kunmap_atomic(buffer - sg->offset, KM_IRQ0); local_irq_restore(flags); } - } return 0; } @@ -1680,18 +1573,14 @@ { ips_passthru_t *pt; int length = 0; - int ret; + int i, ret; + struct scatterlist *sg = scsi_sglist(SC); METHOD_TRACE("ips_make_passthru", 1); - if (!SC->use_sg) { - length = SC->request_bufflen; - } else { - struct scatterlist *sg = SC->request_buffer; - int i; - for (i = 0; i < SC->use_sg; i++) + scsi_for_each_sg(SC, sg, scsi_sg_count(SC), i) length += sg[i].length; - } + if (length < sizeof (ips_passthru_t)) { /* wrong size */ DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", @@ -2115,7 +2004,7 @@ METHOD_TRACE("ips_cleanup_passthru", 1); - if ((!scb) || (!scb->scsi_cmd) || (!scb->scsi_cmd->request_buffer)) { + if ((!scb) || (!scb->scsi_cmd) || (!scsi_sglist(scb->scsi_cmd))) { DEBUG_VAR(1, "(%s%d) couldn't cleanup after passthru", ips_name, ha->host_num); @@ -2730,7 +2619,6 @@ struct scsi_cmnd *q; ips_copp_wait_item_t *item; int ret; - unsigned long cpu_flags = 0; struct Scsi_Host *host; METHOD_TRACE("ips_next", 1); @@ -2742,7 +2630,7 @@ * this command won't time out */ if (intr == IPS_INTR_ON) - IPS_LOCK_SAVE(host->host_lock, cpu_flags); + spin_lock(host->host_lock); if ((ha->subsys->param[3] & 0x300000) && (ha->scb_activelist.count == 0)) { @@ -2769,14 +2657,14 @@ item = ips_removeq_copp_head(&ha->copp_waitlist); ha->num_ioctl++; if (intr == IPS_INTR_ON) - IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); + spin_unlock(host->host_lock); scb->scsi_cmd = item->scsi_cmd; kfree(item); ret = ips_make_passthru(ha, scb->scsi_cmd, scb, intr); if (intr == IPS_INTR_ON) - IPS_LOCK_SAVE(host->host_lock, cpu_flags); + spin_lock(host->host_lock); switch (ret) { case IPS_FAILURE: if (scb->scsi_cmd) { @@ -2846,7 +2734,7 @@ SC = ips_removeq_wait(&ha->scb_waitlist, q); if (intr == IPS_INTR_ON) - IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); /* Unlock HA after command is taken off queue */ + spin_unlock(host->host_lock); /* Unlock 
HA after command is taken off queue */ SC->result = DID_OK; SC->host_scribble = NULL; @@ -2866,43 +2754,28 @@ /* copy in the CDB */ memcpy(scb->cdb, SC->cmnd, SC->cmd_len); - /* Now handle the data buffer */ - if (SC->use_sg) { + scb->sg_count = scsi_dma_map(SC); + BUG_ON(scb->sg_count < 0); + if (scb->sg_count) { struct scatterlist *sg; int i; - sg = SC->request_buffer; - scb->sg_count = pci_map_sg(ha->pcidev, sg, SC->use_sg, - SC->sc_data_direction); scb->flags |= IPS_SCB_MAP_SG; - for (i = 0; i < scb->sg_count; i++) { + + scsi_for_each_sg(SC, sg, scb->sg_count, i) { if (ips_fill_scb_sg_single - (ha, sg_dma_address(&sg[i]), scb, i, - sg_dma_len(&sg[i])) < 0) + (ha, sg_dma_address(sg), scb, i, + sg_dma_len(sg)) < 0) break; } scb->dcdb.transfer_length = scb->data_len; } else { - if (SC->request_bufflen) { - scb->data_busaddr = - pci_map_single(ha->pcidev, - SC->request_buffer, - SC->request_bufflen, - SC->sc_data_direction); - scb->flags |= IPS_SCB_MAP_SINGLE; - ips_fill_scb_sg_single(ha, scb->data_busaddr, - scb, 0, - SC->request_bufflen); - scb->dcdb.transfer_length = scb->data_len; - } else { scb->data_busaddr = 0L; scb->sg_len = 0; scb->data_len = 0; scb->dcdb.transfer_length = 0; } - } - scb->dcdb.cmd_attribute = ips_command_direction[scb->scsi_cmd->cmnd[0]]; @@ -2919,7 +2792,7 @@ scb->dcdb.transfer_length = 0; } if (intr == IPS_INTR_ON) - IPS_LOCK_SAVE(host->host_lock, cpu_flags); + spin_lock(host->host_lock); ret = ips_send_cmd(ha, scb); @@ -2958,7 +2831,7 @@ } /* end while */ if (intr == IPS_INTR_ON) - IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); + spin_unlock(host->host_lock); } /****************************************************************************/ @@ -3377,29 +3250,24 @@ * the rest of the data and continue. 
*/ if ((scb->breakup) || (scb->sg_break)) { + struct scatterlist *sg; + int sg_dma_index, ips_sg_index = 0; + /* we had a data breakup */ scb->data_len = 0; - if (scb->sg_count) { - /* S/G request */ - struct scatterlist *sg; - int ips_sg_index = 0; - int sg_dma_index; - - sg = scb->scsi_cmd->request_buffer; + sg = scsi_sglist(scb->scsi_cmd); /* Spin forward to last dma chunk */ sg_dma_index = scb->breakup; /* Take care of possible partial on last chunk */ ips_fill_scb_sg_single(ha, - sg_dma_address(&sg - [sg_dma_index]), + sg_dma_address(&sg[sg_dma_index]), scb, ips_sg_index++, - sg_dma_len(&sg - [sg_dma_index])); + sg_dma_len(&sg[sg_dma_index])); - for (; sg_dma_index < scb->sg_count; + for (; sg_dma_index < scsi_sg_count(scb->scsi_cmd); sg_dma_index++) { if (ips_fill_scb_sg_single (ha, @@ -3407,21 +3275,6 @@ scb, ips_sg_index++, sg_dma_len(&sg[sg_dma_index])) < 0) break; - - } - - } else { - /* Non S/G Request */ - (void) ips_fill_scb_sg_single(ha, - scb-> - data_busaddr + - (scb->sg_break * - ha->max_xfer), - scb, 0, - scb->scsi_cmd-> - request_bufflen - - (scb->sg_break * - ha->max_xfer)); } scb->dcdb.transfer_length = scb->data_len; @@ -3653,15 +3506,15 @@ static void ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count) { - if (scmd->use_sg) { int i; unsigned int min_cnt, xfer_cnt; char *cdata = (char *) data; unsigned char *buffer; unsigned long flags; - struct scatterlist *sg = scmd->request_buffer; + struct scatterlist *sg = scsi_sglist(scmd); + for (i = 0, xfer_cnt = 0; - (i < scmd->use_sg) && (xfer_cnt < count); i++) { + (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) { min_cnt = min(count - xfer_cnt, sg[i].length); /* kmap_atomic() ensures addressability of the data buffer.*/ @@ -3674,11 +3527,6 @@ xfer_cnt += min_cnt; } - - } else { - unsigned int min_cnt = min(count, scmd->request_bufflen); - memcpy(scmd->request_buffer, data, min_cnt); - } } /****************************************************************************/ @@ 
-3691,15 +3539,15 @@ static void ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count) { - if (scmd->use_sg) { int i; unsigned int min_cnt, xfer_cnt; char *cdata = (char *) data; unsigned char *buffer; unsigned long flags; - struct scatterlist *sg = scmd->request_buffer; + struct scatterlist *sg = scsi_sglist(scmd); + for (i = 0, xfer_cnt = 0; - (i < scmd->use_sg) && (xfer_cnt < count); i++) { + (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) { min_cnt = min(count - xfer_cnt, sg[i].length); /* kmap_atomic() ensures addressability of the data buffer.*/ @@ -3712,11 +3560,6 @@ xfer_cnt += min_cnt; } - - } else { - unsigned int min_cnt = min(count, scmd->request_bufflen); - memcpy(data, scmd->request_buffer, min_cnt); - } } /****************************************************************************/ @@ -4350,7 +4193,7 @@ METHOD_TRACE("ips_rdcap", 1); - if (scb->scsi_cmd->request_bufflen < 8) + if (scsi_bufflen(scb->scsi_cmd) < 8) return (0); cap.lba = @@ -4735,8 +4578,7 @@ METHOD_TRACE("ips_freescb", 1); if (scb->flags & IPS_SCB_MAP_SG) - pci_unmap_sg(ha->pcidev, scb->scsi_cmd->request_buffer, - scb->scsi_cmd->use_sg, IPS_DMA_DIR(scb)); + scsi_dma_unmap(scb->scsi_cmd); else if (scb->flags & IPS_SCB_MAP_SINGLE) pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len, IPS_DMA_DIR(scb)); @@ -7004,7 +6846,6 @@ kfree(oldha); ips_sh[index] = sh; ips_ha[index] = ha; - IPS_SCSI_SET_DEVICE(sh, ha); /* Store away needed values for later use */ sh->io_port = ha->io_addr; @@ -7016,17 +6857,16 @@ sh->cmd_per_lun = sh->hostt->cmd_per_lun; sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma; sh->use_clustering = sh->hostt->use_clustering; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7) sh->max_sectors = 128; -#endif sh->max_id = ha->ntargets; sh->max_lun = ha->nlun; sh->max_channel = ha->nbus - 1; sh->can_queue = ha->max_cmds - 1; - IPS_ADD_HOST(sh, NULL); + scsi_add_host(sh, NULL); + scsi_scan_host(sh); + return 0; } @@ -7069,7 +6909,7 @@ return 
-ENODEV; ips_driver_template.module = THIS_MODULE; ips_order_controllers(); - if (IPS_REGISTER_HOSTS(&ips_driver_template)) { + if (!ips_detect(&ips_driver_template)) { pci_unregister_driver(&ips_pci_driver); return -ENODEV; } @@ -7087,7 +6927,6 @@ static void __exit ips_module_exit(void) { - IPS_UNREGISTER_HOSTS(&ips_driver_template); pci_unregister_driver(&ips_pci_driver); unregister_reboot_notifier(&ips_notifier); } @@ -7443,15 +7282,9 @@ return SUCCESS; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9) MODULE_LICENSE("GPL"); -#endif - MODULE_DESCRIPTION("IBM ServeRAID Adapter Driver " IPS_VER_STRING); - -#ifdef MODULE_VERSION MODULE_VERSION(IPS_VER_STRING); -#endif /* diff -Nurb linux-2.6.22-570/drivers/scsi/ips.h linux-2.6.22-591/drivers/scsi/ips.h --- linux-2.6.22-570/drivers/scsi/ips.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ips.h 2007-12-21 15:36:12.000000000 -0500 @@ -58,10 +58,6 @@ /* * Some handy macros */ - #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined CONFIG_HIGHIO - #define IPS_HIGHIO - #endif - #define IPS_HA(x) ((ips_ha_t *) x->hostdata) #define IPS_COMMAND_ID(ha, scb) (int) (scb - ha->scbs) #define IPS_IS_TROMBONE(ha) (((ha->device_id == IPS_DEVICEID_COPPERHEAD) && \ @@ -84,38 +80,8 @@ #define IPS_SGLIST_SIZE(ha) (IPS_USE_ENH_SGLIST(ha) ? \ sizeof(IPS_ENH_SG_LIST) : sizeof(IPS_STD_SG_LIST)) - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) - #define pci_set_dma_mask(dev,mask) ( mask > 0xffffffff ? 
1:0 ) - #define scsi_set_pci_device(sh,dev) (0) - #endif - - #ifndef IRQ_NONE - typedef void irqreturn_t; - #define IRQ_NONE - #define IRQ_HANDLED - #define IRQ_RETVAL(x) - #endif - - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - #define IPS_REGISTER_HOSTS(SHT) scsi_register_module(MODULE_SCSI_HA,SHT) - #define IPS_UNREGISTER_HOSTS(SHT) scsi_unregister_module(MODULE_SCSI_HA,SHT) - #define IPS_ADD_HOST(shost,device) - #define IPS_REMOVE_HOST(shost) - #define IPS_SCSI_SET_DEVICE(sh,ha) scsi_set_pci_device(sh, (ha)->pcidev) - #define IPS_PRINTK(level, pcidev, format, arg...) \ - printk(level "%s %s:" format , "ips" , \ - (pcidev)->slot_name , ## arg) - #define scsi_host_alloc(sh,size) scsi_register(sh,size) - #define scsi_host_put(sh) scsi_unregister(sh) - #else - #define IPS_REGISTER_HOSTS(SHT) (!ips_detect(SHT)) - #define IPS_UNREGISTER_HOSTS(SHT) - #define IPS_ADD_HOST(shost,device) do { scsi_add_host(shost,device); scsi_scan_host(shost); } while (0) - #define IPS_REMOVE_HOST(shost) scsi_remove_host(shost) - #define IPS_SCSI_SET_DEVICE(sh,ha) do { } while (0) #define IPS_PRINTK(level, pcidev, format, arg...) 
\ dev_printk(level , &((pcidev)->dev) , format , ## arg) - #endif #define MDELAY(n) \ do { \ @@ -134,7 +100,7 @@ #define pci_dma_hi32(a) ((a >> 16) >> 16) #define pci_dma_lo32(a) (a & 0xffffffff) - #if (BITS_PER_LONG > 32) || (defined CONFIG_HIGHMEM64G && defined IPS_HIGHIO) + #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) #define IPS_ENABLE_DMA64 (1) #else #define IPS_ENABLE_DMA64 (0) @@ -451,16 +417,10 @@ /* * Scsi_Host Template */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - static int ips_proc24_info(char *, char **, off_t, int, int, int); - static void ips_select_queue_depth(struct Scsi_Host *, struct scsi_device *); - static int ips_biosparam(Disk *disk, kdev_t dev, int geom[]); -#else static int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int); static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); static int ips_slave_configure(struct scsi_device *SDptr); -#endif /* * Raid Command Formats diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.c linux-2.6.22-591/drivers/scsi/iscsi_tcp.c --- linux-2.6.22-570/drivers/scsi/iscsi_tcp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/iscsi_tcp.c 2007-12-21 15:36:12.000000000 -0500 @@ -29,14 +29,15 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include +#include #include #include #include @@ -109,7 +110,7 @@ struct iscsi_tcp_conn *tcp_conn = conn->dd_data; crypto_hash_digest(&tcp_conn->tx_hash, &buf->sg, buf->sg.length, crc); - buf->sg.length = tcp_conn->hdr_size; + buf->sg.length += sizeof(u32); } static inline int @@ -211,16 +212,14 @@ static int iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { - int rc; struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr; struct iscsi_session *session = conn->session; + 
struct scsi_cmnd *sc = ctask->sc; int datasn = be32_to_cpu(rhdr->datasn); - rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr); - if (rc) - return rc; + iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); /* * setup Data-In byte counter (gets decremented..) */ @@ -229,31 +228,36 @@ if (tcp_conn->in.datalen == 0) return 0; - if (ctask->datasn != datasn) + if (tcp_ctask->exp_datasn != datasn) { + debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n", + __FUNCTION__, tcp_ctask->exp_datasn, datasn); return ISCSI_ERR_DATASN; + } - ctask->datasn++; + tcp_ctask->exp_datasn++; tcp_ctask->data_offset = be32_to_cpu(rhdr->offset); - if (tcp_ctask->data_offset + tcp_conn->in.datalen > ctask->total_length) + if (tcp_ctask->data_offset + tcp_conn->in.datalen > scsi_bufflen(sc)) { + debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n", + __FUNCTION__, tcp_ctask->data_offset, + tcp_conn->in.datalen, scsi_bufflen(sc)); return ISCSI_ERR_DATA_OFFSET; + } if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) { - struct scsi_cmnd *sc = ctask->sc; - conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1; if (rhdr->flags & ISCSI_FLAG_DATA_UNDERFLOW) { int res_count = be32_to_cpu(rhdr->residual_count); if (res_count > 0 && - res_count <= sc->request_bufflen) { - sc->resid = res_count; + res_count <= scsi_bufflen(sc)) { + scsi_set_resid(sc, res_count); sc->result = (DID_OK << 16) | rhdr->cmd_status; } else sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; } else if (rhdr->flags & ISCSI_FLAG_DATA_OVERFLOW) { - sc->resid = be32_to_cpu(rhdr->residual_count); + scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count)); sc->result = (DID_OK << 16) | rhdr->cmd_status; } else sc->result = (DID_OK << 16) | rhdr->cmd_status; @@ -281,6 +285,8 @@ { struct iscsi_data *hdr; struct scsi_cmnd *sc = ctask->sc; + int i, sg_count = 0; + struct scatterlist *sg; hdr = &r2t->dtask.hdr; memset(hdr, 0, sizeof(struct iscsi_data)); @@ -308,12 +314,9 @@ iscsi_buf_init_iov(&r2t->headbuf, 
(char*)hdr, sizeof(struct iscsi_hdr)); - if (sc->use_sg) { - int i, sg_count = 0; - struct scatterlist *sg = sc->request_buffer; - + sg = scsi_sglist(sc); r2t->sg = NULL; - for (i = 0; i < sc->use_sg; i++, sg += 1) { + for (i = 0; i < scsi_sg_count(sc); i++, sg += 1) { /* FIXME: prefetch ? */ if (sg_count + sg->length > r2t->data_offset) { int page_offset; @@ -335,12 +338,6 @@ sg_count += sg->length; } BUG_ON(r2t->sg == NULL); - } else { - iscsi_buf_init_iov(&r2t->sendbuf, - (char*)sc->request_buffer + r2t->data_offset, - r2t->data_count); - r2t->sg = NULL; - } } /** @@ -365,17 +362,16 @@ return ISCSI_ERR_DATALEN; } - if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn) + if (tcp_ctask->exp_datasn != r2tsn){ + debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n", + __FUNCTION__, tcp_ctask->exp_datasn, r2tsn); return ISCSI_ERR_R2TSN; - - rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr); - if (rc) - return rc; - - /* FIXME: use R2TSN to detect missing R2T */ + } /* fill-in new R2T associated with the task */ spin_lock(&session->lock); + iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); + if (!ctask->sc || ctask->mtask || session->state != ISCSI_STATE_LOGGED_IN) { printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in " @@ -401,11 +397,11 @@ r2t->data_length, session->max_burst); r2t->data_offset = be32_to_cpu(rhdr->data_offset); - if (r2t->data_offset + r2t->data_length > ctask->total_length) { + if (r2t->data_offset + r2t->data_length > scsi_bufflen(ctask->sc)) { spin_unlock(&session->lock); printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at " "offset %u and total length %d\n", r2t->data_length, - r2t->data_offset, ctask->total_length); + r2t->data_offset, scsi_bufflen(ctask->sc)); return ISCSI_ERR_DATALEN; } @@ -414,9 +410,9 @@ iscsi_solicit_data_init(conn, ctask, r2t); - tcp_ctask->exp_r2tsn = r2tsn + 1; + tcp_ctask->exp_datasn = r2tsn + 1; __kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*)); - tcp_ctask->xmstate 
|= XMSTATE_SOL_HDR; + tcp_ctask->xmstate |= XMSTATE_SOL_HDR_INIT; list_move_tail(&ctask->running, &conn->xmitqueue); scsi_queue_work(session->host, &conn->xmitwork); @@ -600,7 +596,7 @@ { struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; int buf_left = buf_size - (tcp_conn->data_copied + offset); - int size = min(tcp_conn->in.copy, buf_left); + unsigned size = min(tcp_conn->in.copy, buf_left); int rc; size = min(size, ctask->data_count); @@ -609,7 +605,7 @@ size, tcp_conn->in.offset, tcp_conn->in.copied); BUG_ON(size <= 0); - BUG_ON(tcp_ctask->sent + size > ctask->total_length); + BUG_ON(tcp_ctask->sent + size > scsi_bufflen(ctask->sc)); rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset, (char*)buf + (offset + tcp_conn->data_copied), size); @@ -707,25 +703,8 @@ BUG_ON((void*)ctask != sc->SCp.ptr); - /* - * copying Data-In into the Scsi_Cmnd - */ - if (!sc->use_sg) { - i = ctask->data_count; - rc = iscsi_ctask_copy(tcp_conn, ctask, sc->request_buffer, - sc->request_bufflen, - tcp_ctask->data_offset); - if (rc == -EAGAIN) - return rc; - if (conn->datadgst_en) - iscsi_recv_digest_update(tcp_conn, sc->request_buffer, - i); - rc = 0; - goto done; - } - offset = tcp_ctask->data_offset; - sg = sc->request_buffer; + sg = scsi_sglist(sc); if (tcp_ctask->data_offset) for (i = 0; i < tcp_ctask->sg_count; i++) @@ -734,7 +713,7 @@ if (offset < 0) offset = 0; - for (i = tcp_ctask->sg_count; i < sc->use_sg; i++) { + for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) { char *dest; dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0); @@ -779,7 +758,6 @@ } BUG_ON(ctask->data_count); -done: /* check for non-exceptional status */ if (tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) { debug_scsi("done [sc %lx res %d itt 0x%x flags 0x%x]\n", @@ -895,11 +873,27 @@ } } - if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV) { + if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV && + tcp_conn->in.copy) { uint32_t recv_digest; debug_tcp("extra data_recv offset %d copy %d\n", 
tcp_conn->in.offset, tcp_conn->in.copy); + + if (!tcp_conn->data_copied) { + if (tcp_conn->in.padding) { + debug_tcp("padding -> %d\n", + tcp_conn->in.padding); + memset(pad, 0, tcp_conn->in.padding); + sg_init_one(&sg, pad, tcp_conn->in.padding); + crypto_hash_update(&tcp_conn->rx_hash, + &sg, sg.length); + } + crypto_hash_final(&tcp_conn->rx_hash, + (u8 *) &tcp_conn->in.datadgst); + debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); + } + rc = iscsi_tcp_copy(conn, sizeof(uint32_t)); if (rc) { if (rc == -EAGAIN) @@ -925,7 +919,6 @@ if (tcp_conn->in_progress == IN_PROGRESS_DATA_RECV && tcp_conn->in.copy) { - debug_tcp("data_recv offset %d copy %d\n", tcp_conn->in.offset, tcp_conn->in.copy); @@ -936,24 +929,32 @@ iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return 0; } - tcp_conn->in.copy -= tcp_conn->in.padding; - tcp_conn->in.offset += tcp_conn->in.padding; - if (conn->datadgst_en) { - if (tcp_conn->in.padding) { - debug_tcp("padding -> %d\n", - tcp_conn->in.padding); - memset(pad, 0, tcp_conn->in.padding); - sg_init_one(&sg, pad, tcp_conn->in.padding); - crypto_hash_update(&tcp_conn->rx_hash, - &sg, sg.length); - } - crypto_hash_final(&tcp_conn->rx_hash, - (u8 *) &tcp_conn->in.datadgst); - debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); + + if (tcp_conn->in.padding) + tcp_conn->in_progress = IN_PROGRESS_PAD_RECV; + else if (conn->datadgst_en) tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; + else + tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; tcp_conn->data_copied = 0; - } else + } + + if (tcp_conn->in_progress == IN_PROGRESS_PAD_RECV && + tcp_conn->in.copy) { + int copylen = min(tcp_conn->in.padding - tcp_conn->data_copied, + tcp_conn->in.copy); + + tcp_conn->in.copy -= copylen; + tcp_conn->in.offset += copylen; + tcp_conn->data_copied += copylen; + + if (tcp_conn->data_copied != tcp_conn->in.padding) + tcp_conn->in_progress = IN_PROGRESS_PAD_RECV; + else if (conn->datadgst_en) + tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; + else 
tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; + tcp_conn->data_copied = 0; } debug_tcp("f, processed %d from out of %d padding %d\n", @@ -1215,7 +1216,6 @@ struct iscsi_r2t_info *r2t, int left) { struct iscsi_data *hdr; - struct scsi_cmnd *sc = ctask->sc; int new_offset; hdr = &r2t->dtask.hdr; @@ -1245,15 +1245,8 @@ if (iscsi_buf_left(&r2t->sendbuf)) return; - if (sc->use_sg) { iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg); r2t->sg += 1; - } else { - iscsi_buf_init_iov(&r2t->sendbuf, - (char*)sc->request_buffer + new_offset, - r2t->data_count); - r2t->sg = NULL; - } } static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask, @@ -1277,41 +1270,10 @@ static void iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask) { - struct scsi_cmnd *sc = ctask->sc; struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; BUG_ON(__kfifo_len(tcp_ctask->r2tqueue)); - - tcp_ctask->sent = 0; - tcp_ctask->sg_count = 0; - - if (sc->sc_data_direction == DMA_TO_DEVICE) { - tcp_ctask->xmstate = XMSTATE_W_HDR; - tcp_ctask->exp_r2tsn = 0; - BUG_ON(ctask->total_length == 0); - - if (sc->use_sg) { - struct scatterlist *sg = sc->request_buffer; - - iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg); - tcp_ctask->sg = sg + 1; - tcp_ctask->bad_sg = sg + sc->use_sg; - } else { - iscsi_buf_init_iov(&tcp_ctask->sendbuf, - sc->request_buffer, - sc->request_bufflen); - tcp_ctask->sg = NULL; - tcp_ctask->bad_sg = NULL; - } - debug_scsi("cmd [itt 0x%x total %d imm_data %d " - "unsol count %d, unsol offset %d]\n", - ctask->itt, ctask->total_length, ctask->imm_count, - ctask->unsol_count, ctask->unsol_offset); - } else - tcp_ctask->xmstate = XMSTATE_R_HDR; - - iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr, - sizeof(struct iscsi_hdr)); + tcp_ctask->xmstate = XMSTATE_CMD_HDR_INIT; } /** @@ -1324,9 +1286,11 @@ * call it again later, or recover. '0' return code means successful * xmit. 
* - * Management xmit state machine consists of two states: - * IN_PROGRESS_IMM_HEAD - PDU Header xmit in progress - * IN_PROGRESS_IMM_DATA - PDU Data xmit in progress + * Management xmit state machine consists of these states: + * XMSTATE_IMM_HDR_INIT - calculate digest of PDU Header + * XMSTATE_IMM_HDR - PDU Header xmit in progress + * XMSTATE_IMM_DATA - PDU Data xmit in progress + * XMSTATE_IDLE - management PDU is done **/ static int iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask) @@ -1337,23 +1301,34 @@ debug_scsi("mtask deq [cid %d state %x itt 0x%x]\n", conn->id, tcp_mtask->xmstate, mtask->itt); - if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) { - tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR; - if (mtask->data_count) + if (tcp_mtask->xmstate & XMSTATE_IMM_HDR_INIT) { + iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr, + sizeof(struct iscsi_hdr)); + + if (mtask->data_count) { tcp_mtask->xmstate |= XMSTATE_IMM_DATA; + iscsi_buf_init_iov(&tcp_mtask->sendbuf, + (char*)mtask->data, + mtask->data_count); + } + if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && conn->stop_stage != STOP_CONN_RECOVER && conn->hdrdgst_en) iscsi_hdr_digest(conn, &tcp_mtask->headbuf, (u8*)tcp_mtask->hdrext); + + tcp_mtask->sent = 0; + tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR_INIT; + tcp_mtask->xmstate |= XMSTATE_IMM_HDR; + } + + if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) { rc = iscsi_sendhdr(conn, &tcp_mtask->headbuf, mtask->data_count); - if (rc) { - tcp_mtask->xmstate |= XMSTATE_IMM_HDR; - if (mtask->data_count) - tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA; + if (rc) return rc; - } + tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR; } if (tcp_mtask->xmstate & XMSTATE_IMM_DATA) { @@ -1387,55 +1362,67 @@ return 0; } -static inline int -iscsi_send_read_hdr(struct iscsi_conn *conn, - struct iscsi_tcp_cmd_task *tcp_ctask) +static int +iscsi_send_cmd_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { - int rc; + struct scsi_cmnd *sc = ctask->sc; + struct 
iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; + int rc = 0; - tcp_ctask->xmstate &= ~XMSTATE_R_HDR; - if (conn->hdrdgst_en) - iscsi_hdr_digest(conn, &tcp_ctask->headbuf, - (u8*)tcp_ctask->hdrext); - rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, 0); - if (!rc) { - BUG_ON(tcp_ctask->xmstate != XMSTATE_IDLE); - return 0; /* wait for Data-In */ + if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_INIT) { + tcp_ctask->sent = 0; + tcp_ctask->sg_count = 0; + tcp_ctask->exp_datasn = 0; + + if (sc->sc_data_direction == DMA_TO_DEVICE) { + struct scatterlist *sg = scsi_sglist(sc); + + iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg); + tcp_ctask->sg = sg + 1; + tcp_ctask->bad_sg = sg + scsi_sg_count(sc); + + debug_scsi("cmd [itt 0x%x total %d imm_data %d " + "unsol count %d, unsol offset %d]\n", + ctask->itt, scsi_bufflen(sc), + ctask->imm_count, ctask->unsol_count, + ctask->unsol_offset); } - tcp_ctask->xmstate |= XMSTATE_R_HDR; - return rc; -} -static inline int -iscsi_send_write_hdr(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) -{ - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - int rc; + iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr, + sizeof(struct iscsi_hdr)); - tcp_ctask->xmstate &= ~XMSTATE_W_HDR; if (conn->hdrdgst_en) iscsi_hdr_digest(conn, &tcp_ctask->headbuf, (u8*)tcp_ctask->hdrext); + tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_INIT; + tcp_ctask->xmstate |= XMSTATE_CMD_HDR_XMIT; + } + + if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_XMIT) { rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count); - if (rc) { - tcp_ctask->xmstate |= XMSTATE_W_HDR; + if (rc) return rc; - } + tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_XMIT; + + if (sc->sc_data_direction != DMA_TO_DEVICE) + return 0; if (ctask->imm_count) { tcp_ctask->xmstate |= XMSTATE_IMM_DATA; iscsi_set_padding(tcp_ctask, ctask->imm_count); if (ctask->conn->datadgst_en) { - iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask); + iscsi_data_digest_init(ctask->conn->dd_data, + tcp_ctask); 
tcp_ctask->immdigest = 0; } } if (ctask->unsol_count) - tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT; - return 0; + tcp_ctask->xmstate |= + XMSTATE_UNS_HDR | XMSTATE_UNS_INIT; + } + return rc; } static int @@ -1624,9 +1611,7 @@ struct iscsi_data_task *dtask; int left, rc; - if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) { - tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; + if (tcp_ctask->xmstate & XMSTATE_SOL_HDR_INIT) { if (!tcp_ctask->r2t) { spin_lock_bh(&session->lock); __kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t, @@ -1640,13 +1625,20 @@ if (conn->hdrdgst_en) iscsi_hdr_digest(conn, &r2t->headbuf, (u8*)dtask->hdrext); - rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); - if (rc) { - tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; + tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR_INIT; tcp_ctask->xmstate |= XMSTATE_SOL_HDR; - return rc; } + if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) { + r2t = tcp_ctask->r2t; + dtask = &r2t->dtask; + + rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); + if (rc) + return rc; + tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; + tcp_ctask->xmstate |= XMSTATE_SOL_DATA; + if (conn->datadgst_en) { iscsi_data_digest_init(conn->dd_data, tcp_ctask); dtask->digest = 0; @@ -1677,8 +1669,6 @@ left = r2t->data_length - r2t->sent; if (left) { iscsi_solicit_data_cont(conn, ctask, r2t, left); - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; - tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; goto send_hdr; } @@ -1693,8 +1683,6 @@ if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) { tcp_ctask->r2t = r2t; - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; - tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; spin_unlock_bh(&session->lock); goto send_hdr; } @@ -1703,6 +1691,46 @@ return 0; } +/** + * iscsi_tcp_ctask_xmit - xmit normal PDU task + * @conn: iscsi connection + * @ctask: iscsi command task + * + * Notes: + * The function can return -EAGAIN in which case caller must + * call it again later, or recover. 
'0' return code means successful + * xmit. + * The function is devided to logical helpers (above) for the different + * xmit stages. + * + *iscsi_send_cmd_hdr() + * XMSTATE_CMD_HDR_INIT - prepare Header and Data buffers Calculate + * Header Digest + * XMSTATE_CMD_HDR_XMIT - Transmit header in progress + * + *iscsi_send_padding + * XMSTATE_W_PAD - Prepare and send pading + * XMSTATE_W_RESEND_PAD - retry send pading + * + *iscsi_send_digest + * XMSTATE_W_RESEND_DATA_DIGEST - Finalize and send Data Digest + * XMSTATE_W_RESEND_DATA_DIGEST - retry sending digest + * + *iscsi_send_unsol_hdr + * XMSTATE_UNS_INIT - prepare un-solicit data header and digest + * XMSTATE_UNS_HDR - send un-solicit header + * + *iscsi_send_unsol_pdu + * XMSTATE_UNS_DATA - send un-solicit data in progress + * + *iscsi_send_sol_pdu + * XMSTATE_SOL_HDR_INIT - solicit data header and digest initialize + * XMSTATE_SOL_HDR - send solicit header + * XMSTATE_SOL_DATA - send solicit data + * + *iscsi_tcp_ctask_xmit + * XMSTATE_IMM_DATA - xmit managment data (??) 
+ **/ static int iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { @@ -1712,20 +1740,11 @@ debug_scsi("ctask deq [cid %d xmstate %x itt 0x%x]\n", conn->id, tcp_ctask->xmstate, ctask->itt); - /* - * serialize with TMF AbortTask - */ - if (ctask->mtask) - return rc; - - if (tcp_ctask->xmstate & XMSTATE_R_HDR) - return iscsi_send_read_hdr(conn, tcp_ctask); - - if (tcp_ctask->xmstate & XMSTATE_W_HDR) { - rc = iscsi_send_write_hdr(conn, ctask); + rc = iscsi_send_cmd_hdr(conn, ctask); if (rc) return rc; - } + if (ctask->sc->sc_data_direction != DMA_TO_DEVICE) + return 0; if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) { rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg, @@ -1810,18 +1829,22 @@ static void iscsi_tcp_release_conn(struct iscsi_conn *conn) { + struct iscsi_session *session = conn->session; struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct socket *sock = tcp_conn->sock; - if (!tcp_conn->sock) + if (!sock) return; - sock_hold(tcp_conn->sock->sk); + sock_hold(sock->sk); iscsi_conn_restore_callbacks(tcp_conn); - sock_put(tcp_conn->sock->sk); + sock_put(sock->sk); - sock_release(tcp_conn->sock); + spin_lock_bh(&session->lock); tcp_conn->sock = NULL; conn->recv_lock = NULL; + spin_unlock_bh(&session->lock); + sockfd_put(sock); } static void @@ -1852,6 +1875,46 @@ tcp_conn->hdr_size = sizeof(struct iscsi_hdr); } +static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock, + char *buf, int *port, + int (*getname)(struct socket *, struct sockaddr *, + int *addrlen)) +{ + struct sockaddr_storage *addr; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int rc = 0, len; + + addr = kmalloc(GFP_KERNEL, sizeof(*addr)); + if (!addr) + return -ENOMEM; + + if (getname(sock, (struct sockaddr *) addr, &len)) { + rc = -ENODEV; + goto free_addr; + } + + switch (addr->ss_family) { + case AF_INET: + sin = (struct sockaddr_in *)addr; + spin_lock_bh(&conn->session->lock); + sprintf(buf, NIPQUAD_FMT, 
NIPQUAD(sin->sin_addr.s_addr)); + *port = be16_to_cpu(sin->sin_port); + spin_unlock_bh(&conn->session->lock); + break; + case AF_INET6: + sin6 = (struct sockaddr_in6 *)addr; + spin_lock_bh(&conn->session->lock); + sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr)); + *port = be16_to_cpu(sin6->sin6_port); + spin_unlock_bh(&conn->session->lock); + break; + } +free_addr: + kfree(addr); + return rc; +} + static int iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, @@ -1869,10 +1932,24 @@ printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err); return -EEXIST; } + /* + * copy these values now because if we drop the session + * userspace may still want to query the values since we will + * be using them for the reconnect + */ + err = iscsi_tcp_get_addr(conn, sock, conn->portal_address, + &conn->portal_port, kernel_getpeername); + if (err) + goto free_socket; + + err = iscsi_tcp_get_addr(conn, sock, conn->local_address, + &conn->local_port, kernel_getsockname); + if (err) + goto free_socket; err = iscsi_conn_bind(cls_session, cls_conn, is_leading); if (err) - return err; + goto free_socket; /* bind iSCSI connection and socket */ tcp_conn->sock = sock; @@ -1896,25 +1973,19 @@ * set receive state machine into initial state */ tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; - return 0; + +free_socket: + sockfd_put(sock); + return err; } /* called with host lock */ static void -iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask, - char *data, uint32_t data_size) +iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask) { struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data; - - iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr, - sizeof(struct iscsi_hdr)); - tcp_mtask->xmstate = XMSTATE_IMM_HDR; - tcp_mtask->sent = 0; - - if (mtask->data_count) - iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data, - mtask->data_count); + tcp_mtask->xmstate = 
XMSTATE_IMM_HDR_INIT; } static int @@ -2026,41 +2097,18 @@ enum iscsi_param param, char *buf) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - struct inet_sock *inet; - struct ipv6_pinfo *np; - struct sock *sk; int len; switch(param) { case ISCSI_PARAM_CONN_PORT: - mutex_lock(&conn->xmitmutex); - if (!tcp_conn->sock) { - mutex_unlock(&conn->xmitmutex); - return -EINVAL; - } - - inet = inet_sk(tcp_conn->sock->sk); - len = sprintf(buf, "%hu\n", be16_to_cpu(inet->dport)); - mutex_unlock(&conn->xmitmutex); + spin_lock_bh(&conn->session->lock); + len = sprintf(buf, "%hu\n", conn->portal_port); + spin_unlock_bh(&conn->session->lock); break; case ISCSI_PARAM_CONN_ADDRESS: - mutex_lock(&conn->xmitmutex); - if (!tcp_conn->sock) { - mutex_unlock(&conn->xmitmutex); - return -EINVAL; - } - - sk = tcp_conn->sock->sk; - if (sk->sk_family == PF_INET) { - inet = inet_sk(sk); - len = sprintf(buf, NIPQUAD_FMT "\n", - NIPQUAD(inet->daddr)); - } else { - np = inet6_sk(sk); - len = sprintf(buf, NIP6_FMT "\n", NIP6(np->daddr)); - } - mutex_unlock(&conn->xmitmutex); + spin_lock_bh(&conn->session->lock); + len = sprintf(buf, "%s\n", conn->portal_address); + spin_unlock_bh(&conn->session->lock); break; default: return iscsi_conn_get_param(cls_conn, param, buf); @@ -2069,6 +2117,29 @@ return len; } +static int +iscsi_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, + char *buf) +{ + struct iscsi_session *session = iscsi_hostdata(shost->hostdata); + int len; + + switch (param) { + case ISCSI_HOST_PARAM_IPADDRESS: + spin_lock_bh(&session->lock); + if (!session->leadconn) + len = -ENODEV; + else + len = sprintf(buf, "%s\n", + session->leadconn->local_address); + spin_unlock_bh(&session->lock); + break; + default: + return iscsi_host_get_param(shost, param, buf); + } + return len; +} + static void iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) { @@ -2096,6 +2167,7 @@ static struct 
iscsi_cls_session * iscsi_tcp_session_create(struct iscsi_transport *iscsit, struct scsi_transport_template *scsit, + uint16_t cmds_max, uint16_t qdepth, uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; @@ -2103,7 +2175,7 @@ uint32_t hn; int cmd_i; - cls_session = iscsi_session_setup(iscsit, scsit, + cls_session = iscsi_session_setup(iscsit, scsit, cmds_max, qdepth, sizeof(struct iscsi_tcp_cmd_task), sizeof(struct iscsi_tcp_mgmt_task), initial_cmdsn, &hn); @@ -2142,17 +2214,24 @@ iscsi_session_teardown(cls_session); } +static int iscsi_tcp_slave_configure(struct scsi_device *sdev) +{ + blk_queue_dma_alignment(sdev->request_queue, 0); + return 0; +} + static struct scsi_host_template iscsi_sht = { .name = "iSCSI Initiator over TCP/IP", .queuecommand = iscsi_queuecommand, .change_queue_depth = iscsi_change_queue_depth, - .can_queue = ISCSI_XMIT_CMDS_MAX - 1, + .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_SG_TABLESIZE, .max_sectors = 0xFFFF, .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_host_reset_handler = iscsi_eh_host_reset, .use_clustering = DISABLE_CLUSTERING, + .slave_configure = iscsi_tcp_slave_configure, .proc_name = "iscsi_tcp", .this_id = -1, }; @@ -2179,8 +2258,12 @@ ISCSI_EXP_STATSN | ISCSI_PERSISTENT_PORT | ISCSI_PERSISTENT_ADDRESS | - ISCSI_TARGET_NAME | - ISCSI_TPGT, + ISCSI_TARGET_NAME | ISCSI_TPGT | + ISCSI_USERNAME | ISCSI_PASSWORD | + ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN, + .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS | + ISCSI_HOST_INITIATOR_NAME | + ISCSI_HOST_NETDEV_NAME, .host_template = &iscsi_sht, .conndata_size = sizeof(struct iscsi_conn), .max_conn = 1, @@ -2197,6 +2280,9 @@ .get_session_param = iscsi_session_get_param, .start_conn = iscsi_conn_start, .stop_conn = iscsi_tcp_conn_stop, + /* iscsi host params */ + .get_host_param = iscsi_tcp_host_get_param, + .set_host_param = iscsi_host_set_param, /* IO */ .send_pdu = iscsi_conn_send_pdu, 
.get_stats = iscsi_conn_get_stats, diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.h linux-2.6.22-591/drivers/scsi/iscsi_tcp.h --- linux-2.6.22-570/drivers/scsi/iscsi_tcp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/iscsi_tcp.h 2007-12-21 15:36:12.000000000 -0500 @@ -29,11 +29,12 @@ #define IN_PROGRESS_HEADER_GATHER 0x1 #define IN_PROGRESS_DATA_RECV 0x2 #define IN_PROGRESS_DDIGEST_RECV 0x3 +#define IN_PROGRESS_PAD_RECV 0x4 /* xmit state machine */ #define XMSTATE_IDLE 0x0 -#define XMSTATE_R_HDR 0x1 -#define XMSTATE_W_HDR 0x2 +#define XMSTATE_CMD_HDR_INIT 0x1 +#define XMSTATE_CMD_HDR_XMIT 0x2 #define XMSTATE_IMM_HDR 0x4 #define XMSTATE_IMM_DATA 0x8 #define XMSTATE_UNS_INIT 0x10 @@ -44,6 +45,8 @@ #define XMSTATE_W_PAD 0x200 #define XMSTATE_W_RESEND_PAD 0x400 #define XMSTATE_W_RESEND_DATA_DIGEST 0x800 +#define XMSTATE_IMM_HDR_INIT 0x1000 +#define XMSTATE_SOL_HDR_INIT 0x2000 #define ISCSI_PAD_LEN 4 #define ISCSI_SG_TABLESIZE SG_ALL @@ -152,7 +155,7 @@ struct scatterlist *sg; /* per-cmd SG list */ struct scatterlist *bad_sg; /* assert statement */ int sg_count; /* SG's to process */ - uint32_t exp_r2tsn; + uint32_t exp_datasn; /* expected target's R2TSN/DataSN */ int data_offset; struct iscsi_r2t_info *r2t; /* in progress R2T */ struct iscsi_queue r2tpool; diff -Nurb linux-2.6.22-570/drivers/scsi/jazz_esp.c linux-2.6.22-591/drivers/scsi/jazz_esp.c --- linux-2.6.22-570/drivers/scsi/jazz_esp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/jazz_esp.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,6 +1,6 @@ /* jazz_esp.c: ESP front-end for MIPS JAZZ systems. 
* - * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende) + * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende) */ #include @@ -143,7 +143,7 @@ goto fail; host->max_id = 8; - esp = host_to_esp(host); + esp = shost_priv(host); esp->host = host; esp->dev = dev; diff -Nurb linux-2.6.22-570/drivers/scsi/libiscsi.c linux-2.6.22-591/drivers/scsi/libiscsi.c --- linux-2.6.22-570/drivers/scsi/libiscsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/libiscsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -22,7 +22,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include -#include #include #include #include @@ -46,27 +45,53 @@ } EXPORT_SYMBOL_GPL(class_to_transport_session); -#define INVALID_SN_DELTA 0xffff +/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ +#define SNA32_CHECK 2147483648UL -int -iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) +static int iscsi_sna_lt(u32 n1, u32 n2) +{ + return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || + (n1 > n2 && (n2 - n1 < SNA32_CHECK))); +} + +/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ +static int iscsi_sna_lte(u32 n1, u32 n2) +{ + return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || + (n1 > n2 && (n2 - n1 < SNA32_CHECK))); +} + +void +iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) { uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn); uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn); - if (max_cmdsn < exp_cmdsn -1 && - max_cmdsn > exp_cmdsn - INVALID_SN_DELTA) - return ISCSI_ERR_MAX_CMDSN; - if (max_cmdsn > session->max_cmdsn || - max_cmdsn < session->max_cmdsn - INVALID_SN_DELTA) - session->max_cmdsn = max_cmdsn; - if (exp_cmdsn > session->exp_cmdsn || - exp_cmdsn < session->exp_cmdsn - INVALID_SN_DELTA) + /* + * standard specifies this check for when to update expected and + * max sequence numbers + */ + if (iscsi_sna_lt(max_cmdsn, exp_cmdsn - 1)) + 
return; + + if (exp_cmdsn != session->exp_cmdsn && + !iscsi_sna_lt(exp_cmdsn, session->exp_cmdsn)) session->exp_cmdsn = exp_cmdsn; - return 0; + if (max_cmdsn != session->max_cmdsn && + !iscsi_sna_lt(max_cmdsn, session->max_cmdsn)) { + session->max_cmdsn = max_cmdsn; + /* + * if the window closed with IO queued, then kick the + * xmit thread + */ + if (!list_empty(&session->leadconn->xmitqueue) || + __kfifo_len(session->leadconn->mgmtqueue)) + scsi_queue_work(session->host, + &session->leadconn->xmitwork); + } } -EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn); +EXPORT_SYMBOL_GPL(iscsi_update_cmdsn); void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask, struct iscsi_data *hdr) @@ -115,14 +140,17 @@ hdr->flags = ISCSI_ATTR_SIMPLE; int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun); hdr->itt = build_itt(ctask->itt, conn->id, session->age); - hdr->data_length = cpu_to_be32(sc->request_bufflen); + hdr->data_length = cpu_to_be32(scsi_bufflen(sc)); hdr->cmdsn = cpu_to_be32(session->cmdsn); session->cmdsn++; hdr->exp_statsn = cpu_to_be32(conn->exp_statsn); memcpy(hdr->cdb, sc->cmnd, sc->cmd_len); - memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len); + if (sc->cmd_len < MAX_COMMAND_SIZE) + memset(&hdr->cdb[sc->cmd_len], 0, + MAX_COMMAND_SIZE - sc->cmd_len); ctask->data_count = 0; + ctask->imm_count = 0; if (sc->sc_data_direction == DMA_TO_DEVICE) { hdr->flags |= ISCSI_FLAG_CMD_WRITE; /* @@ -139,25 +167,24 @@ * * pad_count bytes to be sent as zero-padding */ - ctask->imm_count = 0; ctask->unsol_count = 0; ctask->unsol_offset = 0; ctask->unsol_datasn = 0; if (session->imm_data_en) { - if (ctask->total_length >= session->first_burst) + if (scsi_bufflen(sc) >= session->first_burst) ctask->imm_count = min(session->first_burst, conn->max_xmit_dlength); else - ctask->imm_count = min(ctask->total_length, + ctask->imm_count = min(scsi_bufflen(sc), conn->max_xmit_dlength); hton24(ctask->hdr->dlength, ctask->imm_count); } else 
zero_data(ctask->hdr->dlength); if (!session->initial_r2t_en) { - ctask->unsol_count = min(session->first_burst, - ctask->total_length) - ctask->imm_count; + ctask->unsol_count = min((session->first_burst), + (scsi_bufflen(sc))) - ctask->imm_count; ctask->unsol_offset = ctask->imm_count; } @@ -165,7 +192,6 @@ /* No unsolicit Data-Out's */ ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL; } else { - ctask->datasn = 0; hdr->flags |= ISCSI_FLAG_CMD_FINAL; zero_data(hdr->dlength); @@ -174,8 +200,13 @@ } conn->scsicmd_pdus_cnt++; + + debug_scsi("iscsi prep [%s cid %d sc %p cdb 0x%x itt 0x%x len %d " + "cmdsn %d win %d]\n", + sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", + conn->id, sc, sc->cmnd[0], ctask->itt, scsi_bufflen(sc), + session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1); } -EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu); /** * iscsi_complete_command - return command back to scsi-ml @@ -204,26 +235,12 @@ atomic_inc(&ctask->refcount); } -static void iscsi_get_ctask(struct iscsi_cmd_task *ctask) -{ - spin_lock_bh(&ctask->conn->session->lock); - __iscsi_get_ctask(ctask); - spin_unlock_bh(&ctask->conn->session->lock); -} - static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask) { if (atomic_dec_and_test(&ctask->refcount)) iscsi_complete_command(ctask); } -static void iscsi_put_ctask(struct iscsi_cmd_task *ctask) -{ - spin_lock_bh(&ctask->conn->session->lock); - __iscsi_put_ctask(ctask); - spin_unlock_bh(&ctask->conn->session->lock); -} - /** * iscsi_cmd_rsp - SCSI Command Response processing * @conn: iscsi connection @@ -235,21 +252,15 @@ * iscsi_cmd_rsp sets up the scsi_cmnd fields based on the PDU and * then completes the command and task. 
**/ -static int iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, +static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, struct iscsi_cmd_task *ctask, char *data, int datalen) { - int rc; struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr; struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = ctask->sc; - rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr); - if (rc) { - sc->result = DID_ERROR << 16; - goto out; - } - + iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1; sc->result = (DID_OK << 16) | rhdr->cmd_status; @@ -286,14 +297,14 @@ if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) { int res_count = be32_to_cpu(rhdr->residual_count); - if (res_count > 0 && res_count <= sc->request_bufflen) - sc->resid = res_count; + if (res_count > 0 && res_count <= scsi_bufflen(sc)) + scsi_set_resid(sc, res_count); else sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; } else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW) sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW) - sc->resid = be32_to_cpu(rhdr->residual_count); + scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count)); out: debug_scsi("done [sc %lx res %d itt 0x%x]\n", @@ -301,7 +312,6 @@ conn->scsirsp_pdus_cnt++; __iscsi_put_ctask(ctask); - return rc; } static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) @@ -381,7 +391,7 @@ switch(opcode) { case ISCSI_OP_SCSI_CMD_RSP: BUG_ON((void*)ctask != ctask->sc->SCp.ptr); - rc = iscsi_scsi_cmd_rsp(conn, hdr, ctask, data, + iscsi_scsi_cmd_rsp(conn, hdr, ctask, data, datalen); break; case ISCSI_OP_SCSI_DATA_IN: @@ -405,11 +415,7 @@ debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n", opcode, conn->id, mtask->itt, datalen); - rc = iscsi_check_assign_cmdsn(session, - (struct iscsi_nopin*)hdr); - if (rc) - goto done; - + iscsi_update_cmdsn(session, (struct 
iscsi_nopin*)hdr); switch(opcode) { case ISCSI_OP_LOGOUT_RSP: if (datalen) { @@ -458,10 +464,7 @@ break; } } else if (itt == ~0U) { - rc = iscsi_check_assign_cmdsn(session, - (struct iscsi_nopin*)hdr); - if (rc) - goto done; + iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr); switch(opcode) { case ISCSI_OP_NOOP_IN: @@ -491,7 +494,6 @@ } else rc = ISCSI_ERR_BAD_ITT; -done: return rc; } EXPORT_SYMBOL_GPL(__iscsi_complete_pdu); @@ -578,17 +580,47 @@ } EXPORT_SYMBOL_GPL(iscsi_conn_failure); +static void iscsi_prep_mtask(struct iscsi_conn *conn, + struct iscsi_mgmt_task *mtask) +{ + struct iscsi_session *session = conn->session; + struct iscsi_hdr *hdr = mtask->hdr; + struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr; + + if (hdr->opcode != (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) && + hdr->opcode != (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE)) + nop->exp_statsn = cpu_to_be32(conn->exp_statsn); + /* + * pre-format CmdSN for outgoing PDU. + */ + nop->cmdsn = cpu_to_be32(session->cmdsn); + if (hdr->itt != RESERVED_ITT) { + hdr->itt = build_itt(mtask->itt, conn->id, session->age); + if (conn->c_stage == ISCSI_CONN_STARTED && + !(hdr->opcode & ISCSI_OP_IMMEDIATE)) + session->cmdsn++; + } + + if (session->tt->init_mgmt_task) + session->tt->init_mgmt_task(conn, mtask); + + debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n", + hdr->opcode, hdr->itt, mtask->data_count); +} + static int iscsi_xmit_mtask(struct iscsi_conn *conn) { struct iscsi_hdr *hdr = conn->mtask->hdr; int rc, was_logout = 0; + spin_unlock_bh(&conn->session->lock); if ((hdr->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT) { conn->session->state = ISCSI_STATE_IN_RECOVERY; iscsi_block_session(session_to_cls(conn->session)); was_logout = 1; } rc = conn->session->tt->xmit_mgmt_task(conn, conn->mtask); + spin_lock_bh(&conn->session->lock); if (rc) return rc; @@ -602,6 +634,45 @@ return 0; } +static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn) +{ + struct iscsi_session *session = conn->session; 
+ + /* + * Check for iSCSI window and take care of CmdSN wrap-around + */ + if (!iscsi_sna_lte(session->cmdsn, session->max_cmdsn)) { + debug_scsi("iSCSI CmdSN closed. MaxCmdSN %u CmdSN %u\n", + session->max_cmdsn, session->cmdsn); + return -ENOSPC; + } + return 0; +} + +static int iscsi_xmit_ctask(struct iscsi_conn *conn) +{ + struct iscsi_cmd_task *ctask = conn->ctask; + int rc = 0; + + /* + * serialize with TMF AbortTask + */ + if (ctask->state == ISCSI_TASK_ABORTING) + goto done; + + __iscsi_get_ctask(ctask); + spin_unlock_bh(&conn->session->lock); + rc = conn->session->tt->xmit_cmd_task(conn, ctask); + spin_lock_bh(&conn->session->lock); + __iscsi_put_ctask(ctask); + +done: + if (!rc) + /* done with this ctask */ + conn->ctask = NULL; + return rc; +} + /** * iscsi_data_xmit - xmit any command into the scheduled connection * @conn: iscsi connection @@ -613,106 +684,79 @@ **/ static int iscsi_data_xmit(struct iscsi_conn *conn) { - struct iscsi_transport *tt; int rc = 0; + spin_lock_bh(&conn->session->lock); if (unlikely(conn->suspend_tx)) { debug_scsi("conn %d Tx suspended!\n", conn->id); + spin_unlock_bh(&conn->session->lock); return -ENODATA; } - tt = conn->session->tt; - - /* - * Transmit in the following order: - * - * 1) un-finished xmit (ctask or mtask) - * 2) immediate control PDUs - * 3) write data - * 4) SCSI commands - * 5) non-immediate control PDUs - * - * No need to lock around __kfifo_get as long as - * there's one producer and one consumer. 
- */ - - BUG_ON(conn->ctask && conn->mtask); if (conn->ctask) { - iscsi_get_ctask(conn->ctask); - rc = tt->xmit_cmd_task(conn, conn->ctask); - iscsi_put_ctask(conn->ctask); + rc = iscsi_xmit_ctask(conn); if (rc) goto again; - /* done with this in-progress ctask */ - conn->ctask = NULL; } + if (conn->mtask) { rc = iscsi_xmit_mtask(conn); if (rc) goto again; } - /* process immediate first */ - if (unlikely(__kfifo_len(conn->immqueue))) { - while (__kfifo_get(conn->immqueue, (void*)&conn->mtask, + /* + * process mgmt pdus like nops before commands since we should + * only have one nop-out as a ping from us and targets should not + * overflow us with nop-ins + */ +check_mgmt: + while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask, sizeof(void*))) { - spin_lock_bh(&conn->session->lock); - list_add_tail(&conn->mtask->running, - &conn->mgmt_run_list); - spin_unlock_bh(&conn->session->lock); + iscsi_prep_mtask(conn, conn->mtask); + list_add_tail(&conn->mtask->running, &conn->mgmt_run_list); rc = iscsi_xmit_mtask(conn); if (rc) goto again; } - } /* process command queue */ - spin_lock_bh(&conn->session->lock); while (!list_empty(&conn->xmitqueue)) { + rc = iscsi_check_cmdsn_window_closed(conn); + if (rc) { + spin_unlock_bh(&conn->session->lock); + return rc; + } /* * iscsi tcp may readd the task to the xmitqueue to send * write data */ conn->ctask = list_entry(conn->xmitqueue.next, struct iscsi_cmd_task, running); + if (conn->ctask->state == ISCSI_TASK_PENDING) { + iscsi_prep_scsi_cmd_pdu(conn->ctask); + conn->session->tt->init_cmd_task(conn->ctask); + } conn->ctask->state = ISCSI_TASK_RUNNING; list_move_tail(conn->xmitqueue.next, &conn->run_list); - __iscsi_get_ctask(conn->ctask); - spin_unlock_bh(&conn->session->lock); - - rc = tt->xmit_cmd_task(conn, conn->ctask); - - spin_lock_bh(&conn->session->lock); - __iscsi_put_ctask(conn->ctask); - if (rc) { - spin_unlock_bh(&conn->session->lock); - goto again; - } - } - spin_unlock_bh(&conn->session->lock); - /* done with this 
ctask */ - conn->ctask = NULL; - - /* process the rest control plane PDUs, if any */ - if (unlikely(__kfifo_len(conn->mgmtqueue))) { - while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask, - sizeof(void*))) { - spin_lock_bh(&conn->session->lock); - list_add_tail(&conn->mtask->running, - &conn->mgmt_run_list); - spin_unlock_bh(&conn->session->lock); - rc = iscsi_xmit_mtask(conn); + rc = iscsi_xmit_ctask(conn); if (rc) goto again; + /* + * we could continuously get new ctask requests so + * we need to check the mgmt queue for nops that need to + * be sent to aviod starvation + */ + if (__kfifo_len(conn->mgmtqueue)) + goto check_mgmt; } - } - + spin_unlock_bh(&conn->session->lock); return -ENODATA; again: if (unlikely(conn->suspend_tx)) - return -ENODATA; - + rc = -ENODATA; + spin_unlock_bh(&conn->session->lock); return rc; } @@ -724,11 +768,9 @@ /* * serialize Xmit worker on a per-connection basis. */ - mutex_lock(&conn->xmitmutex); do { rc = iscsi_data_xmit(conn); } while (rc >= 0 || rc == -EAGAIN); - mutex_unlock(&conn->xmitmutex); } enum { @@ -786,20 +828,23 @@ goto fault; } - /* - * Check for iSCSI window and take care of CmdSN wrap-around - */ - if ((int)(session->max_cmdsn - session->cmdsn) < 0) { - reason = FAILURE_WINDOW_CLOSED; - goto reject; - } - conn = session->leadconn; if (!conn) { reason = FAILURE_SESSION_FREED; goto fault; } + /* + * We check this here and in data xmit, because if we get to the point + * that this check is hitting the window then we have enough IO in + * flight and enough IO waiting to be transmitted it is better + * to let the scsi/block layer queue up. 
+ */ + if (iscsi_check_cmdsn_window_closed(conn)) { + reason = FAILURE_WINDOW_CLOSED; + goto reject; + } + if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*))) { reason = FAILURE_OOM; @@ -814,18 +859,8 @@ ctask->conn = conn; ctask->sc = sc; INIT_LIST_HEAD(&ctask->running); - ctask->total_length = sc->request_bufflen; - iscsi_prep_scsi_cmd_pdu(ctask); - - session->tt->init_cmd_task(ctask); list_add_tail(&ctask->running, &conn->xmitqueue); - debug_scsi( - "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d " - "win %d]\n", - sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", - conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen, - session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1); spin_unlock(&session->lock); scsi_queue_work(host, &conn->xmitwork); @@ -841,7 +876,7 @@ printk(KERN_ERR "iscsi: cmd 0x%x is not queued (%d)\n", sc->cmnd[0], reason); sc->result = (DID_NO_CONNECT << 16); - sc->resid = sc->request_bufflen; + scsi_set_resid(sc, scsi_bufflen(sc)); sc->scsi_done(sc); return 0; } @@ -856,19 +891,16 @@ } EXPORT_SYMBOL_GPL(iscsi_change_queue_depth); -static int -iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr, +static struct iscsi_mgmt_task * +__iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size) { struct iscsi_session *session = conn->session; - struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr; struct iscsi_mgmt_task *mtask; - spin_lock_bh(&session->lock); - if (session->state == ISCSI_STATE_TERMINATE) { - spin_unlock_bh(&session->lock); - return -EPERM; - } + if (session->state == ISCSI_STATE_TERMINATE) + return NULL; + if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) || hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE)) /* @@ -882,27 +914,11 @@ BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - nop->exp_statsn = cpu_to_be32(conn->exp_statsn); if 
(!__kfifo_get(session->mgmtpool.queue, - (void*)&mtask, sizeof(void*))) { - spin_unlock_bh(&session->lock); - return -ENOSPC; - } + (void*)&mtask, sizeof(void*))) + return NULL; } - /* - * pre-format CmdSN for outgoing PDU. - */ - if (hdr->itt != RESERVED_ITT) { - hdr->itt = build_itt(mtask->itt, conn->id, session->age); - nop->cmdsn = cpu_to_be32(session->cmdsn); - if (conn->c_stage == ISCSI_CONN_STARTED && - !(hdr->opcode & ISCSI_OP_IMMEDIATE)) - session->cmdsn++; - } else - /* do not advance CmdSN */ - nop->cmdsn = cpu_to_be32(session->cmdsn); - if (data_size) { memcpy(mtask->data, data, data_size); mtask->data_count = data_size; @@ -911,38 +927,23 @@ INIT_LIST_HEAD(&mtask->running); memcpy(mtask->hdr, hdr, sizeof(struct iscsi_hdr)); - if (session->tt->init_mgmt_task) - session->tt->init_mgmt_task(conn, mtask, data, data_size); - spin_unlock_bh(&session->lock); - - debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n", - hdr->opcode, hdr->itt, data_size); - - /* - * since send_pdu() could be called at least from two contexts, - * we need to serialize __kfifo_put, so we don't have to take - * additional lock on fast data-path - */ - if (hdr->opcode & ISCSI_OP_IMMEDIATE) - __kfifo_put(conn->immqueue, (void*)&mtask, sizeof(void*)); - else __kfifo_put(conn->mgmtqueue, (void*)&mtask, sizeof(void*)); - - scsi_queue_work(session->host, &conn->xmitwork); - return 0; + return mtask; } int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size) { struct iscsi_conn *conn = cls_conn->dd_data; - int rc; - - mutex_lock(&conn->xmitmutex); - rc = iscsi_conn_send_generic(conn, hdr, data, data_size); - mutex_unlock(&conn->xmitmutex); + struct iscsi_session *session = conn->session; + int err = 0; - return rc; + spin_lock_bh(&session->lock); + if (!__iscsi_conn_send_pdu(conn, hdr, data, data_size)) + err = -EPERM; + spin_unlock_bh(&session->lock); + scsi_queue_work(session->host, &conn->xmitwork); + return err; } 
EXPORT_SYMBOL_GPL(iscsi_conn_send_pdu); @@ -1027,14 +1028,12 @@ spin_unlock(&session->lock); } -/* must be called with the mutex lock */ static int iscsi_exec_abort_task(struct scsi_cmnd *sc, struct iscsi_cmd_task *ctask) { struct iscsi_conn *conn = ctask->conn; struct iscsi_session *session = conn->session; struct iscsi_tm *hdr = &conn->tmhdr; - int rc; /* * ctask timed out but session is OK requests must be serialized. @@ -1047,32 +1046,27 @@ hdr->rtt = ctask->hdr->itt; hdr->refcmdsn = ctask->hdr->cmdsn; - rc = iscsi_conn_send_generic(conn, (struct iscsi_hdr *)hdr, + ctask->mtask = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)hdr, NULL, 0); - if (rc) { + if (!ctask->mtask) { iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); - debug_scsi("abort sent failure [itt 0x%x] %d\n", ctask->itt, - rc); - return rc; + debug_scsi("abort sent failure [itt 0x%x]\n", ctask->itt); + return -EPERM; } + ctask->state = ISCSI_TASK_ABORTING; debug_scsi("abort sent [itt 0x%x]\n", ctask->itt); - spin_lock_bh(&session->lock); - ctask->mtask = (struct iscsi_mgmt_task *) - session->mgmt_cmds[get_itt(hdr->itt) - - ISCSI_MGMT_ITT_OFFSET]; - if (conn->tmabort_state == TMABORT_INITIAL) { conn->tmfcmd_pdus_cnt++; - conn->tmabort_timer.expires = 10*HZ + jiffies; + conn->tmabort_timer.expires = 20*HZ + jiffies; conn->tmabort_timer.function = iscsi_tmabort_timedout; conn->tmabort_timer.data = (unsigned long)ctask; add_timer(&conn->tmabort_timer); debug_scsi("abort set timeout [itt 0x%x]\n", ctask->itt); } spin_unlock_bh(&session->lock); - mutex_unlock(&conn->xmitmutex); + scsi_queue_work(session->host, &conn->xmitwork); /* * block eh thread until: @@ -1089,13 +1083,12 @@ if (signal_pending(current)) flush_signals(current); del_timer_sync(&conn->tmabort_timer); - - mutex_lock(&conn->xmitmutex); + spin_lock_bh(&session->lock); return 0; } /* - * xmit mutex and session lock must be held + * session lock must be held */ static struct iscsi_mgmt_task * iscsi_remove_mgmt_task(struct kfifo *fifo, 
uint32_t itt) @@ -1127,7 +1120,7 @@ if (!ctask->mtask) return -EINVAL; - if (!iscsi_remove_mgmt_task(conn->immqueue, ctask->mtask->itt)) + if (!iscsi_remove_mgmt_task(conn->mgmtqueue, ctask->mtask->itt)) list_del(&ctask->mtask->running); __kfifo_put(session->mgmtpool.queue, (void*)&ctask->mtask, sizeof(void*)); @@ -1136,7 +1129,7 @@ } /* - * session lock and xmitmutex must be held + * session lock must be held */ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, int err) @@ -1147,11 +1140,14 @@ if (!sc) return; + if (ctask->state != ISCSI_TASK_PENDING) conn->session->tt->cleanup_cmd_task(conn, ctask); iscsi_ctask_mtask_cleanup(ctask); sc->result = err; - sc->resid = sc->request_bufflen; + scsi_set_resid(sc, scsi_bufflen(sc)); + if (conn->ctask == ctask) + conn->ctask = NULL; /* release ref from queuecommand */ __iscsi_put_ctask(ctask); } @@ -1179,7 +1175,6 @@ conn->eh_abort_cnt++; debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt); - mutex_lock(&conn->xmitmutex); spin_lock_bh(&session->lock); /* @@ -1192,9 +1187,8 @@ /* ctask completed before time out */ if (!ctask->sc) { - spin_unlock_bh(&session->lock); debug_scsi("sc completed while abort in progress\n"); - goto success_rel_mutex; + goto success; } /* what should we do here ? */ @@ -1204,15 +1198,13 @@ goto failed; } - if (ctask->state == ISCSI_TASK_PENDING) - goto success_cleanup; + if (ctask->state == ISCSI_TASK_PENDING) { + fail_command(conn, ctask, DID_ABORT << 16); + goto success; + } conn->tmabort_state = TMABORT_INITIAL; - - spin_unlock_bh(&session->lock); rc = iscsi_exec_abort_task(sc, ctask); - spin_lock_bh(&session->lock); - if (rc || sc->SCp.phase != session->age || session->state != ISCSI_STATE_LOGGED_IN) goto failed; @@ -1220,45 +1212,44 @@ switch (conn->tmabort_state) { case TMABORT_SUCCESS: - goto success_cleanup; + spin_unlock_bh(&session->lock); + /* + * clean up task if aborted. 
grab the recv lock as a writer + */ + write_lock_bh(conn->recv_lock); + spin_lock(&session->lock); + fail_command(conn, ctask, DID_ABORT << 16); + spin_unlock(&session->lock); + write_unlock_bh(conn->recv_lock); + /* + * make sure xmit thread is not still touching the + * ctask/scsi_cmnd + */ + scsi_flush_work(session->host); + goto success_unlocked; case TMABORT_NOT_FOUND: if (!ctask->sc) { /* ctask completed before tmf abort response */ - spin_unlock_bh(&session->lock); debug_scsi("sc completed while abort in progress\n"); - goto success_rel_mutex; + goto success; } /* fall through */ default: /* timedout or failed */ spin_unlock_bh(&session->lock); iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); - spin_lock_bh(&session->lock); - goto failed; + goto failed_unlocked; } -success_cleanup: - debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt); +success: spin_unlock_bh(&session->lock); - - /* - * clean up task if aborted. we have the xmitmutex so grab - * the recv lock as a writer - */ - write_lock_bh(conn->recv_lock); - spin_lock(&session->lock); - fail_command(conn, ctask, DID_ABORT << 16); - spin_unlock(&session->lock); - write_unlock_bh(conn->recv_lock); - -success_rel_mutex: - mutex_unlock(&conn->xmitmutex); +success_unlocked: + debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt); return SUCCESS; failed: spin_unlock_bh(&session->lock); - mutex_unlock(&conn->xmitmutex); - +failed_unlocked: debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt); return FAILED; } @@ -1339,6 +1330,10 @@ * iscsi_session_setup - create iscsi cls session and host and session * @scsit: scsi transport template * @iscsit: iscsi transport template + * @cmds_max: scsi host can queue + * @qdepth: scsi host cmds per lun + * @cmd_task_size: LLD ctask private data size + * @mgmt_task_size: LLD mtask private data size * @initial_cmdsn: initial CmdSN * @hostno: host no allocated * @@ -1348,6 +1343,7 @@ struct iscsi_cls_session * 
iscsi_session_setup(struct iscsi_transport *iscsit, struct scsi_transport_template *scsit, + uint16_t cmds_max, uint16_t qdepth, int cmd_task_size, int mgmt_task_size, uint32_t initial_cmdsn, uint32_t *hostno) { @@ -1356,11 +1352,32 @@ struct iscsi_cls_session *cls_session; int cmd_i; + if (qdepth > ISCSI_MAX_CMD_PER_LUN || qdepth < 1) { + if (qdepth != 0) + printk(KERN_ERR "iscsi: invalid queue depth of %d. " + "Queue depth must be between 1 and %d.\n", + qdepth, ISCSI_MAX_CMD_PER_LUN); + qdepth = ISCSI_DEF_CMD_PER_LUN; + } + + if (cmds_max < 2 || (cmds_max & (cmds_max - 1)) || + cmds_max >= ISCSI_MGMT_ITT_OFFSET) { + if (cmds_max != 0) + printk(KERN_ERR "iscsi: invalid can_queue of %d. " + "can_queue must be a power of 2 and between " + "2 and %d - setting to %d.\n", cmds_max, + ISCSI_MGMT_ITT_OFFSET, ISCSI_DEF_XMIT_CMDS_MAX); + cmds_max = ISCSI_DEF_XMIT_CMDS_MAX; + } + shost = scsi_host_alloc(iscsit->host_template, hostdata_privsize(sizeof(*session))); if (!shost) return NULL; + /* the iscsi layer takes one task for reserve */ + shost->can_queue = cmds_max - 1; + shost->cmd_per_lun = qdepth; shost->max_id = 1; shost->max_channel = 0; shost->max_lun = iscsit->max_lun; @@ -1374,7 +1391,7 @@ session->host = shost; session->state = ISCSI_STATE_FREE; session->mgmtpool_max = ISCSI_MGMT_CMDS_MAX; - session->cmds_max = ISCSI_XMIT_CMDS_MAX; + session->cmds_max = cmds_max; session->cmdsn = initial_cmdsn; session->exp_cmdsn = initial_cmdsn + 1; session->max_cmdsn = initial_cmdsn + 1; @@ -1461,7 +1478,14 @@ iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds); iscsi_pool_free(&session->cmdpool, (void**)session->cmds); + kfree(session->password); + kfree(session->password_in); + kfree(session->username); + kfree(session->username_in); kfree(session->targetname); + kfree(session->netdev); + kfree(session->hwaddress); + kfree(session->initiatorname); iscsi_destroy_session(cls_session); scsi_host_put(shost); @@ -1499,11 +1523,6 @@ INIT_LIST_HEAD(&conn->xmitqueue); /* 
initialize general immediate & non-immediate PDU commands queue */ - conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*), - GFP_KERNEL, NULL); - if (conn->immqueue == ERR_PTR(-ENOMEM)) - goto immqueue_alloc_fail; - conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*), GFP_KERNEL, NULL); if (conn->mgmtqueue == ERR_PTR(-ENOMEM)) @@ -1527,7 +1546,6 @@ conn->login_mtask->data = conn->data = data; init_timer(&conn->tmabort_timer); - mutex_init(&conn->xmitmutex); init_waitqueue_head(&conn->ehwait); return cls_conn; @@ -1538,8 +1556,6 @@ login_mtask_alloc_fail: kfifo_free(conn->mgmtqueue); mgmtqueue_alloc_fail: - kfifo_free(conn->immqueue); -immqueue_alloc_fail: iscsi_destroy_conn(cls_conn); return NULL; } @@ -1558,10 +1574,8 @@ struct iscsi_session *session = conn->session; unsigned long flags; - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - mutex_lock(&conn->xmitmutex); - spin_lock_bh(&session->lock); + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); conn->c_stage = ISCSI_CONN_CLEANUP_WAIT; if (session->leadconn == conn) { /* @@ -1572,8 +1586,6 @@ } spin_unlock_bh(&session->lock); - mutex_unlock(&conn->xmitmutex); - /* * Block until all in-progress commands for this connection * time out or fail. 
@@ -1610,7 +1622,6 @@ } spin_unlock_bh(&session->lock); - kfifo_free(conn->immqueue); kfifo_free(conn->mgmtqueue); iscsi_destroy_conn(cls_conn); @@ -1671,8 +1682,7 @@ struct iscsi_mgmt_task *mtask, *tmp; /* handle pending */ - while (__kfifo_get(conn->immqueue, (void*)&mtask, sizeof(void*)) || - __kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) { + while (__kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) { if (mtask == conn->login_mtask) continue; debug_scsi("flushing pending mgmt task itt 0x%x\n", mtask->itt); @@ -1742,12 +1752,12 @@ conn->c_stage = ISCSI_CONN_STOPPED; set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); spin_unlock_bh(&session->lock); + scsi_flush_work(session->host); write_lock_bh(conn->recv_lock); set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); write_unlock_bh(conn->recv_lock); - mutex_lock(&conn->xmitmutex); /* * for connection level recovery we should not calculate * header digest. conn->hdr_size used for optimization @@ -1771,8 +1781,6 @@ fail_all_commands(conn); flush_control_queues(session, conn); spin_unlock_bh(&session->lock); - - mutex_unlock(&conn->xmitmutex); } void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) @@ -1867,6 +1875,30 @@ case ISCSI_PARAM_EXP_STATSN: sscanf(buf, "%u", &conn->exp_statsn); break; + case ISCSI_PARAM_USERNAME: + kfree(session->username); + session->username = kstrdup(buf, GFP_KERNEL); + if (!session->username) + return -ENOMEM; + break; + case ISCSI_PARAM_USERNAME_IN: + kfree(session->username_in); + session->username_in = kstrdup(buf, GFP_KERNEL); + if (!session->username_in) + return -ENOMEM; + break; + case ISCSI_PARAM_PASSWORD: + kfree(session->password); + session->password = kstrdup(buf, GFP_KERNEL); + if (!session->password) + return -ENOMEM; + break; + case ISCSI_PARAM_PASSWORD_IN: + kfree(session->password_in); + session->password_in = kstrdup(buf, GFP_KERNEL); + if (!session->password_in) + return -ENOMEM; + break; case ISCSI_PARAM_TARGET_NAME: /* this should not change 
between logins */ if (session->targetname) @@ -1940,6 +1972,18 @@ case ISCSI_PARAM_TPGT: len = sprintf(buf, "%d\n", session->tpgt); break; + case ISCSI_PARAM_USERNAME: + len = sprintf(buf, "%s\n", session->username); + break; + case ISCSI_PARAM_USERNAME_IN: + len = sprintf(buf, "%s\n", session->username_in); + break; + case ISCSI_PARAM_PASSWORD: + len = sprintf(buf, "%s\n", session->password); + break; + case ISCSI_PARAM_PASSWORD_IN: + len = sprintf(buf, "%s\n", session->password_in); + break; default: return -ENOSYS; } @@ -1990,6 +2034,66 @@ } EXPORT_SYMBOL_GPL(iscsi_conn_get_param); +int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, + char *buf) +{ + struct iscsi_session *session = iscsi_hostdata(shost->hostdata); + int len; + + switch (param) { + case ISCSI_HOST_PARAM_NETDEV_NAME: + if (!session->netdev) + len = sprintf(buf, "%s\n", "default"); + else + len = sprintf(buf, "%s\n", session->netdev); + break; + case ISCSI_HOST_PARAM_HWADDRESS: + if (!session->hwaddress) + len = sprintf(buf, "%s\n", "default"); + else + len = sprintf(buf, "%s\n", session->hwaddress); + break; + case ISCSI_HOST_PARAM_INITIATOR_NAME: + if (!session->initiatorname) + len = sprintf(buf, "%s\n", "unknown"); + else + len = sprintf(buf, "%s\n", session->initiatorname); + break; + + default: + return -ENOSYS; + } + + return len; +} +EXPORT_SYMBOL_GPL(iscsi_host_get_param); + +int iscsi_host_set_param(struct Scsi_Host *shost, enum iscsi_host_param param, + char *buf, int buflen) +{ + struct iscsi_session *session = iscsi_hostdata(shost->hostdata); + + switch (param) { + case ISCSI_HOST_PARAM_NETDEV_NAME: + if (!session->netdev) + session->netdev = kstrdup(buf, GFP_KERNEL); + break; + case ISCSI_HOST_PARAM_HWADDRESS: + if (!session->hwaddress) + session->hwaddress = kstrdup(buf, GFP_KERNEL); + break; + case ISCSI_HOST_PARAM_INITIATOR_NAME: + if (!session->initiatorname) + session->initiatorname = kstrdup(buf, GFP_KERNEL); + break; + default: + return -ENOSYS; + } 
+ + return 0; +} +EXPORT_SYMBOL_GPL(iscsi_host_set_param); + MODULE_AUTHOR("Mike Christie"); MODULE_DESCRIPTION("iSCSI library functions"); MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c linux-2.6.22-591/drivers/scsi/libsas/sas_expander.c --- linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/libsas/sas_expander.c 2007-12-21 15:36:12.000000000 -0500 @@ -38,8 +38,10 @@ #if 0 /* FIXME: smp needs to migrate into the sas class */ -static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t); -static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t); +static ssize_t smp_portal_read(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); +static ssize_t smp_portal_write(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); #endif /* ---------- SMP task management ---------- */ @@ -1368,7 +1370,6 @@ memset(bin_attr, 0, sizeof(*bin_attr)); bin_attr->attr.name = SMP_BIN_ATTR_NAME; - bin_attr->attr.owner = THIS_MODULE; bin_attr->attr.mode = 0600; bin_attr->size = 0; @@ -1846,8 +1847,9 @@ #if 0 /* ---------- SMP portal ---------- */ -static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs, - size_t size) +static ssize_t smp_portal_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t offs, size_t size) { struct domain_device *dev = to_dom_device(kobj); struct expander_device *ex = &dev->ex_dev; @@ -1873,8 +1875,9 @@ return size; } -static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs, - size_t size) +static ssize_t smp_portal_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t offs, size_t size) { struct domain_device *dev = to_dom_device(kobj); struct expander_device *ex = &dev->ex_dev; diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c linux-2.6.22-591/drivers/scsi/libsas/sas_scsi_host.c --- 
linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/libsas/sas_scsi_host.c 2007-12-21 15:36:12.000000000 -0500 @@ -40,6 +40,7 @@ #include #include +#include #include /* ---------- SCSI Host glue ---------- */ @@ -76,8 +77,8 @@ hs = DID_NO_CONNECT; break; case SAS_DATA_UNDERRUN: - sc->resid = ts->residual; - if (sc->request_bufflen - sc->resid < sc->underflow) + scsi_set_resid(sc, ts->residual); + if (scsi_bufflen(sc) - scsi_get_resid(sc) < sc->underflow) hs = DID_ERROR; break; case SAS_DATA_OVERRUN: @@ -161,9 +162,9 @@ task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); memcpy(task->ssp_task.cdb, cmd->cmnd, 16); - task->scatter = cmd->request_buffer; - task->num_scatter = cmd->use_sg; - task->total_xfer_len = cmd->request_bufflen; + task->scatter = scsi_sglist(cmd); + task->num_scatter = scsi_sg_count(cmd); + task->total_xfer_len = scsi_bufflen(cmd); task->data_dir = cmd->sc_data_direction; task->task_done = sas_scsi_task_done; @@ -868,8 +869,6 @@ { struct sas_ha_struct *sas_ha = _sas_ha; - current->flags |= PF_NOFREEZE; - while (1) { set_current_state(TASK_INTERRUPTIBLE); schedule(); diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/Makefile linux-2.6.22-591/drivers/scsi/lpfc/Makefile --- linux-2.6.22-570/drivers/scsi/lpfc/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -1,7 +1,7 @@ #/******************************************************************* # * This file is part of the Emulex Linux Device Driver for * # * Fibre Channel Host Bus Adapters. * -# * Copyright (C) 2004-2005 Emulex. All rights reserved. * +# * Copyright (C) 2004-2006 Emulex. All rights reserved. * # * EMULEX and SLI are trademarks of Emulex. 
* # * www.emulex.com * # * * @@ -27,4 +27,5 @@ obj-$(CONFIG_SCSI_LPFC) := lpfc.o lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o lpfc_hbadisc.o \ - lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o + lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o \ + lpfc_vport.o lpfc_debugfs.o diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc.h 2007-12-21 15:36:12.000000000 -0500 @@ -19,8 +19,9 @@ * included with this package. * *******************************************************************/ -struct lpfc_sli2_slim; +#include +struct lpfc_sli2_slim; #define LPFC_MAX_TARGET 256 /* max number of targets supported */ #define LPFC_MAX_DISC_THREADS 64 /* max outstanding discovery els @@ -32,6 +33,20 @@ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */ #define LPFC_Q_RAMP_UP_INTERVAL 120 /* lun q_depth ramp up interval */ +/* + * Following time intervals are used of adjusting SCSI device + * queue depths when there are driver resource error or Firmware + * resource error. + */ +#define QUEUE_RAMP_DOWN_INTERVAL (1 * HZ) /* 1 Second */ +#define QUEUE_RAMP_UP_INTERVAL (300 * HZ) /* 5 minutes */ + +/* Number of exchanges reserved for discovery to complete */ +#define LPFC_DISC_IOCB_BUFF_COUNT 20 + +#define LPFC_HB_MBOX_INTERVAL 5 /* Heart beat interval in seconds. */ +#define LPFC_HB_MBOX_TIMEOUT 30 /* Heart beat timeout in seconds. */ + /* Define macros for 64 bit support */ #define putPaddrLow(addr) ((uint32_t) (0xffffffff & (u64)(addr))) #define putPaddrHigh(addr) ((uint32_t) (0xffffffff & (((u64)(addr))>>32))) @@ -61,6 +76,11 @@ uint32_t current_count; }; +struct hbq_dmabuf { + struct lpfc_dmabuf dbuf; + uint32_t tag; +}; + /* Priority bit. Set value to exceed low water mark in lpfc_mem. 
*/ #define MEM_PRI 0x100 @@ -90,6 +110,29 @@ uint32_t sli2FwRev; uint8_t sli2FwName[16]; } rev; + struct { +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd2 :24; /* Reserved */ + uint32_t cmv : 1; /* Configure Max VPIs */ + uint32_t ccrp : 1; /* Config Command Ring Polling */ + uint32_t csah : 1; /* Configure Synchronous Abort Handling */ + uint32_t chbs : 1; /* Cofigure Host Backing store */ + uint32_t cinb : 1; /* Enable Interrupt Notification Block */ + uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */ + uint32_t cmx : 1; /* Configure Max XRIs */ + uint32_t cmr : 1; /* Configure Max RPIs */ +#else /* __LITTLE_ENDIAN */ + uint32_t cmr : 1; /* Configure Max RPIs */ + uint32_t cmx : 1; /* Configure Max XRIs */ + uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */ + uint32_t cinb : 1; /* Enable Interrupt Notification Block */ + uint32_t chbs : 1; /* Cofigure Host Backing store */ + uint32_t csah : 1; /* Configure Synchronous Abort Handling */ + uint32_t ccrp : 1; /* Config Command Ring Polling */ + uint32_t cmv : 1; /* Configure Max VPIs */ + uint32_t rsvd2 :24; /* Reserved */ +#endif + } sli3Feat; } lpfc_vpd_t; struct lpfc_scsi_buf; @@ -122,6 +165,7 @@ uint32_t elsRcvRPS; uint32_t elsRcvRPL; uint32_t elsXmitFLOGI; + uint32_t elsXmitFDISC; uint32_t elsXmitPLOGI; uint32_t elsXmitPRLI; uint32_t elsXmitADISC; @@ -165,70 +209,53 @@ struct lpfcMboxq * mbox; }; -struct lpfc_hba { - struct lpfc_sli sli; - struct lpfc_sli2_slim *slim2p; - dma_addr_t slim2p_mapping; - uint16_t pci_cfg_value; +struct lpfc_hba; - int32_t hba_state; -#define LPFC_STATE_UNKNOWN 0 /* HBA state is unknown */ -#define LPFC_WARM_START 1 /* HBA state after selective reset */ -#define LPFC_INIT_START 2 /* Initial state after board reset */ -#define LPFC_INIT_MBX_CMDS 3 /* Initialize HBA with mbox commands */ -#define LPFC_LINK_DOWN 4 /* HBA initialized, link is down */ -#define LPFC_LINK_UP 5 /* Link is up - issue READ_LA */ -#define LPFC_LOCAL_CFG_LINK 6 /* local NPORT Id 
configured */ -#define LPFC_FLOGI 7 /* FLOGI sent to Fabric */ -#define LPFC_FABRIC_CFG_LINK 8 /* Fabric assigned NPORT Id - configured */ -#define LPFC_NS_REG 9 /* Register with NameServer */ -#define LPFC_NS_QRY 10 /* Query NameServer for NPort ID list */ -#define LPFC_BUILD_DISC_LIST 11 /* Build ADISC and PLOGI lists for +enum discovery_state { + LPFC_VPORT_UNKNOWN = 0, /* vport state is unknown */ + LPFC_VPORT_FAILED = 1, /* vport has failed */ + LPFC_LOCAL_CFG_LINK = 6, /* local NPORT Id configured */ + LPFC_FLOGI = 7, /* FLOGI sent to Fabric */ + LPFC_FDISC = 8, /* FDISC sent for vport */ + LPFC_FABRIC_CFG_LINK = 9, /* Fabric assigned NPORT Id + * configured */ + LPFC_NS_REG = 10, /* Register with NameServer */ + LPFC_NS_QRY = 11, /* Query NameServer for NPort ID list */ + LPFC_BUILD_DISC_LIST = 12, /* Build ADISC and PLOGI lists for * device authentication / discovery */ -#define LPFC_DISC_AUTH 12 /* Processing ADISC list */ -#define LPFC_CLEAR_LA 13 /* authentication cmplt - issue - CLEAR_LA */ -#define LPFC_HBA_READY 32 -#define LPFC_HBA_ERROR -1 + LPFC_DISC_AUTH = 13, /* Processing ADISC list */ + LPFC_VPORT_READY = 32, +}; - int32_t stopped; /* HBA has not been restarted since last ERATT */ - uint8_t fc_linkspeed; /* Link speed after last READ_LA */ +enum hba_state { + LPFC_LINK_UNKNOWN = 0, /* HBA state is unknown */ + LPFC_WARM_START = 1, /* HBA state after selective reset */ + LPFC_INIT_START = 2, /* Initial state after board reset */ + LPFC_INIT_MBX_CMDS = 3, /* Initialize HBA with mbox commands */ + LPFC_LINK_DOWN = 4, /* HBA initialized, link is down */ + LPFC_LINK_UP = 5, /* Link is up - issue READ_LA */ + LPFC_CLEAR_LA = 6, /* authentication cmplt - issue + * CLEAR_LA */ + LPFC_HBA_READY = 32, + LPFC_HBA_ERROR = -1 +}; - uint32_t fc_eventTag; /* event tag for link attention */ - uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */ +struct lpfc_vport { + struct list_head listentry; + struct lpfc_hba *phba; + uint8_t port_type; +#define 
LPFC_PHYSICAL_PORT 1 +#define LPFC_NPIV_PORT 2 +#define LPFC_FABRIC_PORT 3 + enum discovery_state port_state; - uint32_t num_disc_nodes; /*in addition to hba_state */ + uint16_t vpi; - struct timer_list fc_estabtmo; /* link establishment timer */ - struct timer_list fc_disctmo; /* Discovery rescue timer */ - struct timer_list fc_fdmitmo; /* fdmi timer */ - /* These fields used to be binfo */ - struct lpfc_name fc_nodename; /* fc nodename */ - struct lpfc_name fc_portname; /* fc portname */ - uint32_t fc_pref_DID; /* preferred D_ID */ - uint8_t fc_pref_ALPA; /* preferred AL_PA */ - uint32_t fc_edtov; /* E_D_TOV timer value */ - uint32_t fc_arbtov; /* ARB_TOV timer value */ - uint32_t fc_ratov; /* R_A_TOV timer value */ - uint32_t fc_rttov; /* R_T_TOV timer value */ - uint32_t fc_altov; /* AL_TOV timer value */ - uint32_t fc_crtov; /* C_R_TOV timer value */ - uint32_t fc_citov; /* C_I_TOV timer value */ - uint32_t fc_myDID; /* fibre channel S_ID */ - uint32_t fc_prevDID; /* previous fibre channel S_ID */ - - struct serv_parm fc_sparam; /* buffer for our service parameters */ - struct serv_parm fc_fabparam; /* fabric service parameters buffer */ - uint8_t alpa_map[128]; /* AL_PA map from READ_LA */ - - uint8_t fc_ns_retry; /* retries for fabric nameserver */ - uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */ - uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */ - struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN]; - uint32_t lmt; uint32_t fc_flag; /* FC flags */ +/* Several of these flags are HBA centric and should be moved to + * phba->link_flag (e.g. 
FC_PTP, FC_PUBLIC_LOOP) + */ #define FC_PT2PT 0x1 /* pt2pt with no fabric */ #define FC_PT2PT_PLOGI 0x2 /* pt2pt initiate PLOGI */ #define FC_DISC_TMO 0x4 /* Discovery timer running */ @@ -239,22 +266,14 @@ #define FC_OFFLINE_MODE 0x80 /* Interface is offline for diag */ #define FC_FABRIC 0x100 /* We are fabric attached */ #define FC_ESTABLISH_LINK 0x200 /* Reestablish Link */ -#define FC_RSCN_DISCOVERY 0x400 /* Authenticate all devices after RSCN*/ -#define FC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */ -#define FC_LOADING 0x1000 /* HBA in process of loading drvr */ -#define FC_UNLOADING 0x2000 /* HBA in process of unloading drvr */ +#define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ #define FC_BYPASSED_MODE 0x20000 /* NPort is in bypassed mode */ -#define FC_LOOPBACK_MODE 0x40000 /* NPort is in Loopback mode */ - /* This flag is set while issuing */ - /* INIT_LINK mailbox command */ -#define FC_IGNORE_ERATT 0x80000 /* intr handler should ignore ERATT */ - - uint32_t fc_topology; /* link topology, from LINK INIT */ - - struct lpfc_stats fc_stat; +#define FC_RFF_NOT_SUPPORTED 0x40000 /* RFF_ID was rejected by switch */ +#define FC_VPORT_NEEDS_REG_VPI 0x80000 /* Needs to have its vpi registered */ +#define FC_RSCN_DEFERRED 0x100000 /* A deferred RSCN being processed */ struct list_head fc_nodes; @@ -267,10 +286,131 @@ uint16_t fc_map_cnt; uint16_t fc_npr_cnt; uint16_t fc_unused_cnt; + struct serv_parm fc_sparam; /* buffer for our service parameters */ + + uint32_t fc_myDID; /* fibre channel S_ID */ + uint32_t fc_prevDID; /* previous fibre channel S_ID */ + + int32_t stopped; /* HBA has not been restarted since last ERATT */ + uint8_t fc_linkspeed; /* Link speed after last READ_LA */ + + uint32_t num_disc_nodes; /*in addition to hba_state */ + + 
uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */ + uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */ + struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN]; + struct lpfc_name fc_nodename; /* fc nodename */ + struct lpfc_name fc_portname; /* fc portname */ + + struct lpfc_work_evt disc_timeout_evt; + + struct timer_list fc_disctmo; /* Discovery rescue timer */ + uint8_t fc_ns_retry; /* retries for fabric nameserver */ + uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */ + + spinlock_t work_port_lock; + uint32_t work_port_events; /* Timeout to be handled */ +#define WORKER_DISC_TMO 0x1 /* vport: Discovery timeout */ +#define WORKER_ELS_TMO 0x2 /* vport: ELS timeout */ +#define WORKER_FDMI_TMO 0x4 /* vport: FDMI timeout */ + +#define WORKER_MBOX_TMO 0x100 /* hba: MBOX timeout */ +#define WORKER_HB_TMO 0x200 /* hba: Heart beat timeout */ +#define WORKER_FABRIC_BLOCK_TMO 0x400 /* hba: fabric block timout */ +#define WORKER_RAMP_DOWN_QUEUE 0x800 /* hba: Decrease Q depth */ +#define WORKER_RAMP_UP_QUEUE 0x1000 /* hba: Increase Q depth */ + + struct timer_list fc_fdmitmo; + struct timer_list els_tmofunc; + + int unreg_vpi_cmpl; + + uint8_t load_flag; +#define FC_LOADING 0x1 /* HBA in process of loading drvr */ +#define FC_UNLOADING 0x2 /* HBA in process of unloading drvr */ + char *vname; /* Application assigned name */ + struct fc_vport *fc_vport; + +#ifdef CONFIG_LPFC_DEBUG_FS + struct dentry *debug_disc_trc; + struct dentry *debug_nodelist; + struct dentry *vport_debugfs_root; + struct lpfc_disc_trc *disc_trc; + atomic_t disc_trc_cnt; +#endif +}; + +struct hbq_s { + uint16_t entry_count; /* Current number of HBQ slots */ + uint32_t next_hbqPutIdx; /* Index to next HBQ slot to use */ + uint32_t hbqPutIdx; /* HBQ slot to use */ + uint32_t local_hbqGetIdx; /* Local copy of Get index from Port */ +}; + +#define LPFC_MAX_HBQS 16 +/* this matches the possition in the lpfc_hbq_defs array */ +#define LPFC_ELS_HBQ 0 + +struct lpfc_hba { + struct 
lpfc_sli sli; + uint32_t sli_rev; /* SLI2 or SLI3 */ + uint32_t sli3_options; /* Mask of enabled SLI3 options */ +#define LPFC_SLI3_ENABLED 0x01 +#define LPFC_SLI3_HBQ_ENABLED 0x02 +#define LPFC_SLI3_NPIV_ENABLED 0x04 +#define LPFC_SLI3_VPORT_TEARDOWN 0x08 + uint32_t iocb_cmd_size; + uint32_t iocb_rsp_size; + + enum hba_state link_state; + uint32_t link_flag; /* link state flags */ +#define LS_LOOPBACK_MODE 0x1 /* NPort is in Loopback mode */ + /* This flag is set while issuing */ + /* INIT_LINK mailbox command */ +#define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */ +#define LS_IGNORE_ERATT 0x3 /* intr handler should ignore ERATT */ + + struct lpfc_sli2_slim *slim2p; + struct lpfc_dmabuf hbqslimp; + + dma_addr_t slim2p_mapping; + + uint16_t pci_cfg_value; + + uint8_t work_found; +#define LPFC_MAX_WORKER_ITERATION 4 + + uint8_t fc_linkspeed; /* Link speed after last READ_LA */ + + uint32_t fc_eventTag; /* event tag for link attention */ + + + struct timer_list fc_estabtmo; /* link establishment timer */ + /* These fields used to be binfo */ + uint32_t fc_pref_DID; /* preferred D_ID */ + uint8_t fc_pref_ALPA; /* preferred AL_PA */ + uint32_t fc_edtov; /* E_D_TOV timer value */ + uint32_t fc_arbtov; /* ARB_TOV timer value */ + uint32_t fc_ratov; /* R_A_TOV timer value */ + uint32_t fc_rttov; /* R_T_TOV timer value */ + uint32_t fc_altov; /* AL_TOV timer value */ + uint32_t fc_crtov; /* C_R_TOV timer value */ + uint32_t fc_citov; /* C_I_TOV timer value */ + + struct serv_parm fc_fabparam; /* fabric service parameters buffer */ + uint8_t alpa_map[128]; /* AL_PA map from READ_LA */ + + uint32_t lmt; + + uint32_t fc_topology; /* link topology, from LINK INIT */ + + struct lpfc_stats fc_stat; + struct lpfc_nodelist fc_fcpnodev; /* nodelist entry for no device */ uint32_t nport_event_cnt; /* timestamp for nlplist entry */ - uint32_t wwnn[2]; + uint8_t wwnn[8]; + uint8_t wwpn[8]; uint32_t RandomData[7]; uint32_t cfg_log_verbose; @@ -278,6 +418,9 @@ uint32_t 
cfg_nodev_tmo; uint32_t cfg_devloss_tmo; uint32_t cfg_hba_queue_depth; + uint32_t cfg_peer_port_login; + uint32_t cfg_vport_restrict_login; + uint32_t cfg_npiv_enable; uint32_t cfg_fcp_class; uint32_t cfg_use_adisc; uint32_t cfg_ack0; @@ -304,22 +447,20 @@ lpfc_vpd_t vpd; /* vital product data */ - struct Scsi_Host *host; struct pci_dev *pcidev; struct list_head work_list; uint32_t work_ha; /* Host Attention Bits for WT */ uint32_t work_ha_mask; /* HA Bits owned by WT */ uint32_t work_hs; /* HS stored in case of ERRAT */ uint32_t work_status[2]; /* Extra status from SLIM */ - uint32_t work_hba_events; /* Timeout to be handled */ -#define WORKER_DISC_TMO 0x1 /* Discovery timeout */ -#define WORKER_ELS_TMO 0x2 /* ELS timeout */ -#define WORKER_MBOX_TMO 0x4 /* MBOX timeout */ -#define WORKER_FDMI_TMO 0x8 /* FDMI timeout */ wait_queue_head_t *work_wait; struct task_struct *worker_thread; + struct list_head hbq_buffer_list; + uint32_t hbq_count; /* Count of configured HBQs */ + struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */ + unsigned long pci_bar0_map; /* Physical address for PCI BAR0 */ unsigned long pci_bar2_map; /* Physical address for PCI BAR2 */ void __iomem *slim_memmap_p; /* Kernel memory mapped address for @@ -334,6 +475,10 @@ reg */ void __iomem *HCregaddr; /* virtual address for host ctl reg */ + struct lpfc_hgp __iomem *host_gp; /* Host side get/put pointers */ + uint32_t __iomem *hbq_put; /* Address in SLIM to HBQ put ptrs */ + uint32_t *hbq_get; /* Host mem address of HBQ get ptrs */ + int brd_no; /* FC board number */ char SerialNumber[32]; /* adapter Serial Number */ @@ -353,7 +498,6 @@ uint8_t soft_wwn_enable; struct timer_list fcp_poll_timer; - struct timer_list els_tmofunc; /* * stat counters @@ -370,31 +514,69 @@ uint32_t total_scsi_bufs; struct list_head lpfc_iocb_list; uint32_t total_iocbq_bufs; + spinlock_t hbalock; /* pci_mem_pools */ struct pci_pool *lpfc_scsi_dma_buf_pool; struct pci_pool *lpfc_mbuf_pool; + struct pci_pool 
*lpfc_hbq_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool; mempool_t *mbox_mem_pool; mempool_t *nlp_mem_pool; struct fc_host_statistics link_stats; + + struct list_head port_list; + struct lpfc_vport *pport; /* physical lpfc_vport pointer */ + uint16_t max_vpi; /* Maximum virtual nports */ +#define LPFC_MAX_VPI 100 /* Max number of VPorts supported */ + unsigned long *vpi_bmask; /* vpi allocation table */ + + /* Data structure used by fabric iocb scheduler */ + struct list_head fabric_iocb_list; + atomic_t fabric_iocb_count; + struct timer_list fabric_block_timer; + unsigned long bit_flags; +#define FABRIC_COMANDS_BLOCKED 0 + atomic_t num_rsrc_err; + atomic_t num_cmd_success; + unsigned long last_rsrc_error_time; + unsigned long last_ramp_down_time; + unsigned long last_ramp_up_time; +#ifdef CONFIG_LPFC_DEBUG_FS + struct dentry *hba_debugfs_root; + atomic_t debugfs_vport_count; +#endif + + /* Fields used for heart beat. */ + unsigned long last_completion_time; + struct timer_list hb_tmofunc; + uint8_t hb_outstanding; }; +static inline struct Scsi_Host * +lpfc_shost_from_vport(struct lpfc_vport *vport) +{ + return container_of((void *) vport, struct Scsi_Host, hostdata[0]); +} + static inline void -lpfc_set_loopback_flag(struct lpfc_hba *phba) { +lpfc_set_loopback_flag(struct lpfc_hba *phba) +{ if (phba->cfg_topology == FLAGS_LOCAL_LB) - phba->fc_flag |= FC_LOOPBACK_MODE; + phba->link_flag |= LS_LOOPBACK_MODE; else - phba->fc_flag &= ~FC_LOOPBACK_MODE; + phba->link_flag &= ~LS_LOOPBACK_MODE; } -struct rnidrsp { - void *buf; - uint32_t uniqueid; - struct list_head list; - uint32_t data; -}; +static inline int +lpfc_is_link_up(struct lpfc_hba *phba) +{ + return phba->link_state == LPFC_LINK_UP || + phba->link_state == LPFC_CLEAR_LA || + phba->link_state == LPFC_HBA_READY; +} #define FC_REG_DUMP_EVENT 0x10 /* Register for Dump events */ + diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_attr.c --- 
linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_attr.c 2007-12-21 15:36:12.000000000 -0500 @@ -39,6 +39,7 @@ #include "lpfc_version.h" #include "lpfc_compat.h" #include "lpfc_crtn.h" +#include "lpfc_vport.h" #define LPFC_DEF_DEVLOSS_TMO 30 #define LPFC_MIN_DEVLOSS_TMO 1 @@ -76,116 +77,156 @@ lpfc_info_show(struct class_device *cdev, char *buf) { struct Scsi_Host *host = class_to_shost(cdev); + return snprintf(buf, PAGE_SIZE, "%s\n",lpfc_info(host)); } static ssize_t lpfc_serialnum_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%s\n",phba->SerialNumber); } static ssize_t lpfc_modeldesc_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelDesc); } static ssize_t lpfc_modelname_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelName); } static ssize_t lpfc_programtype_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport 
*vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%s\n",phba->ProgramType); } static ssize_t -lpfc_portnum_show(struct class_device *cdev, char *buf) +lpfc_vportnum_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%s\n",phba->Port); } static ssize_t lpfc_fwrev_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; char fwrev[32]; + lpfc_decode_firmware_rev(phba, fwrev, 1); - return snprintf(buf, PAGE_SIZE, "%s\n",fwrev); + return snprintf(buf, PAGE_SIZE, "%s, sli-%d\n", fwrev, phba->sli_rev); } static ssize_t lpfc_hdw_show(struct class_device *cdev, char *buf) { char hdw[9]; - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; lpfc_vpd_t *vp = &phba->vpd; + lpfc_jedec_to_ascii(vp->rev.biuRev, hdw); return snprintf(buf, PAGE_SIZE, "%s\n", hdw); } static ssize_t lpfc_option_rom_version_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%s\n", 
phba->OptionROMVersion); } static ssize_t lpfc_state_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; int len = 0; - switch (phba->hba_state) { - case LPFC_STATE_UNKNOWN: + + switch (phba->link_state) { + case LPFC_LINK_UNKNOWN: case LPFC_WARM_START: case LPFC_INIT_START: case LPFC_INIT_MBX_CMDS: case LPFC_LINK_DOWN: + case LPFC_HBA_ERROR: len += snprintf(buf + len, PAGE_SIZE-len, "Link Down\n"); break; case LPFC_LINK_UP: + case LPFC_CLEAR_LA: + case LPFC_HBA_READY: + len += snprintf(buf + len, PAGE_SIZE-len, "Link Up - \n"); + + switch (vport->port_state) { + len += snprintf(buf + len, PAGE_SIZE-len, + "initializing\n"); + break; case LPFC_LOCAL_CFG_LINK: - len += snprintf(buf + len, PAGE_SIZE-len, "Link Up\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "Configuring Link\n"); break; + case LPFC_FDISC: case LPFC_FLOGI: case LPFC_FABRIC_CFG_LINK: case LPFC_NS_REG: case LPFC_NS_QRY: case LPFC_BUILD_DISC_LIST: case LPFC_DISC_AUTH: - case LPFC_CLEAR_LA: - len += snprintf(buf + len, PAGE_SIZE-len, - "Link Up - Discovery\n"); + len += snprintf(buf + len, PAGE_SIZE - len, + "Discovery\n"); break; - case LPFC_HBA_READY: - len += snprintf(buf + len, PAGE_SIZE-len, - "Link Up - Ready:\n"); + case LPFC_VPORT_READY: + len += snprintf(buf + len, PAGE_SIZE - len, "Ready\n"); + break; + + case LPFC_VPORT_FAILED: + len += snprintf(buf + len, PAGE_SIZE - len, "Failed\n"); + break; + + case LPFC_VPORT_UNKNOWN: + len += snprintf(buf + len, PAGE_SIZE - len, + "Unknown\n"); + break; + } + if (phba->fc_topology == TOPOLOGY_LOOP) { - if (phba->fc_flag & FC_PUBLIC_LOOP) + if (vport->fc_flag & FC_PUBLIC_LOOP) len += snprintf(buf + len, PAGE_SIZE-len, " Public Loop\n"); else len += snprintf(buf + len, PAGE_SIZE-len, " Private 
Loop\n"); } else { - if (phba->fc_flag & FC_FABRIC) + if (vport->fc_flag & FC_FABRIC) len += snprintf(buf + len, PAGE_SIZE-len, " Fabric\n"); else @@ -193,29 +234,32 @@ " Point-2-Point\n"); } } + return len; } static ssize_t lpfc_num_discovered_ports_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->fc_map_cnt + - phba->fc_unmap_cnt); + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + + return snprintf(buf, PAGE_SIZE, "%d\n", + vport->fc_map_cnt + vport->fc_unmap_cnt); } static int -lpfc_issue_lip(struct Scsi_Host *host) +lpfc_issue_lip(struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *pmboxq; int mbxstatus = MBXERR_ERROR; - if ((phba->fc_flag & FC_OFFLINE_MODE) || - (phba->fc_flag & FC_BLOCK_MGMT_IO) || - (phba->hba_state != LPFC_HBA_READY)) + if ((vport->fc_flag & FC_OFFLINE_MODE) || + (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) || + (vport->port_state != LPFC_VPORT_READY)) return -EPERM; pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL); @@ -238,9 +282,7 @@ } lpfc_set_loopback_flag(phba); - if (mbxstatus == MBX_TIMEOUT) - pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - else + if (mbxstatus != MBX_TIMEOUT) mempool_free(pmboxq, phba->mbox_mem_pool); if (mbxstatus == MBXERR_ERROR) @@ -320,8 +362,10 @@ static ssize_t lpfc_issue_reset(struct class_device *cdev, const char *buf, size_t count) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + int status = -EINVAL; if 
(strncmp(buf, "selective", sizeof("selective") - 1) == 0) @@ -336,23 +380,26 @@ static ssize_t lpfc_nport_evt_cnt_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt); } static ssize_t lpfc_board_mode_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; char * state; - if (phba->hba_state == LPFC_HBA_ERROR) + if (phba->link_state == LPFC_HBA_ERROR) state = "error"; - else if (phba->hba_state == LPFC_WARM_START) + else if (phba->link_state == LPFC_WARM_START) state = "warm start"; - else if (phba->hba_state == LPFC_INIT_START) + else if (phba->link_state == LPFC_INIT_START) state = "offline"; else state = "online"; @@ -363,8 +410,9 @@ static ssize_t lpfc_board_mode_store(struct class_device *cdev, const char *buf, size_t count) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct completion online_compl; int status=0; @@ -389,11 +437,166 @@ return -EIO; } +int +lpfc_get_hba_info(struct lpfc_hba *phba, + uint32_t *mxri, uint32_t *axri, + uint32_t *mrpi, uint32_t *arpi, + uint32_t *mvpi, uint32_t *avpi) +{ + struct lpfc_sli *psli = &phba->sli; + LPFC_MBOXQ_t *pmboxq; + MAILBOX_t *pmb; + int rc = 0; + + /* + * prevent udev from issuing mailbox commands until the port is + * 
configured. + */ + if (phba->link_state < LPFC_LINK_DOWN || + !phba->mbox_mem_pool || + (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0) + return 0; + + if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) + return 0; + + pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!pmboxq) + return 0; + memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t)); + + pmb = &pmboxq->mb; + pmb->mbxCommand = MBX_READ_CONFIG; + pmb->mbxOwner = OWN_HOST; + pmboxq->context1 = NULL; + + if ((phba->pport->fc_flag & FC_OFFLINE_MODE) || + (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) + rc = MBX_NOT_FINISHED; + else + rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); + + if (rc != MBX_SUCCESS) { + if (rc != MBX_TIMEOUT) + mempool_free(pmboxq, phba->mbox_mem_pool); + return 0; + } + + if (mrpi) + *mrpi = pmb->un.varRdConfig.max_rpi; + if (arpi) + *arpi = pmb->un.varRdConfig.avail_rpi; + if (mxri) + *mxri = pmb->un.varRdConfig.max_xri; + if (axri) + *axri = pmb->un.varRdConfig.avail_xri; + if (mvpi) + *mvpi = pmb->un.varRdConfig.max_vpi; + if (avpi) + *avpi = pmb->un.varRdConfig.avail_vpi; + + mempool_free(pmboxq, phba->mbox_mem_pool); + return 1; +} + +static ssize_t +lpfc_max_rpi_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + uint32_t cnt; + + if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, NULL, NULL, NULL)) + return snprintf(buf, PAGE_SIZE, "%d\n", cnt); + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} + +static ssize_t +lpfc_used_rpi_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + uint32_t cnt, acnt; + + if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, &acnt, NULL, NULL)) + return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} + 
+static ssize_t +lpfc_max_xri_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + uint32_t cnt; + + if (lpfc_get_hba_info(phba, &cnt, NULL, NULL, NULL, NULL, NULL)) + return snprintf(buf, PAGE_SIZE, "%d\n", cnt); + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} + +static ssize_t +lpfc_used_xri_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + uint32_t cnt, acnt; + + if (lpfc_get_hba_info(phba, &cnt, &acnt, NULL, NULL, NULL, NULL)) + return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} + +static ssize_t +lpfc_max_vpi_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + uint32_t cnt; + + if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, NULL)) + return snprintf(buf, PAGE_SIZE, "%d\n", cnt); + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} + +static ssize_t +lpfc_used_vpi_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + uint32_t cnt, acnt; + + if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, &acnt)) + return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} + +static ssize_t +lpfc_npiv_info_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + + if (!(phba->max_vpi)) + return snprintf(buf, 
PAGE_SIZE, "NPIV Not Supported\n"); + if (vport->port_type == LPFC_PHYSICAL_PORT) + return snprintf(buf, PAGE_SIZE, "NPIV Physical\n"); + return snprintf(buf, PAGE_SIZE, "NPIV Virtual (VPI %d)\n", vport->vpi); +} + static ssize_t lpfc_poll_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; return snprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll); } @@ -402,8 +605,9 @@ lpfc_poll_store(struct class_device *cdev, const char *buf, size_t count) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; uint32_t creg_val; uint32_t old_val; int val=0; @@ -417,7 +621,7 @@ if ((val & 0x3) != val) return -EINVAL; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); old_val = phba->cfg_poll; @@ -432,16 +636,16 @@ lpfc_poll_start_timer(phba); } } else if (val != 0x0) { - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return -EINVAL; } if (!(val & DISABLE_FCP_RING_INT) && (old_val & DISABLE_FCP_RING_INT)) { - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); del_timer(&phba->fcp_poll_timer); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); creg_val = readl(phba->HCregaddr); creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING); writel(creg_val, phba->HCregaddr); @@ -450,7 +654,7 @@ phba->cfg_poll = val; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return strlen(buf); } @@ -459,8 +663,9 @@ static ssize_t \ lpfc_##attr##_show(struct class_device *cdev, char *buf) \ { \ - struct Scsi_Host *host = 
class_to_shost(cdev);\ - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\ + struct Scsi_Host *shost = class_to_shost(cdev);\ + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ + struct lpfc_hba *phba = vport->phba;\ int val = 0;\ val = phba->cfg_##attr;\ return snprintf(buf, PAGE_SIZE, "%d\n",\ @@ -471,8 +676,9 @@ static ssize_t \ lpfc_##attr##_show(struct class_device *cdev, char *buf) \ { \ - struct Scsi_Host *host = class_to_shost(cdev);\ - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\ + struct Scsi_Host *shost = class_to_shost(cdev);\ + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ + struct lpfc_hba *phba = vport->phba;\ int val = 0;\ val = phba->cfg_##attr;\ return snprintf(buf, PAGE_SIZE, "%#x\n",\ @@ -514,8 +720,9 @@ static ssize_t \ lpfc_##attr##_store(struct class_device *cdev, const char *buf, size_t count) \ { \ - struct Scsi_Host *host = class_to_shost(cdev);\ - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\ + struct Scsi_Host *shost = class_to_shost(cdev);\ + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ + struct lpfc_hba *phba = vport->phba;\ int val=0;\ if (!isdigit(buf[0]))\ return -EINVAL;\ @@ -576,7 +783,7 @@ static CLASS_DEVICE_ATTR(modeldesc, S_IRUGO, lpfc_modeldesc_show, NULL); static CLASS_DEVICE_ATTR(modelname, S_IRUGO, lpfc_modelname_show, NULL); static CLASS_DEVICE_ATTR(programtype, S_IRUGO, lpfc_programtype_show, NULL); -static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_portnum_show, NULL); +static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_vportnum_show, NULL); static CLASS_DEVICE_ATTR(fwrev, S_IRUGO, lpfc_fwrev_show, NULL); static CLASS_DEVICE_ATTR(hdw, S_IRUGO, lpfc_hdw_show, NULL); static CLASS_DEVICE_ATTR(state, S_IRUGO, lpfc_state_show, NULL); @@ -592,6 +799,13 @@ static CLASS_DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR, lpfc_board_mode_show, lpfc_board_mode_store); static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset); +static 
CLASS_DEVICE_ATTR(max_vpi, S_IRUGO, lpfc_max_vpi_show, NULL); +static CLASS_DEVICE_ATTR(used_vpi, S_IRUGO, lpfc_used_vpi_show, NULL); +static CLASS_DEVICE_ATTR(max_rpi, S_IRUGO, lpfc_max_rpi_show, NULL); +static CLASS_DEVICE_ATTR(used_rpi, S_IRUGO, lpfc_used_rpi_show, NULL); +static CLASS_DEVICE_ATTR(max_xri, S_IRUGO, lpfc_max_xri_show, NULL); +static CLASS_DEVICE_ATTR(used_xri, S_IRUGO, lpfc_used_xri_show, NULL); +static CLASS_DEVICE_ATTR(npiv_info, S_IRUGO, lpfc_npiv_info_show, NULL); static char *lpfc_soft_wwn_key = "C99G71SL8032A"; @@ -600,8 +814,9 @@ lpfc_soft_wwn_enable_store(struct class_device *cdev, const char *buf, size_t count) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; unsigned int cnt = count; /* @@ -634,8 +849,10 @@ static ssize_t lpfc_soft_wwpn_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + return snprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)phba->cfg_soft_wwpn); } @@ -644,8 +861,9 @@ static ssize_t lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct completion online_compl; int stat1=0, stat2=0; unsigned int i, j, cnt=count; @@ -680,9 +898,9 @@ } } phba->cfg_soft_wwpn = wwn_to_u64(wwpn); - fc_host_port_name(host) = phba->cfg_soft_wwpn; + 
fc_host_port_name(shost) = phba->cfg_soft_wwpn; if (phba->cfg_soft_wwnn) - fc_host_node_name(host) = phba->cfg_soft_wwnn; + fc_host_node_name(shost) = phba->cfg_soft_wwnn; dev_printk(KERN_NOTICE, &phba->pcidev->dev, "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no); @@ -777,6 +995,15 @@ static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR, lpfc_poll_show, lpfc_poll_store); +int lpfc_sli_mode = 0; +module_param(lpfc_sli_mode, int, 0); +MODULE_PARM_DESC(lpfc_sli_mode, "SLI mode selector:" + " 0 - auto (SLI-3 if supported)," + " 2 - select SLI-2 even on SLI-3 capable HBAs," + " 3 - select SLI-3"); + +LPFC_ATTR_R(npiv_enable, 0, 0, 1, "Enable NPIV functionality"); + /* # lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear # until the timer expires. Value range is [0,255]. Default value is 30. @@ -790,8 +1017,9 @@ static ssize_t lpfc_nodev_tmo_show(struct class_device *cdev, char *buf) { - struct Scsi_Host *host = class_to_shost(cdev); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; int val = 0; val = phba->cfg_devloss_tmo; return snprintf(buf, PAGE_SIZE, "%d\n", @@ -832,13 +1060,19 @@ static void lpfc_update_rport_devloss_tmo(struct lpfc_hba *phba) { + struct lpfc_vport *vport; + struct Scsi_Host *shost; struct lpfc_nodelist *ndlp; - spin_lock_irq(phba->host->host_lock); - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) + list_for_each_entry(vport, &phba->port_list, listentry) { + shost = lpfc_shost_from_vport(vport); + spin_lock_irq(shost->host_lock); + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) if (ndlp->rport) - ndlp->rport->dev_loss_tmo = phba->cfg_devloss_tmo; - spin_unlock_irq(phba->host->host_lock); + ndlp->rport->dev_loss_tmo = + phba->cfg_devloss_tmo; + spin_unlock_irq(shost->host_lock); + } } static int @@ -946,6 +1180,33 @@ "Max number of 
FCP commands we can queue to a lpfc HBA"); /* +# peer_port_login: This parameter allows/prevents logins +# between peer ports hosted on the same physical port. +# When this parameter is set 0 peer ports of same physical port +# are not allowed to login to each other. +# When this parameter is set 1 peer ports of same physical port +# are allowed to login to each other. +# Default value of this parameter is 0. +*/ +LPFC_ATTR_R(peer_port_login, 0, 0, 1, + "Allow peer ports on the same physical port to login to each " + "other."); + +/* +# vport_restrict_login: This parameter allows/prevents logins +# between Virtual Ports and remote initiators. +# When this parameter is not set (0) Virtual Ports will accept PLOGIs from +# other initiators and will attempt to PLOGI all remote ports. +# When this parameter is set (1) Virtual Ports will reject PLOGIs from +# remote ports and will not attempt to PLOGI to other initiators. +# This parameter does not restrict to the physical port. +# This parameter does not restrict logins to Fabric resident remote ports. +# Default value of this parameter is 1. +*/ +LPFC_ATTR_RW(vport_restrict_login, 1, 0, 1, + "Restrict virtual ports login to remote initiators."); + +/* # Some disk devices have a "select ID" or "select Target" capability. # From a protocol standpoint "select ID" usually means select the # Fibre channel "ALPA". 
In the FC-AL Profile there is an "informative @@ -1088,7 +1349,8 @@ LPFC_ATTR_R(use_msi, 0, 0, 1, "Use Message Signaled Interrupts, if possible"); -struct class_device_attribute *lpfc_host_attrs[] = { + +struct class_device_attribute *lpfc_hba_attrs[] = { &class_device_attr_info, &class_device_attr_serialnum, &class_device_attr_modeldesc, @@ -1104,6 +1366,8 @@ &class_device_attr_lpfc_log_verbose, &class_device_attr_lpfc_lun_queue_depth, &class_device_attr_lpfc_hba_queue_depth, + &class_device_attr_lpfc_peer_port_login, + &class_device_attr_lpfc_vport_restrict_login, &class_device_attr_lpfc_nodev_tmo, &class_device_attr_lpfc_devloss_tmo, &class_device_attr_lpfc_fcp_class, @@ -1119,9 +1383,17 @@ &class_device_attr_lpfc_multi_ring_type, &class_device_attr_lpfc_fdmi_on, &class_device_attr_lpfc_max_luns, + &class_device_attr_lpfc_npiv_enable, &class_device_attr_nport_evt_cnt, &class_device_attr_management_version, &class_device_attr_board_mode, + &class_device_attr_max_vpi, + &class_device_attr_used_vpi, + &class_device_attr_max_rpi, + &class_device_attr_used_rpi, + &class_device_attr_max_xri, + &class_device_attr_used_xri, + &class_device_attr_npiv_info, &class_device_attr_issue_reset, &class_device_attr_lpfc_poll, &class_device_attr_lpfc_poll_tmo, @@ -1136,9 +1408,11 @@ sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count) { size_t buf_off; - struct Scsi_Host *host = class_to_shost(container_of(kobj, - struct class_device, kobj)); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct class_device *cdev = container_of(kobj, struct class_device, + kobj); + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; if ((off + count) > FF_REG_AREA_SIZE) return -ERANGE; @@ -1148,18 +1422,16 @@ if (off % 4 || count % 4 || (unsigned long)buf % 4) return -EINVAL; - spin_lock_irq(phba->host->host_lock); - - if (!(phba->fc_flag & 
FC_OFFLINE_MODE)) { - spin_unlock_irq(phba->host->host_lock); + if (!(vport->fc_flag & FC_OFFLINE_MODE)) { return -EPERM; } + spin_lock_irq(&phba->hbalock); for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) writel(*((uint32_t *)(buf + buf_off)), phba->ctrl_regs_memmap_p + off + buf_off); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return count; } @@ -1169,9 +1441,11 @@ { size_t buf_off; uint32_t * tmp_ptr; - struct Scsi_Host *host = class_to_shost(container_of(kobj, - struct class_device, kobj)); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct class_device *cdev = container_of(kobj, struct class_device, + kobj); + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; if (off > FF_REG_AREA_SIZE) return -ERANGE; @@ -1184,14 +1458,14 @@ if (off % 4 || count % 4 || (unsigned long)buf % 4) return -EINVAL; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) { tmp_ptr = (uint32_t *)(buf + buf_off); *tmp_ptr = readl(phba->ctrl_regs_memmap_p + off + buf_off); } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return count; } @@ -1200,7 +1474,6 @@ .attr = { .name = "ctlreg", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 256, .read = sysfs_ctlreg_read, @@ -1209,7 +1482,7 @@ static void -sysfs_mbox_idle (struct lpfc_hba * phba) +sysfs_mbox_idle(struct lpfc_hba *phba) { phba->sysfs_mbox.state = SMBOX_IDLE; phba->sysfs_mbox.offset = 0; @@ -1224,10 +1497,12 @@ static ssize_t sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count) { - struct Scsi_Host * host = - class_to_shost(container_of(kobj, struct class_device, kobj)); - struct lpfc_hba * phba = (struct lpfc_hba*)host->hostdata; - struct lpfcMboxq * mbox = NULL; + struct class_device *cdev = container_of(kobj, 
struct class_device, + kobj); + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + struct lpfcMboxq *mbox = NULL; if ((count + off) > MAILBOX_CMD_SIZE) return -ERANGE; @@ -1245,7 +1520,7 @@ memset(mbox, 0, sizeof (LPFC_MBOXQ_t)); } - spin_lock_irq(host->host_lock); + spin_lock_irq(&phba->hbalock); if (off == 0) { if (phba->sysfs_mbox.mbox) @@ -1256,9 +1531,9 @@ } else { if (phba->sysfs_mbox.state != SMBOX_WRITING || phba->sysfs_mbox.offset != off || - phba->sysfs_mbox.mbox == NULL ) { + phba->sysfs_mbox.mbox == NULL) { sysfs_mbox_idle(phba); - spin_unlock_irq(host->host_lock); + spin_unlock_irq(&phba->hbalock); return -EAGAIN; } } @@ -1268,7 +1543,7 @@ phba->sysfs_mbox.offset = off + count; - spin_unlock_irq(host->host_lock); + spin_unlock_irq(&phba->hbalock); return count; } @@ -1276,10 +1551,11 @@ static ssize_t sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count) { - struct Scsi_Host *host = - class_to_shost(container_of(kobj, struct class_device, - kobj)); - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct class_device *cdev = container_of(kobj, struct class_device, + kobj); + struct Scsi_Host *shost = class_to_shost(cdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; int rc; if (off > MAILBOX_CMD_SIZE) @@ -1294,7 +1570,7 @@ if (off && count == 0) return 0; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); if (off == 0 && phba->sysfs_mbox.state == SMBOX_WRITING && @@ -1317,12 +1593,12 @@ case MBX_SET_MASK: case MBX_SET_SLIM: case MBX_SET_DEBUG: - if (!(phba->fc_flag & FC_OFFLINE_MODE)) { + if (!(vport->fc_flag & FC_OFFLINE_MODE)) { printk(KERN_WARNING "mbox_read:Command 0x%x " "is illegal in on-line state\n", phba->sysfs_mbox.mbox->mb.mbxCommand); sysfs_mbox_idle(phba); - spin_unlock_irq(phba->host->host_lock); + 
spin_unlock_irq(&phba->hbalock); return -EPERM; } case MBX_LOAD_SM: @@ -1352,48 +1628,48 @@ printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n", phba->sysfs_mbox.mbox->mb.mbxCommand); sysfs_mbox_idle(phba); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return -EPERM; default: printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n", phba->sysfs_mbox.mbox->mb.mbxCommand); sysfs_mbox_idle(phba); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return -EPERM; } - if (phba->fc_flag & FC_BLOCK_MGMT_IO) { + phba->sysfs_mbox.mbox->vport = vport; + + if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) { sysfs_mbox_idle(phba); - spin_unlock_irq(host->host_lock); + spin_unlock_irq(&phba->hbalock); return -EAGAIN; } - if ((phba->fc_flag & FC_OFFLINE_MODE) || + if ((vport->fc_flag & FC_OFFLINE_MODE) || (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){ - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); rc = lpfc_sli_issue_mbox (phba, phba->sysfs_mbox.mbox, MBX_POLL); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); } else { - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); rc = lpfc_sli_issue_mbox_wait (phba, phba->sysfs_mbox.mbox, lpfc_mbox_tmo_val(phba, phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); } if (rc != MBX_SUCCESS) { if (rc == MBX_TIMEOUT) { - phba->sysfs_mbox.mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; phba->sysfs_mbox.mbox = NULL; } sysfs_mbox_idle(phba); - spin_unlock_irq(host->host_lock); + spin_unlock_irq(&phba->hbalock); return (rc == MBX_TIMEOUT) ? 
-ETIME : -ENODEV; } phba->sysfs_mbox.state = SMBOX_READING; @@ -1402,7 +1678,7 @@ phba->sysfs_mbox.state != SMBOX_READING) { printk(KERN_WARNING "mbox_read: Bad State\n"); sysfs_mbox_idle(phba); - spin_unlock_irq(host->host_lock); + spin_unlock_irq(&phba->hbalock); return -EAGAIN; } @@ -1413,7 +1689,7 @@ if (phba->sysfs_mbox.offset == MAILBOX_CMD_SIZE) sysfs_mbox_idle(phba); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return count; } @@ -1422,7 +1698,6 @@ .attr = { .name = "mbox", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = MAILBOX_CMD_SIZE, .read = sysfs_mbox_read, @@ -1430,35 +1705,35 @@ }; int -lpfc_alloc_sysfs_attr(struct lpfc_hba *phba) +lpfc_alloc_sysfs_attr(struct lpfc_vport *vport) { - struct Scsi_Host *host = phba->host; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); int error; - error = sysfs_create_bin_file(&host->shost_classdev.kobj, + error = sysfs_create_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr); if (error) goto out; - error = sysfs_create_bin_file(&host->shost_classdev.kobj, + error = sysfs_create_bin_file(&shost->shost_classdev.kobj, &sysfs_mbox_attr); if (error) goto out_remove_ctlreg_attr; return 0; out_remove_ctlreg_attr: - sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr); + sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr); out: return error; } void -lpfc_free_sysfs_attr(struct lpfc_hba *phba) +lpfc_free_sysfs_attr(struct lpfc_vport *vport) { - struct Scsi_Host *host = phba->host; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_mbox_attr); - sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr); + sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_mbox_attr); + sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr); } @@ -1469,26 +1744,30 @@ static void lpfc_get_host_port_id(struct Scsi_Host *shost) { - struct lpfc_hba 
*phba = (struct lpfc_hba*)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + /* note: fc_myDID already in cpu endianness */ - fc_host_port_id(shost) = phba->fc_myDID; + fc_host_port_id(shost) = vport->fc_myDID; } static void lpfc_get_host_port_type(struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; spin_lock_irq(shost->host_lock); - if (phba->hba_state == LPFC_HBA_READY) { + if (vport->port_type == LPFC_NPIV_PORT) { + fc_host_port_type(shost) = FC_PORTTYPE_NPIV; + } else if (lpfc_is_link_up(phba)) { if (phba->fc_topology == TOPOLOGY_LOOP) { - if (phba->fc_flag & FC_PUBLIC_LOOP) + if (vport->fc_flag & FC_PUBLIC_LOOP) fc_host_port_type(shost) = FC_PORTTYPE_NLPORT; else fc_host_port_type(shost) = FC_PORTTYPE_LPORT; } else { - if (phba->fc_flag & FC_FABRIC) + if (vport->fc_flag & FC_FABRIC) fc_host_port_type(shost) = FC_PORTTYPE_NPORT; else fc_host_port_type(shost) = FC_PORTTYPE_PTP; @@ -1502,29 +1781,20 @@ static void lpfc_get_host_port_state(struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; spin_lock_irq(shost->host_lock); - if (phba->fc_flag & FC_OFFLINE_MODE) + if (vport->fc_flag & FC_OFFLINE_MODE) fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE; else { - switch (phba->hba_state) { - case LPFC_STATE_UNKNOWN: - case LPFC_WARM_START: - case LPFC_INIT_START: - case LPFC_INIT_MBX_CMDS: + switch (phba->link_state) { + case LPFC_LINK_UNKNOWN: case LPFC_LINK_DOWN: fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN; break; case LPFC_LINK_UP: - case LPFC_LOCAL_CFG_LINK: - case LPFC_FLOGI: - case LPFC_FABRIC_CFG_LINK: - case LPFC_NS_REG: - case LPFC_NS_QRY: - case LPFC_BUILD_DISC_LIST: - case LPFC_DISC_AUTH: case LPFC_CLEAR_LA: case LPFC_HBA_READY: /* 
Links up, beyond this port_type reports state */ @@ -1545,11 +1815,12 @@ static void lpfc_get_host_speed(struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; spin_lock_irq(shost->host_lock); - if (phba->hba_state == LPFC_HBA_READY) { + if (lpfc_is_link_up(phba)) { switch(phba->fc_linkspeed) { case LA_1GHZ_LINK: fc_host_speed(shost) = FC_PORTSPEED_1GBIT; @@ -1575,38 +1846,30 @@ static void lpfc_get_host_fabric_name (struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; u64 node_name; spin_lock_irq(shost->host_lock); - if ((phba->fc_flag & FC_FABRIC) || + if ((vport->fc_flag & FC_FABRIC) || ((phba->fc_topology == TOPOLOGY_LOOP) && - (phba->fc_flag & FC_PUBLIC_LOOP))) + (vport->fc_flag & FC_PUBLIC_LOOP))) node_name = wwn_to_u64(phba->fc_fabparam.nodeName.u.wwn); else /* fabric is local port if there is no F/FL_Port */ - node_name = wwn_to_u64(phba->fc_nodename.u.wwn); + node_name = wwn_to_u64(vport->fc_nodename.u.wwn); spin_unlock_irq(shost->host_lock); fc_host_fabric_name(shost) = node_name; } -static void -lpfc_get_host_symbolic_name (struct Scsi_Host *shost) -{ - struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; - - spin_lock_irq(shost->host_lock); - lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost)); - spin_unlock_irq(shost->host_lock); -} - static struct fc_host_statistics * lpfc_get_stats(struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_sli *psli = &phba->sli; struct fc_host_statistics *hs = &phba->link_stats; struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets; @@ -1615,7 +1878,16 @@ unsigned long seconds; 
int rc = 0; - if (phba->fc_flag & FC_BLOCK_MGMT_IO) + /* + * prevent udev from issuing mailbox commands until the port is + * configured. + */ + if (phba->link_state < LPFC_LINK_DOWN || + !phba->mbox_mem_pool || + (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0) + return NULL; + + if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) return NULL; pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -1627,17 +1899,16 @@ pmb->mbxCommand = MBX_READ_STATUS; pmb->mbxOwner = OWN_HOST; pmboxq->context1 = NULL; + pmboxq->vport = vport; - if ((phba->fc_flag & FC_OFFLINE_MODE) || + if ((vport->fc_flag & FC_OFFLINE_MODE) || (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); else rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); if (rc != MBX_SUCCESS) { - if (rc == MBX_TIMEOUT) - pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - else + if (rc != MBX_TIMEOUT) mempool_free(pmboxq, phba->mbox_mem_pool); return NULL; } @@ -1653,18 +1924,17 @@ pmb->mbxCommand = MBX_READ_LNK_STAT; pmb->mbxOwner = OWN_HOST; pmboxq->context1 = NULL; + pmboxq->vport = vport; - if ((phba->fc_flag & FC_OFFLINE_MODE) || + if ((vport->fc_flag & FC_OFFLINE_MODE) || (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); else rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); if (rc != MBX_SUCCESS) { - if (rc == MBX_TIMEOUT) - pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - else - mempool_free( pmboxq, phba->mbox_mem_pool); + if (rc != MBX_TIMEOUT) + mempool_free(pmboxq, phba->mbox_mem_pool); return NULL; } @@ -1711,14 +1981,15 @@ static void lpfc_reset_stats(struct Scsi_Host *shost) { - struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_sli *psli = &phba->sli; - struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets; + struct lpfc_lnk_stat *lso = &psli->lnk_stat_offsets; LPFC_MBOXQ_t *pmboxq; 
MAILBOX_t *pmb; int rc = 0; - if (phba->fc_flag & FC_BLOCK_MGMT_IO) + if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) return; pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -1731,17 +2002,16 @@ pmb->mbxOwner = OWN_HOST; pmb->un.varWords[0] = 0x1; /* reset request */ pmboxq->context1 = NULL; + pmboxq->vport = vport; - if ((phba->fc_flag & FC_OFFLINE_MODE) || + if ((vport->fc_flag & FC_OFFLINE_MODE) || (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); else rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); if (rc != MBX_SUCCESS) { - if (rc == MBX_TIMEOUT) - pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - else + if (rc != MBX_TIMEOUT) mempool_free(pmboxq, phba->mbox_mem_pool); return; } @@ -1750,17 +2020,16 @@ pmb->mbxCommand = MBX_READ_LNK_STAT; pmb->mbxOwner = OWN_HOST; pmboxq->context1 = NULL; + pmboxq->vport = vport; - if ((phba->fc_flag & FC_OFFLINE_MODE) || + if ((vport->fc_flag & FC_OFFLINE_MODE) || (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); else rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); if (rc != MBX_SUCCESS) { - if (rc == MBX_TIMEOUT) - pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - else + if (rc != MBX_TIMEOUT) mempool_free( pmboxq, phba->mbox_mem_pool); return; } @@ -1790,12 +2059,12 @@ lpfc_get_node_by_target(struct scsi_target *starget) { struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); - struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_nodelist *ndlp; spin_lock_irq(shost->host_lock); /* Search for this, mapped, target ID */ - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state == NLP_STE_MAPPED_NODE && starget->id == ndlp->nlp_sid) { spin_unlock_irq(shost->host_lock); @@ -1885,8 +2154,66 @@ .get_host_fabric_name = lpfc_get_host_fabric_name, 
.show_host_fabric_name = 1, - .get_host_symbolic_name = lpfc_get_host_symbolic_name, - .show_host_symbolic_name = 1, + /* + * The LPFC driver treats linkdown handling as target loss events + * so there are no sysfs handlers for link_down_tmo. + */ + + .get_fc_host_stats = lpfc_get_stats, + .reset_fc_host_stats = lpfc_reset_stats, + + .dd_fcrport_size = sizeof(struct lpfc_rport_data), + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + + .set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo, + .show_rport_dev_loss_tmo = 1, + + .get_starget_port_id = lpfc_get_starget_port_id, + .show_starget_port_id = 1, + + .get_starget_node_name = lpfc_get_starget_node_name, + .show_starget_node_name = 1, + + .get_starget_port_name = lpfc_get_starget_port_name, + .show_starget_port_name = 1, + + .issue_fc_host_lip = lpfc_issue_lip, + .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk, + .terminate_rport_io = lpfc_terminate_rport_io, + + .vport_create = lpfc_vport_create, + .vport_delete = lpfc_vport_delete, + .dd_fcvport_size = sizeof(struct lpfc_vport *), +}; + +struct fc_function_template lpfc_vport_transport_functions = { + /* fixed attributes the driver supports */ + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_supported_speeds = 1, + .show_host_maxframe_size = 1, + + /* dynamic attributes the driver supports */ + .get_host_port_id = lpfc_get_host_port_id, + .show_host_port_id = 1, + + .get_host_port_type = lpfc_get_host_port_type, + .show_host_port_type = 1, + + .get_host_port_state = lpfc_get_host_port_state, + .show_host_port_state = 1, + + /* active_fc4s is shown but doesn't change (thus no get function) */ + .show_host_active_fc4s = 1, + + .get_host_speed = lpfc_get_host_speed, + .show_host_speed = 1, + + .get_host_fabric_name = lpfc_get_host_fabric_name, + .show_host_fabric_name = 1, /* * The LPFC driver treats linkdown handling as target loss events @@ -1915,6 +2242,8 
@@ .issue_fc_host_lip = lpfc_issue_lip, .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk, .terminate_rport_io = lpfc_terminate_rport_io, + + .vport_disable = lpfc_vport_disable, }; void @@ -1937,6 +2266,9 @@ lpfc_discovery_threads_init(phba, lpfc_discovery_threads); lpfc_max_luns_init(phba, lpfc_max_luns); lpfc_poll_tmo_init(phba, lpfc_poll_tmo); + lpfc_peer_port_login_init(phba, lpfc_peer_port_login); + lpfc_npiv_enable_init(phba, lpfc_npiv_enable); + lpfc_vport_restrict_login_init(phba, lpfc_vport_restrict_login); lpfc_use_msi_init(phba, lpfc_use_msi); lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo); lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo); diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_crtn.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_crtn.h 2007-12-21 15:36:12.000000000 -0500 @@ -23,92 +23,114 @@ struct fc_rport; void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t); void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *); +void lpfc_heart_beat(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, struct lpfc_dmabuf *mp); void lpfc_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *); +void lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport); void lpfc_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); -int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *); +int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *, int); void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_read_lnk_stat(struct lpfc_hba *, LPFC_MBOXQ_t *); -int lpfc_reg_login(struct lpfc_hba *, uint32_t, uint8_t *, LPFC_MBOXQ_t *, - uint32_t); -void lpfc_unreg_login(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *); -void lpfc_unreg_did(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *); +int lpfc_reg_login(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *, + LPFC_MBOXQ_t *, uint32_t); +void 
lpfc_unreg_login(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *); +void lpfc_unreg_did(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *); +void lpfc_reg_vpi(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *); +void lpfc_unreg_vpi(struct lpfc_hba *, uint16_t, LPFC_MBOXQ_t *); void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t); - +void lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove); int lpfc_linkdown(struct lpfc_hba *); void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); +void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); -void lpfc_dequeue_node(struct lpfc_hba *, struct lpfc_nodelist *); -void lpfc_nlp_set_state(struct lpfc_hba *, struct lpfc_nodelist *, int); -void lpfc_drop_node(struct lpfc_hba *, struct lpfc_nodelist *); -void lpfc_set_disctmo(struct lpfc_hba *); -int lpfc_can_disctmo(struct lpfc_hba *); -int lpfc_unreg_rpi(struct lpfc_hba *, struct lpfc_nodelist *); +void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *); +void lpfc_nlp_set_state(struct lpfc_vport *, struct lpfc_nodelist *, int); +void lpfc_drop_node(struct lpfc_vport *, struct lpfc_nodelist *); +void lpfc_set_disctmo(struct lpfc_vport *); +int lpfc_can_disctmo(struct lpfc_vport *); +int lpfc_unreg_rpi(struct lpfc_vport *, struct lpfc_nodelist *); +void lpfc_unreg_all_rpis(struct lpfc_vport *); +void lpfc_unreg_default_rpis(struct lpfc_vport *); +void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *); + int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *, struct lpfc_nodelist *); -void lpfc_nlp_init(struct lpfc_hba *, struct 
lpfc_nodelist *, uint32_t); +void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t); struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); int lpfc_nlp_put(struct lpfc_nodelist *); -struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_hba *, uint32_t); -void lpfc_disc_list_loopmap(struct lpfc_hba *); -void lpfc_disc_start(struct lpfc_hba *); -void lpfc_disc_flush_list(struct lpfc_hba *); +struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t); +void lpfc_disc_list_loopmap(struct lpfc_vport *); +void lpfc_disc_start(struct lpfc_vport *); +void lpfc_disc_flush_list(struct lpfc_vport *); +void lpfc_cleanup_discovery_resources(struct lpfc_vport *); void lpfc_disc_timeout(unsigned long); -struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi); -struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi); +struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_vport *, uint16_t); +struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_vport *, uint16_t); +void lpfc_worker_wake_up(struct lpfc_hba *); int lpfc_workq_post_event(struct lpfc_hba *, void *, void *, uint32_t); int lpfc_do_work(void *); -int lpfc_disc_state_machine(struct lpfc_hba *, struct lpfc_nodelist *, void *, +int lpfc_disc_state_machine(struct lpfc_vport *, struct lpfc_nodelist *, void *, uint32_t); -int lpfc_check_sparm(struct lpfc_hba *, struct lpfc_nodelist *, +void lpfc_register_new_vport(struct lpfc_hba *, struct lpfc_vport *, + struct lpfc_nodelist *); +void lpfc_do_scr_ns_plogi(struct lpfc_hba *, struct lpfc_vport *); +int lpfc_check_sparm(struct lpfc_vport *, struct lpfc_nodelist *, struct serv_parm *, uint32_t); -int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist * ndlp); +int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *); +int lpfc_els_chk_latt(struct lpfc_vport *); int lpfc_els_abort_flogi(struct lpfc_hba *); -int lpfc_initial_flogi(struct lpfc_hba *); -int lpfc_issue_els_plogi(struct 
lpfc_hba *, uint32_t, uint8_t); -int lpfc_issue_els_prli(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t); -int lpfc_issue_els_adisc(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t); -int lpfc_issue_els_logo(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t); -int lpfc_issue_els_scr(struct lpfc_hba *, uint32_t, uint8_t); +int lpfc_initial_flogi(struct lpfc_vport *); +int lpfc_initial_fdisc(struct lpfc_vport *); +int lpfc_issue_els_fdisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); +int lpfc_issue_els_plogi(struct lpfc_vport *, uint32_t, uint8_t); +int lpfc_issue_els_prli(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); +int lpfc_issue_els_adisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); +int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); +int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *); +int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, uint8_t); int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *); -int lpfc_els_rsp_acc(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, +int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *); +int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *, struct lpfc_nodelist *, LPFC_MBOXQ_t *, uint8_t); -int lpfc_els_rsp_reject(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, +int lpfc_els_rsp_reject(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *, + struct lpfc_nodelist *, LPFC_MBOXQ_t *); +int lpfc_els_rsp_adisc_acc(struct lpfc_vport *, struct lpfc_iocbq *, struct lpfc_nodelist *); -int lpfc_els_rsp_adisc_acc(struct lpfc_hba *, struct lpfc_iocbq *, +int lpfc_els_rsp_prli_acc(struct lpfc_vport *, struct lpfc_iocbq *, struct lpfc_nodelist *); -int lpfc_els_rsp_prli_acc(struct lpfc_hba *, struct lpfc_iocbq *, - struct lpfc_nodelist *); -void lpfc_cancel_retry_delay_tmo(struct lpfc_hba *, struct lpfc_nodelist *); +void lpfc_cancel_retry_delay_tmo(struct lpfc_vport *, struct lpfc_nodelist *); void 
lpfc_els_retry_delay(unsigned long); void lpfc_els_retry_delay_handler(struct lpfc_nodelist *); +void lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *); void lpfc_els_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -int lpfc_els_handle_rscn(struct lpfc_hba *); -int lpfc_els_flush_rscn(struct lpfc_hba *); -int lpfc_rscn_payload_check(struct lpfc_hba *, uint32_t); -void lpfc_els_flush_cmd(struct lpfc_hba *); -int lpfc_els_disc_adisc(struct lpfc_hba *); -int lpfc_els_disc_plogi(struct lpfc_hba *); +int lpfc_els_handle_rscn(struct lpfc_vport *); +void lpfc_els_flush_rscn(struct lpfc_vport *); +int lpfc_rscn_payload_check(struct lpfc_vport *, uint32_t); +void lpfc_els_flush_cmd(struct lpfc_vport *); +int lpfc_els_disc_adisc(struct lpfc_vport *); +int lpfc_els_disc_plogi(struct lpfc_vport *); void lpfc_els_timeout(unsigned long); -void lpfc_els_timeout_handler(struct lpfc_hba *); +void lpfc_els_timeout_handler(struct lpfc_vport *); +void lpfc_hb_timeout(unsigned long); +void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -int lpfc_ns_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int); -int lpfc_fdmi_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int); +int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); +int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); void lpfc_fdmi_tmo(unsigned long); -void lpfc_fdmi_tmo_handler(struct lpfc_hba *); +void lpfc_fdmi_timeout_handler(struct lpfc_vport *vport); int lpfc_config_port_prep(struct lpfc_hba *); int lpfc_config_port_post(struct lpfc_hba *); @@ -136,16 +158,23 @@ void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *); LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *); +void lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_mbox_tmo_val(struct lpfc_hba *, int); +void lpfc_config_hbq(struct lpfc_hba *, struct lpfc_hbq_init 
*, uint32_t , + LPFC_MBOXQ_t *); +struct lpfc_hbq_entry * lpfc_sli_next_hbq_slot(struct lpfc_hba *, uint32_t); + int lpfc_mem_alloc(struct lpfc_hba *); void lpfc_mem_free(struct lpfc_hba *); +void lpfc_stop_vport_timers(struct lpfc_vport *); void lpfc_poll_timeout(unsigned long ptr); void lpfc_poll_start_timer(struct lpfc_hba * phba); void lpfc_sli_poll_fcp_ring(struct lpfc_hba * hba); struct lpfc_iocbq * lpfc_sli_get_iocbq(struct lpfc_hba *); void lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb); +void __lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb); uint16_t lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocb); void lpfc_reset_barrier(struct lpfc_hba * phba); @@ -154,6 +183,7 @@ int lpfc_sli_brdreset(struct lpfc_hba *); int lpfc_sli_brdrestart(struct lpfc_hba *); int lpfc_sli_hba_setup(struct lpfc_hba *); +int lpfc_sli_host_down(struct lpfc_vport *); int lpfc_sli_hba_down(struct lpfc_hba *); int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t); int lpfc_sli_handle_mb_event(struct lpfc_hba *); @@ -164,12 +194,17 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *, uint32_t); void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); -int lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); +void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_dmabuf *); struct lpfc_dmabuf *lpfc_sli_ringpostbuf_get(struct lpfc_hba *, struct lpfc_sli_ring *, dma_addr_t); +int lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *, uint32_t); +int lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *, uint32_t); +void lpfc_sli_hbqbuf_free_all(struct lpfc_hba *); +struct hbq_dmabuf *lpfc_sli_hbqbuf_find(struct lpfc_hba *, uint32_t); +int lpfc_sli_hbq_size(void); int lpfc_sli_issue_abort_iotag(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); int 
lpfc_sli_sum_iocb(struct lpfc_hba *, struct lpfc_sli_ring *, uint16_t, @@ -180,8 +215,12 @@ void lpfc_mbox_timeout(unsigned long); void lpfc_mbox_timeout_handler(struct lpfc_hba *); -struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_hba *, uint32_t); -struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_hba *, struct lpfc_name *); +struct lpfc_nodelist *__lpfc_find_node(struct lpfc_vport *, node_filter, + void *); +struct lpfc_nodelist *lpfc_find_node(struct lpfc_vport *, node_filter, void *); +struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_vport *, uint32_t); +struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_vport *, + struct lpfc_name *); int lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq, uint32_t timeout); @@ -195,25 +234,56 @@ struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb); +void *lpfc_hbq_alloc(struct lpfc_hba *, int, dma_addr_t *); +void lpfc_hbq_free(struct lpfc_hba *, void *, dma_addr_t); +void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *); + void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *); +void __lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); +void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); /* Function prototypes. 
*/ const char* lpfc_info(struct Scsi_Host *); -void lpfc_scan_start(struct Scsi_Host *); int lpfc_scan_finished(struct Scsi_Host *, unsigned long); void lpfc_get_cfgparam(struct lpfc_hba *); -int lpfc_alloc_sysfs_attr(struct lpfc_hba *); -void lpfc_free_sysfs_attr(struct lpfc_hba *); -extern struct class_device_attribute *lpfc_host_attrs[]; +int lpfc_alloc_sysfs_attr(struct lpfc_vport *); +void lpfc_free_sysfs_attr(struct lpfc_vport *); +extern struct class_device_attribute *lpfc_hba_attrs[]; extern struct scsi_host_template lpfc_template; extern struct fc_function_template lpfc_transport_functions; +extern struct fc_function_template lpfc_vport_transport_functions; +extern int lpfc_sli_mode; -void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp); +int lpfc_vport_symbolic_node_name(struct lpfc_vport *, char *, size_t); void lpfc_terminate_rport_io(struct fc_rport *); void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport); +struct lpfc_vport *lpfc_create_port(struct lpfc_hba *, int, struct fc_vport *); +int lpfc_vport_disable(struct fc_vport *fc_vport, bool disable); +void lpfc_mbx_unreg_vpi(struct lpfc_vport *); +void destroy_port(struct lpfc_vport *); +int lpfc_get_instance(void); +void lpfc_host_attrib_init(struct Scsi_Host *); + +extern void lpfc_debugfs_initialize(struct lpfc_vport *); +extern void lpfc_debugfs_terminate(struct lpfc_vport *); +extern void lpfc_debugfs_disc_trc(struct lpfc_vport *, int, char *, uint32_t, + uint32_t, uint32_t); + +/* Interface exported by fabric iocb scheduler */ +int lpfc_issue_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *); +void lpfc_fabric_abort_vport(struct lpfc_vport *); +void lpfc_fabric_abort_nport(struct lpfc_nodelist *); +void lpfc_fabric_abort_hba(struct lpfc_hba *); +void lpfc_fabric_abort_flogi(struct lpfc_hba *); +void lpfc_fabric_block_timeout(unsigned long); +void lpfc_unblock_fabric_iocbs(struct lpfc_hba *); +void lpfc_adjust_queue_depth(struct lpfc_hba *); +void 
lpfc_ramp_down_queue_handler(struct lpfc_hba *); +void lpfc_ramp_up_queue_handler(struct lpfc_hba *); + #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code) #define HBA_EVENT_RSCN 5 #define HBA_EVENT_LINK_UP 2 diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_ct.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_ct.c 2007-12-21 15:36:12.000000000 -0500 @@ -40,6 +40,8 @@ #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_version.h" +#include "lpfc_vport.h" +#include "lpfc_debugfs.h" #define HBA_PORTSPEED_UNKNOWN 0 /* Unknown - transceiver * incapable of reporting */ @@ -58,24 +60,68 @@ /* * lpfc_ct_unsol_event */ +static void +lpfc_ct_unsol_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq, + struct lpfc_dmabuf *mp, uint32_t size) +{ + if (!mp) { + printk(KERN_ERR "%s (%d): Unsolited CT, no buffer, " + "piocbq = %p, status = x%x, mp = %p, size = %d\n", + __FUNCTION__, __LINE__, + piocbq, piocbq->iocb.ulpStatus, mp, size); + } + + printk(KERN_ERR "%s (%d): Ignoring unsolicted CT piocbq = %p, " + "buffer = %p, size = %d, status = x%x\n", + __FUNCTION__, __LINE__, + piocbq, mp, size, + piocbq->iocb.ulpStatus); + +} + +static void +lpfc_ct_ignore_hbq_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq, + struct lpfc_dmabuf *mp, uint32_t size) +{ + if (!mp) { + printk(KERN_ERR "%s (%d): Unsolited CT, no " + "HBQ buffer, piocbq = %p, status = x%x\n", + __FUNCTION__, __LINE__, + piocbq, piocbq->iocb.ulpStatus); + } else { + lpfc_ct_unsol_buffer(phba, piocbq, mp, size); + printk(KERN_ERR "%s (%d): Ignoring unsolicted CT " + "piocbq = %p, buffer = %p, size = %d, " + "status = x%x\n", + __FUNCTION__, __LINE__, + piocbq, mp, size, piocbq->iocb.ulpStatus); + } +} + void -lpfc_ct_unsol_event(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocbq) +lpfc_ct_unsol_event(struct lpfc_hba 
*phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocbq) { - struct lpfc_iocbq *next_piocbq; - struct lpfc_dmabuf *pmbuf = NULL; - struct lpfc_dmabuf *matp, *next_matp; - uint32_t ctx = 0, size = 0, cnt = 0; + struct lpfc_dmabuf *mp = NULL; IOCB_t *icmd = &piocbq->iocb; - IOCB_t *save_icmd = icmd; - int i, go_exit = 0; - struct list_head head; + int i; + struct lpfc_iocbq *iocbq; + dma_addr_t paddr; + uint32_t size; + struct lpfc_dmabuf *bdeBuf1 = piocbq->context2; + struct lpfc_dmabuf *bdeBuf2 = piocbq->context3; + + piocbq->context2 = NULL; + piocbq->context3 = NULL; - if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) && + if (unlikely(icmd->ulpStatus == IOSTAT_NEED_BUFFER)) { + lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ); + } else if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) && ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) { /* Not enough posted buffers; Try posting more buffers */ phba->fc_stat.NoRcvBuf++; + if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)) lpfc_post_buffer(phba, pring, 0, 1); return; } @@ -86,66 +132,56 @@ if (icmd->ulpBdeCount == 0) return; - INIT_LIST_HEAD(&head); - list_add_tail(&head, &piocbq->list); - - list_for_each_entry_safe(piocbq, next_piocbq, &head, list) { - icmd = &piocbq->iocb; - if (ctx == 0) - ctx = (uint32_t) (icmd->ulpContext); - if (icmd->ulpBdeCount == 0) + if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + list_for_each_entry(iocbq, &piocbq->list, list) { + icmd = &iocbq->iocb; + if (icmd->ulpBdeCount == 0) { + printk(KERN_ERR "%s (%d): Unsolited CT, no " + "BDE, iocbq = %p, status = x%x\n", + __FUNCTION__, __LINE__, + iocbq, iocbq->iocb.ulpStatus); continue; - - for (i = 0; i < icmd->ulpBdeCount; i++) { - matp = lpfc_sli_ringpostbuf_get(phba, pring, - getPaddr(icmd->un. - cont64[i]. - addrHigh, - icmd->un. - cont64[i]. 
- addrLow)); - if (!matp) { - /* Insert lpfc log message here */ - lpfc_post_buffer(phba, pring, cnt, 1); - go_exit = 1; - goto ct_unsol_event_exit_piocbq; - } - - /* Typically for Unsolicited CT requests */ - if (!pmbuf) { - pmbuf = matp; - INIT_LIST_HEAD(&pmbuf->list); - } else - list_add_tail(&matp->list, &pmbuf->list); - - size += icmd->un.cont64[i].tus.f.bdeSize; - cnt++; } - icmd->ulpBdeCount = 0; + size = icmd->un.cont64[0].tus.f.bdeSize; + lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf1, size); + lpfc_in_buf_free(phba, bdeBuf1); + if (icmd->ulpBdeCount == 2) { + lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf2, + size); + lpfc_in_buf_free(phba, bdeBuf2); + } } + } else { + struct lpfc_iocbq *next; - lpfc_post_buffer(phba, pring, cnt, 1); - if (save_icmd->ulpStatus) { - go_exit = 1; + list_for_each_entry_safe(iocbq, next, &piocbq->list, list) { + icmd = &iocbq->iocb; + if (icmd->ulpBdeCount == 0) { + printk(KERN_ERR "%s (%d): Unsolited CT, no " + "BDE, iocbq = %p, status = x%x\n", + __FUNCTION__, __LINE__, + iocbq, iocbq->iocb.ulpStatus); + continue; } -ct_unsol_event_exit_piocbq: - list_del(&head); - if (pmbuf) { - list_for_each_entry_safe(matp, next_matp, &pmbuf->list, list) { - lpfc_mbuf_free(phba, matp->virt, matp->phys); - list_del(&matp->list); - kfree(matp); + for (i = 0; i < icmd->ulpBdeCount; i++) { + paddr = getPaddr(icmd->un.cont64[i].addrHigh, + icmd->un.cont64[i].addrLow); + mp = lpfc_sli_ringpostbuf_get(phba, pring, + paddr); + size = icmd->un.cont64[i].tus.f.bdeSize; + lpfc_ct_unsol_buffer(phba, piocbq, mp, size); + lpfc_in_buf_free(phba, mp); + } + list_del(&iocbq->list); + lpfc_sli_release_iocbq(phba, iocbq); } - lpfc_mbuf_free(phba, pmbuf->virt, pmbuf->phys); - kfree(pmbuf); } - return; } static void -lpfc_free_ct_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mlist) +lpfc_free_ct_rsp(struct lpfc_hba *phba, struct lpfc_dmabuf *mlist) { struct lpfc_dmabuf *mlast, *next_mlast; @@ -160,7 +196,7 @@ } static struct lpfc_dmabuf * 
-lpfc_alloc_ct_rsp(struct lpfc_hba * phba, int cmdcode, struct ulp_bde64 * bpl, +lpfc_alloc_ct_rsp(struct lpfc_hba *phba, int cmdcode, struct ulp_bde64 *bpl, uint32_t size, int *entries) { struct lpfc_dmabuf *mlist = NULL; @@ -181,7 +217,8 @@ INIT_LIST_HEAD(&mp->list); - if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT)) + if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT) || + cmdcode == be16_to_cpu(SLI_CTNS_GFF_ID)) mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys)); else mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys)); @@ -201,8 +238,8 @@ bpl->tus.f.bdeFlags = BUFF_USE_RCV; /* build buffer ptr list for IOCB */ - bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) ); - bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) ); + bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); + bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); bpl->tus.f.bdeSize = (uint16_t) cnt; bpl->tus.w = le32_to_cpu(bpl->tus.w); bpl++; @@ -215,24 +252,49 @@ return mlist; } +int +lpfc_ct_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *ctiocb) +{ + struct lpfc_dmabuf *buf_ptr; + + if (ctiocb->context1) { + buf_ptr = (struct lpfc_dmabuf *) ctiocb->context1; + lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); + kfree(buf_ptr); + ctiocb->context1 = NULL; + } + if (ctiocb->context2) { + lpfc_free_ct_rsp(phba, (struct lpfc_dmabuf *) ctiocb->context2); + ctiocb->context2 = NULL; + } + + if (ctiocb->context3) { + buf_ptr = (struct lpfc_dmabuf *) ctiocb->context3; + lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); + kfree(buf_ptr); + ctiocb->context1 = NULL; + } + lpfc_sli_release_iocbq(phba, ctiocb); + return 0; +} + static int -lpfc_gen_req(struct lpfc_hba *phba, struct lpfc_dmabuf *bmp, +lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, struct lpfc_dmabuf *inp, struct lpfc_dmabuf *outp, void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *), struct lpfc_nodelist *ndlp, uint32_t usr_flg, uint32_t num_entry, - uint32_t tmo) + uint32_t tmo, uint8_t retry) { - + 
struct lpfc_hba *phba = vport->phba; struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; IOCB_t *icmd; struct lpfc_iocbq *geniocb; + int rc; /* Allocate buffer for command iocb */ - spin_lock_irq(phba->host->host_lock); geniocb = lpfc_sli_get_iocbq(phba); - spin_unlock_irq(phba->host->host_lock); if (geniocb == NULL) return 1; @@ -272,31 +334,40 @@ icmd->ulpClass = CLASS3; icmd->ulpContext = ndlp->nlp_rpi; + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { + /* For GEN_REQUEST64_CR, use the RPI */ + icmd->ulpCt_h = 0; + icmd->ulpCt_l = 0; + } + /* Issue GEN REQ IOCB for NPORT */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0119 Issue GEN REQ IOCB for NPORT x%x " - "Data: x%x x%x\n", phba->brd_no, icmd->un.ulpWord[5], - icmd->ulpIoTag, phba->hba_state); + "%d (%d):0119 Issue GEN REQ IOCB to NPORT x%x " + "Data: x%x x%x\n", phba->brd_no, vport->vpi, + ndlp->nlp_DID, icmd->ulpIoTag, + vport->port_state); geniocb->iocb_cmpl = cmpl; geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT; - spin_lock_irq(phba->host->host_lock); - if (lpfc_sli_issue_iocb(phba, pring, geniocb, 0) == IOCB_ERROR) { + geniocb->vport = vport; + geniocb->retry = retry; + rc = lpfc_sli_issue_iocb(phba, pring, geniocb, 0); + + if (rc == IOCB_ERROR) { lpfc_sli_release_iocbq(phba, geniocb); - spin_unlock_irq(phba->host->host_lock); return 1; } - spin_unlock_irq(phba->host->host_lock); return 0; } static int -lpfc_ct_cmd(struct lpfc_hba *phba, struct lpfc_dmabuf *inmp, +lpfc_ct_cmd(struct lpfc_vport *vport, struct lpfc_dmabuf *inmp, struct lpfc_dmabuf *bmp, struct lpfc_nodelist *ndlp, void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *), - uint32_t rsp_size) + uint32_t rsp_size, uint8_t retry) { + struct lpfc_hba *phba = vport->phba; struct ulp_bde64 *bpl = (struct ulp_bde64 *) bmp->virt; struct lpfc_dmabuf *outmp; int cnt = 0, status; @@ -310,8 +381,8 @@ if (!outmp) return -ENOMEM; - status = lpfc_gen_req(phba, bmp, inmp, 
outmp, cmpl, ndlp, 0, - cnt+1, 0); + status = lpfc_gen_req(vport, bmp, inmp, outmp, cmpl, ndlp, 0, + cnt+1, 0, retry); if (status) { lpfc_free_ct_rsp(phba, outmp); return -ENOMEM; @@ -319,20 +390,35 @@ return 0; } +static struct lpfc_vport * +lpfc_find_vport_by_did(struct lpfc_hba *phba, uint32_t did) { + + struct lpfc_vport *vport_curr; + + list_for_each_entry(vport_curr, &phba->port_list, listentry) { + if ((vport_curr->fc_myDID) && + (vport_curr->fc_myDID == did)) + return vport_curr; + } + + return NULL; +} + static int -lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size) +lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) { + struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ct_request *Response = (struct lpfc_sli_ct_request *) mp->virt; struct lpfc_nodelist *ndlp = NULL; struct lpfc_dmabuf *mlast, *next_mp; uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType; - uint32_t Did; - uint32_t CTentry; + uint32_t Did, CTentry; int Cnt; struct list_head head; - lpfc_set_disctmo(phba); + lpfc_set_disctmo(vport); + vport->num_disc_nodes = 0; list_add_tail(&head, &mp->list); @@ -350,39 +436,96 @@ /* Loop through entire NameServer list of DIDs */ while (Cnt >= sizeof (uint32_t)) { - /* Get next DID from NameServer List */ CTentry = *ctptr++; Did = ((be32_to_cpu(CTentry)) & Mask_DID); ndlp = NULL; - if (Did != phba->fc_myDID) { - /* Check for rscn processing or not */ - ndlp = lpfc_setup_disc_node(phba, Did); - } - /* Mark all node table entries that are in the - Nameserver */ + + /* + * Check for rscn processing or not + * To conserve rpi's, filter out addresses for other + * vports on the same physical HBAs. 
+ */ + if ((Did != vport->fc_myDID) && + ((lpfc_find_vport_by_did(phba, Did) == NULL) || + phba->cfg_peer_port_login)) { + if ((vport->port_type != LPFC_NPIV_PORT) || + (vport->fc_flag & FC_RFF_NOT_SUPPORTED) || + (!phba->cfg_vport_restrict_login)) { + ndlp = lpfc_setup_disc_node(vport, Did); if (ndlp) { - /* NameServer Rsp */ - lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0238 Process x%x NameServer" - " Rsp Data: x%x x%x x%x\n", - phba->brd_no, + lpfc_debugfs_disc_trc(vport, + LPFC_DISC_TRC_CT, + "Parse GID_FTrsp: " + "did:x%x flg:x%x x%x", Did, ndlp->nlp_flag, - phba->fc_flag, - phba->fc_rscn_id_cnt); + vport->fc_flag); + + lpfc_printf_log(phba, KERN_INFO, + LOG_DISCOVERY, + "%d (%d):0238 Process " + "x%x NameServer Rsp" + "Data: x%x x%x x%x\n", + phba->brd_no, + vport->vpi, Did, + ndlp->nlp_flag, + vport->fc_flag, + vport->fc_rscn_id_cnt); } else { - /* NameServer Rsp */ - lpfc_printf_log(phba, - KERN_INFO, + lpfc_debugfs_disc_trc(vport, + LPFC_DISC_TRC_CT, + "Skip1 GID_FTrsp: " + "did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, + vport->fc_rscn_id_cnt); + + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0239 Skip x%x NameServer " - "Rsp Data: x%x x%x x%x\n", + "%d (%d):0239 Skip x%x " + "NameServer Rsp Data: " + "x%x x%x\n", phba->brd_no, - Did, Size, phba->fc_flag, - phba->fc_rscn_id_cnt); + vport->vpi, Did, + vport->fc_flag, + vport->fc_rscn_id_cnt); } + } else { + if (!(vport->fc_flag & FC_RSCN_MODE) || + (lpfc_rscn_payload_check(vport, Did))) { + lpfc_debugfs_disc_trc(vport, + LPFC_DISC_TRC_CT, + "Query GID_FTrsp: " + "did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, + vport->fc_rscn_id_cnt); + + if (lpfc_ns_cmd(vport, + SLI_CTNS_GFF_ID, + 0, Did) == 0) + vport->num_disc_nodes++; + } + else { + lpfc_debugfs_disc_trc(vport, + LPFC_DISC_TRC_CT, + "Skip2 GID_FTrsp: " + "did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, + vport->fc_rscn_id_cnt); + + lpfc_printf_log(phba, KERN_INFO, + LOG_DISCOVERY, + "%d (%d):0245 Skip x%x " + "NameServer Rsp 
Data: " + "x%x x%x\n", + phba->brd_no, + vport->vpi, Did, + vport->fc_flag, + vport->fc_rscn_id_cnt); + } + } + } if (CTentry & (be32_to_cpu(SLI_CT_LAST_ENTRY))) goto nsout1; Cnt -= sizeof (uint32_t); @@ -393,190 +536,369 @@ nsout1: list_del(&head); - - /* - * The driver has cycled through all Nports in the RSCN payload. - * Complete the handling by cleaning up and marking the - * current driver state. - */ - if (phba->hba_state == LPFC_HBA_READY) { - lpfc_els_flush_rscn(phba); - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */ - spin_unlock_irq(phba->host->host_lock); - } return 0; } - - - static void -lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; - struct lpfc_sli *psli; struct lpfc_dmabuf *bmp; - struct lpfc_dmabuf *inp; struct lpfc_dmabuf *outp; - struct lpfc_nodelist *ndlp; struct lpfc_sli_ct_request *CTrsp; + int rc; - psli = &phba->sli; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; - inp = (struct lpfc_dmabuf *) cmdiocb->context1; outp = (struct lpfc_dmabuf *) cmdiocb->context2; bmp = (struct lpfc_dmabuf *) cmdiocb->context3; - irsp = &rspiocb->iocb; - if (irsp->ulpStatus) { - if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && - ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) || - (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) { + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GID_FT cmpl: status:x%x/x%x rtry:%d", + irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_ns_retry); + + /* Don't bother processing response if vport is being torn down. 
*/ + if (vport->load_flag & FC_UNLOADING) + goto out; + + + if (lpfc_els_chk_latt(vport) || lpfc_error_lost_link(irsp)) { + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0216 Link event during NS query\n", + phba->brd_no, vport->vpi); + lpfc_vport_set_state(vport, FC_VPORT_FAILED); goto out; } + if (irsp->ulpStatus) { /* Check for retry */ - if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) { - phba->fc_ns_retry++; + if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { + if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) || + (irsp->un.ulpWord[4] != IOERR_NO_RESOURCES)) + vport->fc_ns_retry++; /* CT command is being retried */ - ndlp = lpfc_findnode_did(phba, NameServer_DID); - if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { - if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == - 0) { + rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, + vport->fc_ns_retry, 0); + if (rc == 0) goto out; } - } - } + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0257 GID_FT Query error: 0x%x 0x%x\n", + phba->brd_no, vport->vpi, irsp->ulpStatus, + vport->fc_ns_retry); } else { /* Good status, continue checking */ CTrsp = (struct lpfc_sli_ct_request *) outp->virt; if (CTrsp->CommandResponse.bits.CmdRsp == be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) { lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0208 NameServer Rsp " + "%d (%d):0208 NameServer Rsp " "Data: x%x\n", - phba->brd_no, - phba->fc_flag); - lpfc_ns_rsp(phba, outp, + phba->brd_no, vport->vpi, + vport->fc_flag); + lpfc_ns_rsp(vport, outp, (uint32_t) (irsp->un.genreq64.bdl.bdeSize)); } else if (CTrsp->CommandResponse.bits.CmdRsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { /* NameServer Rsp Error */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0240 NameServer Rsp Error " + "%d (%d):0240 NameServer Rsp Error " "Data: x%x x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, CTrsp->CommandResponse.bits.CmdRsp, (uint32_t) CTrsp->ReasonCode, (uint32_t) CTrsp->Explanation, - 
phba->fc_flag); + vport->fc_flag); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GID_FT rsp err1 cmd:x%x rsn:x%x exp:x%x", + (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t) CTrsp->ReasonCode, + (uint32_t) CTrsp->Explanation); + } else { /* NameServer Rsp Error */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0241 NameServer Rsp Error " + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0241 NameServer Rsp Error " "Data: x%x x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, CTrsp->CommandResponse.bits.CmdRsp, (uint32_t) CTrsp->ReasonCode, (uint32_t) CTrsp->Explanation, - phba->fc_flag); + vport->fc_flag); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GID_FT rsp err2 cmd:x%x rsn:x%x exp:x%x", + (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t) CTrsp->ReasonCode, + (uint32_t) CTrsp->Explanation); } } /* Link up / RSCN discovery */ - lpfc_disc_start(phba); + if (vport->num_disc_nodes == 0) { + /* + * The driver has cycled through all Nports in the RSCN payload. + * Complete the handling by cleaning up and marking the + * current driver state. 
+ */ + if (vport->port_state >= LPFC_DISC_AUTH) { + if (vport->fc_flag & FC_RSCN_MODE) { + lpfc_els_flush_rscn(vport); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */ + spin_unlock_irq(shost->host_lock); + } + else + lpfc_els_flush_rscn(vport); + } + + lpfc_disc_start(vport); + } out: - lpfc_free_ct_rsp(phba, outp); - lpfc_mbuf_free(phba, inp->virt, inp->phys); - lpfc_mbuf_free(phba, bmp->virt, bmp->phys); - kfree(inp); - kfree(bmp); - spin_lock_irq(phba->host->host_lock); - lpfc_sli_release_iocbq(phba, cmdiocb); - spin_unlock_irq(phba->host->host_lock); + lpfc_ct_free_iocb(phba, cmdiocb); + return; +} + +void +lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp = &rspiocb->iocb; + struct lpfc_dmabuf *inp = (struct lpfc_dmabuf *) cmdiocb->context1; + struct lpfc_dmabuf *outp = (struct lpfc_dmabuf *) cmdiocb->context2; + struct lpfc_sli_ct_request *CTrsp; + int did; + uint8_t fbits; + struct lpfc_nodelist *ndlp; + + did = ((struct lpfc_sli_ct_request *) inp->virt)->un.gff.PortId; + did = be32_to_cpu(did); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GFF_ID cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], did); + + if (irsp->ulpStatus == IOSTAT_SUCCESS) { + /* Good status, continue checking */ + CTrsp = (struct lpfc_sli_ct_request *) outp->virt; + fbits = CTrsp->un.gff_acc.fbits[FCP_TYPE_FEATURE_OFFSET]; + + if (CTrsp->CommandResponse.bits.CmdRsp == + be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) { + if ((fbits & FC4_FEATURE_INIT) && + !(fbits & FC4_FEATURE_TARGET)) { + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0245 Skip x%x GFF " + "NameServer Rsp Data: (init) " + "x%x x%x\n", phba->brd_no, + vport->vpi, did, fbits, + vport->fc_rscn_id_cnt); + goto out; + } + } + } + else { + lpfc_printf_log(phba, KERN_ERR, 
LOG_DISCOVERY, + "%d (%d):0267 NameServer GFF Rsp" + " x%x Error (%d %d) Data: x%x x%x\n", + phba->brd_no, vport->vpi, did, + irsp->ulpStatus, irsp->un.ulpWord[4], + vport->fc_flag, vport->fc_rscn_id_cnt) + } + + /* This is a target port, unregistered port, or the GFF_ID failed */ + ndlp = lpfc_setup_disc_node(vport, did); + if (ndlp) { + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0242 Process x%x GFF " + "NameServer Rsp Data: x%x x%x x%x\n", + phba->brd_no, vport->vpi, + did, ndlp->nlp_flag, vport->fc_flag, + vport->fc_rscn_id_cnt); + } else { + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0243 Skip x%x GFF " + "NameServer Rsp Data: x%x x%x\n", + phba->brd_no, vport->vpi, did, + vport->fc_flag, vport->fc_rscn_id_cnt); + } +out: + /* Link up / RSCN discovery */ + if (vport->num_disc_nodes) + vport->num_disc_nodes--; + if (vport->num_disc_nodes == 0) { + /* + * The driver has cycled through all Nports in the RSCN payload. + * Complete the handling by cleaning up and marking the + * current driver state. 
+ */ + if (vport->port_state >= LPFC_DISC_AUTH) { + if (vport->fc_flag & FC_RSCN_MODE) { + lpfc_els_flush_rscn(vport); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */ + spin_unlock_irq(shost->host_lock); + } + else + lpfc_els_flush_rscn(vport); + } + lpfc_disc_start(vport); + } + lpfc_ct_free_iocb(phba, cmdiocb); return; } + static void -lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { - struct lpfc_sli *psli; - struct lpfc_dmabuf *bmp; + struct lpfc_vport *vport = cmdiocb->vport; struct lpfc_dmabuf *inp; struct lpfc_dmabuf *outp; IOCB_t *irsp; struct lpfc_sli_ct_request *CTrsp; + int cmdcode, rc; + uint8_t retry; + uint32_t latt; - psli = &phba->sli; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; inp = (struct lpfc_dmabuf *) cmdiocb->context1; outp = (struct lpfc_dmabuf *) cmdiocb->context2; - bmp = (struct lpfc_dmabuf *) cmdiocb->context3; irsp = &rspiocb->iocb; + cmdcode = be16_to_cpu(((struct lpfc_sli_ct_request *) inp->virt)-> + CommandResponse.bits.CmdRsp); CTrsp = (struct lpfc_sli_ct_request *) outp->virt; + latt = lpfc_els_chk_latt(vport); + /* RFT request completes status CmdRsp */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0209 RFT request completes ulpStatus x%x " - "CmdRsp x%x\n", phba->brd_no, irsp->ulpStatus, - CTrsp->CommandResponse.bits.CmdRsp); + "%d (%d):0209 RFT request completes, latt %d, " + "ulpStatus x%x CmdRsp x%x, Context x%x, Tag x%x\n", + phba->brd_no, vport->vpi, latt, irsp->ulpStatus, + CTrsp->CommandResponse.bits.CmdRsp, + cmdiocb->iocb.ulpContext, cmdiocb->iocb.ulpIoTag); - lpfc_free_ct_rsp(phba, outp); - lpfc_mbuf_free(phba, inp->virt, inp->phys); - lpfc_mbuf_free(phba, bmp->virt, bmp->phys); - kfree(inp); - kfree(bmp); - spin_lock_irq(phba->host->host_lock); - 
lpfc_sli_release_iocbq(phba, cmdiocb); - spin_unlock_irq(phba->host->host_lock); + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "CT cmd cmpl: status:x%x/x%x cmd:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], cmdcode); + + if (irsp->ulpStatus) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0268 NS cmd %x Error (%d %d)\n", + phba->brd_no, vport->vpi, cmdcode, + irsp->ulpStatus, irsp->un.ulpWord[4]); + + if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && + ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) || + (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) + goto out; + + retry = cmdiocb->retry; + if (retry >= LPFC_MAX_NS_RETRY) + goto out; + + retry++; + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0216 Retrying NS cmd %x\n", + phba->brd_no, vport->vpi, cmdcode); + rc = lpfc_ns_cmd(vport, cmdcode, retry, 0); + if (rc == 0) + goto out; + } + +out: + lpfc_ct_free_iocb(phba, cmdiocb); return; } static void -lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); return; } static void -lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_ct_cmd_rspn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); return; } static void -lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); return; } -void -lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp) +static void +lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { - char 
fwrev[16]; + IOCB_t *irsp = &rspiocb->iocb; + struct lpfc_vport *vport = cmdiocb->vport; - lpfc_decode_firmware_rev(phba, fwrev, 0); + if (irsp->ulpStatus != IOSTAT_SUCCESS) + vport->fc_flag |= FC_RFF_NOT_SUPPORTED; - sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName, - fwrev, lpfc_release_version); + lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); return; } +int +lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol, + size_t size) +{ + int n; + uint8_t *wwn = vport->phba->wwpn; + + n = snprintf(symbol, size, + "Emulex PPN-%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + wwn[0], wwn[1], wwn[2], wwn[3], + wwn[4], wwn[5], wwn[6], wwn[7]); + + if (vport->port_type == LPFC_PHYSICAL_PORT) + return n; + + if (n < size) + n += snprintf(symbol + n, size - n, " VPort-%d", vport->vpi); + + if (n < size && vport->vname) + n += snprintf(symbol + n, size - n, " VName-%s", vport->vname); + return n; +} + +int +lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, + size_t size) +{ + char fwrev[16]; + int n; + + lpfc_decode_firmware_rev(vport->phba, fwrev, 0); + + n = snprintf(symbol, size, "Emulex %s FV%s DV%s", + vport->phba->ModelName, fwrev, lpfc_release_version); + return n; +} + /* * lpfc_ns_cmd * Description: @@ -585,55 +907,76 @@ * LI_CTNS_RFT_ID */ int -lpfc_ns_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode) +lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, + uint8_t retry, uint32_t context) { + struct lpfc_nodelist * ndlp; + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *mp, *bmp; struct lpfc_sli_ct_request *CtReq; struct ulp_bde64 *bpl; void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *) = NULL; uint32_t rsp_size = 1024; + size_t size; + int rc = 0; + + ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (ndlp == NULL || ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) { + rc=1; + goto ns_cmd_exit; + } /* fill in BDEs for command */ /* Allocate buffer for command payload */ mp = 
kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); - if (!mp) + if (!mp) { + rc=2; goto ns_cmd_exit; + } INIT_LIST_HEAD(&mp->list); mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys)); - if (!mp->virt) + if (!mp->virt) { + rc=3; goto ns_cmd_free_mp; + } /* Allocate buffer for Buffer ptr list */ bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); - if (!bmp) + if (!bmp) { + rc=4; goto ns_cmd_free_mpvirt; + } INIT_LIST_HEAD(&bmp->list); bmp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(bmp->phys)); - if (!bmp->virt) + if (!bmp->virt) { + rc=5; goto ns_cmd_free_bmp; + } /* NameServer Req */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0236 NameServer Req Data: x%x x%x x%x\n", - phba->brd_no, cmdcode, phba->fc_flag, - phba->fc_rscn_id_cnt); + lpfc_printf_log(phba, KERN_INFO ,LOG_DISCOVERY, + "%d (%d):0236 NameServer Req Data: x%x x%x x%x\n", + phba->brd_no, vport->vpi, cmdcode, vport->fc_flag, + vport->fc_rscn_id_cnt); bpl = (struct ulp_bde64 *) bmp->virt; memset(bpl, 0, sizeof(struct ulp_bde64)); - bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) ); - bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) ); + bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); + bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); bpl->tus.f.bdeFlags = 0; if (cmdcode == SLI_CTNS_GID_FT) bpl->tus.f.bdeSize = GID_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_GFF_ID) + bpl->tus.f.bdeSize = GFF_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RFT_ID) bpl->tus.f.bdeSize = RFT_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RNN_ID) bpl->tus.f.bdeSize = RNN_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_RSPN_ID) + bpl->tus.f.bdeSize = RSPN_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RSNN_NN) bpl->tus.f.bdeSize = RSNN_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RFF_ID) @@ -654,56 +997,78 @@ CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_CTNS_GID_FT); CtReq->un.gid.Fc4Type = SLI_CTPT_FCP; - if (phba->hba_state < LPFC_HBA_READY) - phba->hba_state = LPFC_NS_QRY; - lpfc_set_disctmo(phba); + if 
(vport->port_state < LPFC_NS_QRY) + vport->port_state = LPFC_NS_QRY; + lpfc_set_disctmo(vport); cmpl = lpfc_cmpl_ct_cmd_gid_ft; rsp_size = FC_MAX_NS_RSP; break; + case SLI_CTNS_GFF_ID: + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_GFF_ID); + CtReq->un.gff.PortId = be32_to_cpu(context); + cmpl = lpfc_cmpl_ct_cmd_gff_id; + break; + case SLI_CTNS_RFT_ID: CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_CTNS_RFT_ID); - CtReq->un.rft.PortId = be32_to_cpu(phba->fc_myDID); + CtReq->un.rft.PortId = be32_to_cpu(vport->fc_myDID); CtReq->un.rft.fcpReg = 1; cmpl = lpfc_cmpl_ct_cmd_rft_id; break; - case SLI_CTNS_RFF_ID: - CtReq->CommandResponse.bits.CmdRsp = - be16_to_cpu(SLI_CTNS_RFF_ID); - CtReq->un.rff.PortId = be32_to_cpu(phba->fc_myDID); - CtReq->un.rff.feature_res = 0; - CtReq->un.rff.feature_tgt = 0; - CtReq->un.rff.type_code = FC_FCP_DATA; - CtReq->un.rff.feature_init = 1; - cmpl = lpfc_cmpl_ct_cmd_rff_id; - break; - case SLI_CTNS_RNN_ID: CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_CTNS_RNN_ID); - CtReq->un.rnn.PortId = be32_to_cpu(phba->fc_myDID); - memcpy(CtReq->un.rnn.wwnn, &phba->fc_nodename, + CtReq->un.rnn.PortId = be32_to_cpu(vport->fc_myDID); + memcpy(CtReq->un.rnn.wwnn, &vport->fc_nodename, sizeof (struct lpfc_name)); cmpl = lpfc_cmpl_ct_cmd_rnn_id; break; + case SLI_CTNS_RSPN_ID: + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_RSPN_ID); + CtReq->un.rspn.PortId = be32_to_cpu(vport->fc_myDID); + size = sizeof(CtReq->un.rspn.symbname); + CtReq->un.rspn.len = + lpfc_vport_symbolic_port_name(vport, + CtReq->un.rspn.symbname, size); + cmpl = lpfc_cmpl_ct_cmd_rspn_id; + break; case SLI_CTNS_RSNN_NN: CtReq->CommandResponse.bits.CmdRsp = be16_to_cpu(SLI_CTNS_RSNN_NN); - memcpy(CtReq->un.rsnn.wwnn, &phba->fc_nodename, + memcpy(CtReq->un.rsnn.wwnn, &vport->fc_nodename, sizeof (struct lpfc_name)); - lpfc_get_hba_sym_node_name(phba, CtReq->un.rsnn.symbname); - CtReq->un.rsnn.len = strlen(CtReq->un.rsnn.symbname); + size = 
sizeof(CtReq->un.rsnn.symbname); + CtReq->un.rsnn.len = + lpfc_vport_symbolic_node_name(vport, + CtReq->un.rsnn.symbname, size); cmpl = lpfc_cmpl_ct_cmd_rsnn_nn; break; + case SLI_CTNS_RFF_ID: + vport->fc_flag &= ~FC_RFF_NOT_SUPPORTED; + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_RFF_ID); + CtReq->un.rff.PortId = be32_to_cpu(vport->fc_myDID);; + CtReq->un.rff.fbits = FC4_FEATURE_INIT; + CtReq->un.rff.type_code = FC_FCP_DATA; + cmpl = lpfc_cmpl_ct_cmd_rff_id; + break; } - if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, rsp_size)) + if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, rsp_size, retry)) { /* On success, The cmpl function will free the buffers */ + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Issue CT cmd: cmd:x%x did:x%x", + cmdcode, ndlp->nlp_DID, 0); return 0; + } + rc=6; lpfc_mbuf_free(phba, bmp->virt, bmp->phys); ns_cmd_free_bmp: kfree(bmp); @@ -712,14 +1077,17 @@ ns_cmd_free_mp: kfree(mp); ns_cmd_exit: + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0266 Issue NameServer Req x%x err %d Data: x%x x%x\n", + phba->brd_no, vport->vpi, cmdcode, rc, vport->fc_flag, + vport->fc_rscn_id_cnt); return 1; } static void -lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba * phba, - struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb) +lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq * rspiocb) { - struct lpfc_dmabuf *bmp = cmdiocb->context3; struct lpfc_dmabuf *inp = cmdiocb->context1; struct lpfc_dmabuf *outp = cmdiocb->context2; struct lpfc_sli_ct_request *CTrsp = outp->virt; @@ -727,48 +1095,60 @@ struct lpfc_nodelist *ndlp; uint16_t fdmi_cmd = CTcmd->CommandResponse.bits.CmdRsp; uint16_t fdmi_rsp = CTrsp->CommandResponse.bits.CmdRsp; + struct lpfc_vport *vport = cmdiocb->vport; + IOCB_t *irsp = &rspiocb->iocb; + uint32_t latt; + + latt = lpfc_els_chk_latt(vport); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "FDMI cmpl: status:x%x/x%x latt:%d", + irsp->ulpStatus, irsp->un.ulpWord[4], latt); 
- ndlp = lpfc_findnode_did(phba, FDMI_DID); + if (latt || irsp->ulpStatus) { + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0229 FDMI cmd %04x failed, latt = %d " + "ulpStatus: x%x, rid x%x\n", + phba->brd_no, vport->vpi, + be16_to_cpu(fdmi_cmd), latt, irsp->ulpStatus, + irsp->un.ulpWord[4]); + lpfc_ct_free_iocb(phba, cmdiocb); + return; + } + + ndlp = lpfc_findnode_did(vport, FDMI_DID); if (fdmi_rsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { /* FDMI rsp failed */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0220 FDMI rsp failed Data: x%x\n", - phba->brd_no, + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0220 FDMI rsp failed Data: x%x\n", + phba->brd_no, vport->vpi, be16_to_cpu(fdmi_cmd)); } switch (be16_to_cpu(fdmi_cmd)) { case SLI_MGMT_RHBA: - lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RPA); + lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RPA); break; case SLI_MGMT_RPA: break; case SLI_MGMT_DHBA: - lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DPRT); + lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DPRT); break; case SLI_MGMT_DPRT: - lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RHBA); + lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RHBA); break; } - - lpfc_free_ct_rsp(phba, outp); - lpfc_mbuf_free(phba, inp->virt, inp->phys); - lpfc_mbuf_free(phba, bmp->virt, bmp->phys); - kfree(inp); - kfree(bmp); - spin_lock_irq(phba->host->host_lock); - lpfc_sli_release_iocbq(phba, cmdiocb); - spin_unlock_irq(phba->host->host_lock); + lpfc_ct_free_iocb(phba, cmdiocb); return; } + int -lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode) +lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) { + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *mp, *bmp; struct lpfc_sli_ct_request *CtReq; struct ulp_bde64 *bpl; @@ -805,12 +1185,10 @@ INIT_LIST_HEAD(&bmp->list); /* FDMI request */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0218 FDMI Request Data: x%x x%x x%x\n", - phba->brd_no, - phba->fc_flag, phba->hba_state, 
cmdcode); + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0218 FDMI Request Data: x%x x%x x%x\n", + phba->brd_no, vport->vpi, vport->fc_flag, + vport->port_state, cmdcode); CtReq = (struct lpfc_sli_ct_request *) mp->virt; @@ -833,11 +1211,11 @@ be16_to_cpu(SLI_MGMT_RHBA); CtReq->CommandResponse.bits.Size = 0; rh = (REG_HBA *) & CtReq->un.PortID; - memcpy(&rh->hi.PortName, &phba->fc_sparam.portName, + memcpy(&rh->hi.PortName, &vport->fc_sparam.portName, sizeof (struct lpfc_name)); /* One entry (port) per adapter */ rh->rpl.EntryCnt = be32_to_cpu(1); - memcpy(&rh->rpl.pe, &phba->fc_sparam.portName, + memcpy(&rh->rpl.pe, &vport->fc_sparam.portName, sizeof (struct lpfc_name)); /* point to the HBA attribute block */ @@ -853,7 +1231,7 @@ ae->ad.bits.AttrType = be16_to_cpu(NODE_NAME); ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + sizeof (struct lpfc_name)); - memcpy(&ae->un.NodeName, &phba->fc_sparam.nodeName, + memcpy(&ae->un.NodeName, &vport->fc_sparam.nodeName, sizeof (struct lpfc_name)); ab->EntryCnt++; size += FOURBYTES + sizeof (struct lpfc_name); @@ -991,7 +1369,7 @@ pab = (REG_PORT_ATTRIBUTE *) & CtReq->un.PortID; size = sizeof (struct lpfc_name) + FOURBYTES; memcpy((uint8_t *) & pab->PortName, - (uint8_t *) & phba->fc_sparam.portName, + (uint8_t *) & vport->fc_sparam.portName, sizeof (struct lpfc_name)); pab->ab.EntryCnt = 0; @@ -1053,7 +1431,7 @@ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size); ae->ad.bits.AttrType = be16_to_cpu(MAX_FRAME_SIZE); ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4); - hsp = (struct serv_parm *) & phba->fc_sparam; + hsp = (struct serv_parm *) & vport->fc_sparam; ae->un.MaxFrameSize = (((uint32_t) hsp->cmn. bbRcvSizeMsb) << 8) | (uint32_t) hsp->cmn. 
@@ -1097,7 +1475,7 @@ CtReq->CommandResponse.bits.Size = 0; pe = (PORT_ENTRY *) & CtReq->un.PortID; memcpy((uint8_t *) & pe->PortName, - (uint8_t *) & phba->fc_sparam.portName, + (uint8_t *) & vport->fc_sparam.portName, sizeof (struct lpfc_name)); size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name); break; @@ -1107,22 +1485,22 @@ CtReq->CommandResponse.bits.Size = 0; pe = (PORT_ENTRY *) & CtReq->un.PortID; memcpy((uint8_t *) & pe->PortName, - (uint8_t *) & phba->fc_sparam.portName, + (uint8_t *) & vport->fc_sparam.portName, sizeof (struct lpfc_name)); size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name); break; } bpl = (struct ulp_bde64 *) bmp->virt; - bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) ); - bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) ); + bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); + bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); bpl->tus.f.bdeFlags = 0; bpl->tus.f.bdeSize = size; bpl->tus.w = le32_to_cpu(bpl->tus.w); cmpl = lpfc_cmpl_ct_cmd_fdmi; - if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP)) + if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP, 0)) return 0; lpfc_mbuf_free(phba, bmp->virt, bmp->phys); @@ -1134,49 +1512,50 @@ kfree(mp); fdmi_cmd_exit: /* Issue FDMI request failed */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0244 Issue FDMI request failed Data: x%x\n", - phba->brd_no, - cmdcode); + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0244 Issue FDMI request failed Data: x%x\n", + phba->brd_no, vport->vpi, cmdcode); return 1; } void lpfc_fdmi_tmo(unsigned long ptr) { - struct lpfc_hba *phba = (struct lpfc_hba *)ptr; + struct lpfc_vport *vport = (struct lpfc_vport *)ptr; + struct lpfc_hba *phba = vport->phba; unsigned long iflag; - spin_lock_irqsave(phba->host->host_lock, iflag); - if (!(phba->work_hba_events & WORKER_FDMI_TMO)) { - phba->work_hba_events |= WORKER_FDMI_TMO; + spin_lock_irqsave(&vport->work_port_lock, iflag); + if (!(vport->work_port_events & 
WORKER_FDMI_TMO)) { + vport->work_port_events |= WORKER_FDMI_TMO; + spin_unlock_irqrestore(&vport->work_port_lock, iflag); + + spin_lock_irqsave(&phba->hbalock, iflag); if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, iflag); } - spin_unlock_irqrestore(phba->host->host_lock,iflag); + else + spin_unlock_irqrestore(&vport->work_port_lock, iflag); } void -lpfc_fdmi_tmo_handler(struct lpfc_hba *phba) +lpfc_fdmi_timeout_handler(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp; - ndlp = lpfc_findnode_did(phba, FDMI_DID); + ndlp = lpfc_findnode_did(vport, FDMI_DID); if (ndlp) { - if (init_utsname()->nodename[0] != '\0') { - lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA); - } else { - mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60); - } + if (init_utsname()->nodename[0] != '\0') + lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA); + else + mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60); } return; } - void -lpfc_decode_firmware_rev(struct lpfc_hba * phba, char *fwrevision, int flag) +lpfc_decode_firmware_rev(struct lpfc_hba *phba, char *fwrevision, int flag) { struct lpfc_sli *psli = &phba->sli; lpfc_vpd_t *vp = &phba->vpd; diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,508 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2007 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. 
* + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + *******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "lpfc_hw.h" +#include "lpfc_sli.h" +#include "lpfc_disc.h" +#include "lpfc_scsi.h" +#include "lpfc.h" +#include "lpfc_logmsg.h" +#include "lpfc_crtn.h" +#include "lpfc_vport.h" +#include "lpfc_version.h" +#include "lpfc_vport.h" +#include "lpfc_debugfs.h" + +#ifdef CONFIG_LPFC_DEBUG_FS +/* debugfs interface + * + * To access this interface the user should: + * # mkdir /debug + * # mount -t debugfs none /debug + * + * The lpfc debugfs directory hierarchy is: + * lpfc/lpfcX/vportY + * where X is the lpfc hba unique_id + * where Y is the vport VPI on that hba + * + * Debugging services available per vport: + * discovery_trace + * This is an ASCII readable file that contains a trace of the last + * lpfc_debugfs_max_disc_trc events that happened on a specific vport. + * See lpfc_debugfs.h for different categories of + * discovery events.
To enable the discovery trace, the following + * module parameters must be set: + * lpfc_debugfs_enable=1 Turns on lpfc debugfs filesystem support + * lpfc_debugfs_max_disc_trc=X Where X is the event trace depth for + * EACH vport. X MUST also be a power of 2. + * lpfc_debugfs_mask_disc_trc=Y Where Y is an event mask as defined in + * lpfc_debugfs.h . + */ +static int lpfc_debugfs_enable = 0; +module_param(lpfc_debugfs_enable, int, 0); +MODULE_PARM_DESC(lpfc_debugfs_enable, "Enable debugfs services"); + +static int lpfc_debugfs_max_disc_trc = 0; /* This MUST be a power of 2 */ +module_param(lpfc_debugfs_max_disc_trc, int, 0); +MODULE_PARM_DESC(lpfc_debugfs_max_disc_trc, + "Set debugfs discovery trace depth"); + +static int lpfc_debugfs_mask_disc_trc = 0; +module_param(lpfc_debugfs_mask_disc_trc, int, 0); +MODULE_PARM_DESC(lpfc_debugfs_mask_disc_trc, + "Set debugfs discovery trace mask"); + +#include + +/* size of discovery_trace output line */ +#define LPFC_DISC_TRC_ENTRY_SIZE 80 + +/* nodelist output buffer size */ +#define LPFC_NODELIST_SIZE 8192 +#define LPFC_NODELIST_ENTRY_SIZE 120 + +struct lpfc_debug { + char *buffer; + int len; +}; + +atomic_t lpfc_debugfs_disc_trc_cnt = ATOMIC_INIT(0); +unsigned long lpfc_debugfs_start_time = 0L; + +static int +lpfc_debugfs_disc_trc_data(struct lpfc_vport *vport, char *buf, int size) +{ + int i, index, len, enable; + uint32_t ms; + struct lpfc_disc_trc *dtp; + char buffer[80]; + + + enable = lpfc_debugfs_enable; + lpfc_debugfs_enable = 0; + + len = 0; + index = (atomic_read(&vport->disc_trc_cnt) + 1) & + (lpfc_debugfs_max_disc_trc - 1); + for (i = index; i < lpfc_debugfs_max_disc_trc; i++) { + dtp = vport->disc_trc + i; + if (!dtp->fmt) + continue; + ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time); + snprintf(buffer, 80, "%010d:%010d ms:%s\n", + dtp->seq_cnt, ms, dtp->fmt); + len += snprintf(buf+len, size-len, buffer, + dtp->data1, dtp->data2, dtp->data3); + } + for (i = 0; i < index; i++) { + dtp = 
vport->disc_trc + i; + if (!dtp->fmt) + continue; + ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time); + snprintf(buffer, 80, "%010d:%010d ms:%s\n", + dtp->seq_cnt, ms, dtp->fmt); + len += snprintf(buf+len, size-len, buffer, + dtp->data1, dtp->data2, dtp->data3); + } + + lpfc_debugfs_enable = enable; + return len; +} + +static int +lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) +{ + int len = 0; + int cnt; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + unsigned char *statep, *name; + + cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); + + spin_lock_irq(shost->host_lock); + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (!cnt) { + len += snprintf(buf+len, size-len, + "Missing Nodelist Entries\n"); + break; + } + cnt--; + switch (ndlp->nlp_state) { + case NLP_STE_UNUSED_NODE: + statep = "UNUSED"; + break; + case NLP_STE_PLOGI_ISSUE: + statep = "PLOGI "; + break; + case NLP_STE_ADISC_ISSUE: + statep = "ADISC "; + break; + case NLP_STE_REG_LOGIN_ISSUE: + statep = "REGLOG"; + break; + case NLP_STE_PRLI_ISSUE: + statep = "PRLI "; + break; + case NLP_STE_UNMAPPED_NODE: + statep = "UNMAP "; + break; + case NLP_STE_MAPPED_NODE: + statep = "MAPPED"; + break; + case NLP_STE_NPR_NODE: + statep = "NPR "; + break; + default: + statep = "UNKNOWN"; + } + len += snprintf(buf+len, size-len, "%s DID:x%06x ", + statep, ndlp->nlp_DID); + name = (unsigned char *)&ndlp->nlp_portname; + len += snprintf(buf+len, size-len, + "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", + *name, *(name+1), *(name+2), *(name+3), + *(name+4), *(name+5), *(name+6), *(name+7)); + name = (unsigned char *)&ndlp->nlp_nodename; + len += snprintf(buf+len, size-len, + "WWNN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", + *name, *(name+1), *(name+2), *(name+3), + *(name+4), *(name+5), *(name+6), *(name+7)); + len += snprintf(buf+len, size-len, "RPI:%03d flag:x%08x ", + ndlp->nlp_rpi, ndlp->nlp_flag); + if 
(!ndlp->nlp_type) + len += snprintf(buf+len, size-len, "UNKNOWN_TYPE"); + if (ndlp->nlp_type & NLP_FC_NODE) + len += snprintf(buf+len, size-len, "FC_NODE "); + if (ndlp->nlp_type & NLP_FABRIC) + len += snprintf(buf+len, size-len, "FABRIC "); + if (ndlp->nlp_type & NLP_FCP_TARGET) + len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", + ndlp->nlp_sid); + if (ndlp->nlp_type & NLP_FCP_INITIATOR) + len += snprintf(buf+len, size-len, "FCP_INITIATOR"); + len += snprintf(buf+len, size-len, "\n"); + } + spin_unlock_irq(shost->host_lock); + return len; +} +#endif + + +inline void +lpfc_debugfs_disc_trc(struct lpfc_vport *vport, int mask, char *fmt, + uint32_t data1, uint32_t data2, uint32_t data3) +{ +#ifdef CONFIG_LPFC_DEBUG_FS + struct lpfc_disc_trc *dtp; + int index; + + if (!(lpfc_debugfs_mask_disc_trc & mask)) + return; + + if (!lpfc_debugfs_enable || !lpfc_debugfs_max_disc_trc || + !vport || !vport->disc_trc) + return; + + index = atomic_inc_return(&vport->disc_trc_cnt) & + (lpfc_debugfs_max_disc_trc - 1); + dtp = vport->disc_trc + index; + dtp->fmt = fmt; + dtp->data1 = data1; + dtp->data2 = data2; + dtp->data3 = data3; + dtp->seq_cnt = atomic_inc_return(&lpfc_debugfs_disc_trc_cnt); + dtp->jif = jiffies; +#endif + return; +} + +#ifdef CONFIG_LPFC_DEBUG_FS +static int +lpfc_debugfs_disc_trc_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int size; + int rc = -ENOMEM; + + if (!lpfc_debugfs_max_disc_trc) { + rc = -ENOSPC; + goto out; + } + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundry */ + size = (lpfc_debugfs_max_disc_trc * LPFC_DISC_TRC_ENTRY_SIZE); + size = PAGE_ALIGN(size); + + debug->buffer = kmalloc(size, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_disc_trc_data(vport, debug->buffer, size); + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static int 
+lpfc_debugfs_nodelist_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundry */ + debug->buffer = kmalloc(LPFC_NODELIST_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nodelist_data(vport, debug->buffer, + LPFC_NODELIST_SIZE); + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static loff_t +lpfc_debugfs_lseek(struct file *file, loff_t off, int whence) +{ + struct lpfc_debug *debug; + loff_t pos = -1; + + debug = file->private_data; + + switch (whence) { + case 0: + pos = off; + break; + case 1: + pos = file->f_pos + off; + break; + case 2: + pos = debug->len - off; + } + return (pos < 0 || pos > debug->len) ? -EINVAL : (file->f_pos = pos); +} + +static ssize_t +lpfc_debugfs_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + return simple_read_from_buffer(buf, nbytes, ppos, debug->buffer, + debug->len); +} + +static int +lpfc_debugfs_release(struct inode *inode, struct file *file) +{ + struct lpfc_debug *debug = file->private_data; + + kfree(debug->buffer); + kfree(debug); + + return 0; +} + +#undef lpfc_debugfs_op_disc_trc +static struct file_operations lpfc_debugfs_op_disc_trc = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_disc_trc_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_nodelist +static struct file_operations lpfc_debugfs_op_nodelist = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nodelist_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .release = lpfc_debugfs_release, +}; + +static struct dentry *lpfc_debugfs_root = NULL; +static atomic_t lpfc_debugfs_hba_count; +#endif + +inline void 
+lpfc_debugfs_initialize(struct lpfc_vport *vport) +{ +#ifdef CONFIG_LPFC_DEBUG_FS + struct lpfc_hba *phba = vport->phba; + char name[64]; + uint32_t num, i; + + if (!lpfc_debugfs_enable) + return; + + if (lpfc_debugfs_max_disc_trc) { + num = lpfc_debugfs_max_disc_trc - 1; + if (num & lpfc_debugfs_max_disc_trc) { + /* Change to be a power of 2 */ + num = lpfc_debugfs_max_disc_trc; + i = 0; + while (num > 1) { + num = num >> 1; + i++; + } + lpfc_debugfs_max_disc_trc = (1 << i); + printk(KERN_ERR + "lpfc_debugfs_max_disc_trc changed to %d\n", + lpfc_debugfs_max_disc_trc); + } + } + + if (!lpfc_debugfs_root) { + lpfc_debugfs_root = debugfs_create_dir("lpfc", NULL); + atomic_set(&lpfc_debugfs_hba_count, 0); + if (!lpfc_debugfs_root) + goto debug_failed; + } + + snprintf(name, sizeof(name), "lpfc%d", phba->brd_no); + if (!phba->hba_debugfs_root) { + phba->hba_debugfs_root = + debugfs_create_dir(name, lpfc_debugfs_root); + if (!phba->hba_debugfs_root) + goto debug_failed; + atomic_inc(&lpfc_debugfs_hba_count); + atomic_set(&phba->debugfs_vport_count, 0); + } + + snprintf(name, sizeof(name), "vport%d", vport->vpi); + if (!vport->vport_debugfs_root) { + vport->vport_debugfs_root = + debugfs_create_dir(name, phba->hba_debugfs_root); + if (!vport->vport_debugfs_root) + goto debug_failed; + atomic_inc(&phba->debugfs_vport_count); + } + + if (!lpfc_debugfs_start_time) + lpfc_debugfs_start_time = jiffies; + + vport->disc_trc = kmalloc( + (sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc), + GFP_KERNEL); + + if (!vport->disc_trc) + goto debug_failed; + memset(vport->disc_trc, 0, + (sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc)); + + snprintf(name, sizeof(name), "discovery_trace"); + vport->debug_disc_trc = + debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_disc_trc); + if (!vport->debug_disc_trc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0409 Cannot create debugfs", + phba->brd_no); + goto 
debug_failed; + } + snprintf(name, sizeof(name), "nodelist"); + vport->debug_nodelist = + debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_nodelist); + if (!vport->debug_nodelist) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0409 Cannot create debugfs", + phba->brd_no); + goto debug_failed; + } +debug_failed: + return; +#endif +} + + +inline void +lpfc_debugfs_terminate(struct lpfc_vport *vport) +{ +#ifdef CONFIG_LPFC_DEBUG_FS + struct lpfc_hba *phba = vport->phba; + + if (vport->disc_trc) { + kfree(vport->disc_trc); + vport->disc_trc = NULL; + } + if (vport->debug_disc_trc) { + debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ + vport->debug_disc_trc = NULL; + } + if (vport->debug_nodelist) { + debugfs_remove(vport->debug_nodelist); /* nodelist */ + vport->debug_nodelist = NULL; + } + if (vport->vport_debugfs_root) { + debugfs_remove(vport->vport_debugfs_root); /* vportX */ + vport->vport_debugfs_root = NULL; + atomic_dec(&phba->debugfs_vport_count); + } + if (atomic_read(&phba->debugfs_vport_count) == 0) { + debugfs_remove(vport->phba->hba_debugfs_root); /* lpfcX */ + vport->phba->hba_debugfs_root = NULL; + atomic_dec(&lpfc_debugfs_hba_count); + if (atomic_read(&lpfc_debugfs_hba_count) == 0) { + debugfs_remove(lpfc_debugfs_root); /* lpfc */ + lpfc_debugfs_root = NULL; + } + } +#endif +} + + diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,50 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2007 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. 
* + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + *******************************************************************/ + +#ifndef _H_LPFC_DEBUG_FS +#define _H_LPFC_DEBUG_FS + +#ifdef CONFIG_LPFC_DEBUG_FS +struct lpfc_disc_trc { + char *fmt; + uint32_t data1; + uint32_t data2; + uint32_t data3; + uint32_t seq_cnt; + unsigned long jif; +}; +#endif + +/* Mask for discovery_trace */ +#define LPFC_DISC_TRC_ELS_CMD 0x1 /* Trace ELS commands */ +#define LPFC_DISC_TRC_ELS_RSP 0x2 /* Trace ELS response */ +#define LPFC_DISC_TRC_ELS_UNSOL 0x4 /* Trace ELS rcv'ed */ +#define LPFC_DISC_TRC_ELS_ALL 0x7 /* Trace ELS */ +#define LPFC_DISC_TRC_MBOX_VPORT 0x8 /* Trace vport MBOXs */ +#define LPFC_DISC_TRC_MBOX 0x10 /* Trace other MBOXs */ +#define LPFC_DISC_TRC_MBOX_ALL 0x18 /* Trace all MBOXs */ +#define LPFC_DISC_TRC_CT 0x20 /* Trace disc CT requests */ +#define LPFC_DISC_TRC_DSM 0x40 /* Trace DSM events */ +#define LPFC_DISC_TRC_RPORT 0x80 /* Trace rport events */ +#define LPFC_DISC_TRC_NODE 0x100 /* Trace ndlp state changes */ + +#define LPFC_DISC_TRC_DISCOVERY 0xef /* common mask for general + * discovery */ +#endif /* H_LPFC_DEBUG_FS */ diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_disc.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_disc.h 2007-12-21 15:36:12.000000000 -0500 @@ -36,21 +36,23 @@ LPFC_EVT_WARM_START, LPFC_EVT_KILL, LPFC_EVT_ELS_RETRY, + LPFC_EVT_DEV_LOSS_DELAY, + LPFC_EVT_DEV_LOSS, }; /* structure used to queue event to the discovery tasklet */ struct lpfc_work_evt { struct list_head evt_listp; - void * evt_arg1; - void * evt_arg2; + void *evt_arg1; + void *evt_arg2; enum lpfc_work_type evt; }; struct lpfc_nodelist { struct list_head nlp_listp; - struct lpfc_name nlp_portname; /* port name */ - struct lpfc_name nlp_nodename; /* node name */ + struct lpfc_name nlp_portname; + struct lpfc_name nlp_nodename; uint32_t nlp_flag; /* entry flags */ uint32_t nlp_DID; /* FC D_ID of entry */ uint32_t nlp_last_elscmd; /* Last ELS cmd sent */ @@ -75,8 +77,9 @@ struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */ struct fc_rport *rport; /* Corresponding FC transport port structure */ - struct lpfc_hba *nlp_phba; + struct lpfc_vport *vport; struct lpfc_work_evt els_retry_evt; + struct lpfc_work_evt dev_loss_evt; unsigned long last_ramp_up_time; /* jiffy of last ramp up */ unsigned long last_q_full_time; /* jiffy of last queue full */ struct kref kref; @@ -98,7 +101,9 @@ ACC */ #define NLP_NPR_ADISC 0x2000000 /* Issue ADISC when dq'ed from NPR list */ +#define NLP_RM_DFLT_RPI 0x4000000 /* need to remove leftover dflt RPI */ #define NLP_NODEV_REMOVE 0x8000000 /* Defer removal till discovery ends */ +#define NLP_TARGET_REMOVE 0x10000000 /* Target remove in process */ /* There are 4 different double linked lists nodelist entries can reside on. 
* The Port Login (PLOGI) list and Address Discovery (ADISC) list are used diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_els.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_els.c 2007-12-21 15:36:12.000000000 -0500 @@ -35,38 +35,38 @@ #include "lpfc.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" +#include "lpfc_vport.h" +#include "lpfc_debugfs.h" static int lpfc_els_retry(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); +static void lpfc_cmpl_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *); + static int lpfc_max_els_tries = 3; -static int -lpfc_els_chk_latt(struct lpfc_hba * phba) +int +lpfc_els_chk_latt(struct lpfc_vport *vport) { - struct lpfc_sli *psli; - LPFC_MBOXQ_t *mbox; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; uint32_t ha_copy; - int rc; - - psli = &phba->sli; - if ((phba->hba_state >= LPFC_HBA_READY) || - (phba->hba_state == LPFC_LINK_DOWN)) + if (vport->port_state >= LPFC_VPORT_READY || + phba->link_state == LPFC_LINK_DOWN) return 0; /* Read the HBA Host Attention Register */ - spin_lock_irq(phba->host->host_lock); ha_copy = readl(phba->HAregaddr); - spin_unlock_irq(phba->host->host_lock); if (!(ha_copy & HA_LATT)) return 0; /* Pending Link Event during Discovery */ - lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY, - "%d:0237 Pending Link Event during " + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0237 Pending Link Event during " "Discovery: State x%x\n", - phba->brd_no, phba->hba_state); + phba->brd_no, vport->vpi, phba->pport->port_state); /* CLEAR_LA should re-enable link attention events and * we should then imediately take a LATT event. The @@ -74,48 +74,34 @@ * will cleanup any left over in-progress discovery * events. 
*/ - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_ABORT_DISCOVERY; - spin_unlock_irq(phba->host->host_lock); - - if (phba->hba_state != LPFC_CLEAR_LA) { - if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { - phba->hba_state = LPFC_CLEAR_LA; - lpfc_clear_la(phba, mbox); - mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; - rc = lpfc_sli_issue_mbox (phba, mbox, - (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) { - mempool_free(mbox, phba->mbox_mem_pool); - phba->hba_state = LPFC_HBA_ERROR; - } - } - } + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_ABORT_DISCOVERY; + spin_unlock_irq(shost->host_lock); - return 1; + if (phba->link_state != LPFC_CLEAR_LA) + lpfc_issue_clear_la(phba, vport); + return 1; } static struct lpfc_iocbq * -lpfc_prep_els_iocb(struct lpfc_hba * phba, uint8_t expectRsp, - uint16_t cmdSize, uint8_t retry, struct lpfc_nodelist * ndlp, - uint32_t did, uint32_t elscmd) +lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp, + uint16_t cmdSize, uint8_t retry, + struct lpfc_nodelist *ndlp, uint32_t did, + uint32_t elscmd) { - struct lpfc_sli_ring *pring; + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *elsiocb; struct lpfc_dmabuf *pcmd, *prsp, *pbuflist; struct ulp_bde64 *bpl; IOCB_t *icmd; - pring = &phba->sli.ring[LPFC_ELS_RING]; - if (phba->hba_state < LPFC_LINK_UP) + if (!lpfc_is_link_up(phba)) return NULL; /* Allocate buffer for command iocb */ - spin_lock_irq(phba->host->host_lock); elsiocb = lpfc_sli_get_iocbq(phba); - spin_unlock_irq(phba->host->host_lock); if (elsiocb == NULL) return NULL; @@ -123,14 +109,12 @@ /* fill in BDEs for command */ /* Allocate buffer for command payload */ - if (((pcmd = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL)) == 0) || + if (((pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL)) == 0) || ((pcmd->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(pcmd->phys))) == 0)) { kfree(pcmd); - spin_lock_irq(phba->host->host_lock); lpfc_sli_release_iocbq(phba, 
elsiocb); - spin_unlock_irq(phba->host->host_lock); return NULL; } @@ -138,7 +122,7 @@ /* Allocate buffer for response payload */ if (expectRsp) { - prsp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); + prsp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (prsp) prsp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &prsp->phys); @@ -146,9 +130,7 @@ kfree(prsp); lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys); kfree(pcmd); - spin_lock_irq(phba->host->host_lock); lpfc_sli_release_iocbq(phba, elsiocb); - spin_unlock_irq(phba->host->host_lock); return NULL; } INIT_LIST_HEAD(&prsp->list); @@ -157,14 +139,12 @@ } /* Allocate buffer for Buffer ptr list */ - pbuflist = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); + pbuflist = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (pbuflist) pbuflist->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &pbuflist->phys); if (pbuflist == 0 || pbuflist->virt == 0) { - spin_lock_irq(phba->host->host_lock); lpfc_sli_release_iocbq(phba, elsiocb); - spin_unlock_irq(phba->host->host_lock); lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys); lpfc_mbuf_free(phba, prsp->virt, prsp->phys); kfree(pcmd); @@ -178,20 +158,28 @@ icmd->un.elsreq64.bdl.addrHigh = putPaddrHigh(pbuflist->phys); icmd->un.elsreq64.bdl.addrLow = putPaddrLow(pbuflist->phys); icmd->un.elsreq64.bdl.bdeFlags = BUFF_TYPE_BDL; - if (expectRsp) { - icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof (struct ulp_bde64)); icmd->un.elsreq64.remoteID = did; /* DID */ + if (expectRsp) { + icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof(struct ulp_bde64)); icmd->ulpCommand = CMD_ELS_REQUEST64_CR; icmd->ulpTimeout = phba->fc_ratov * 2; } else { - icmd->un.elsreq64.bdl.bdeSize = sizeof (struct ulp_bde64); + icmd->un.elsreq64.bdl.bdeSize = sizeof(struct ulp_bde64); icmd->ulpCommand = CMD_XMIT_ELS_RSP64_CX; } - icmd->ulpBdeCount = 1; icmd->ulpLe = 1; icmd->ulpClass = CLASS3; + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { + icmd->un.elsreq64.myID = vport->fc_myDID; + + /* For ELS_REQUEST64_CR, use the VPI 
by default */ + icmd->ulpContext = vport->vpi; + icmd->ulpCt_h = 0; + icmd->ulpCt_l = 1; + } + bpl = (struct ulp_bde64 *) pbuflist->virt; bpl->addrLow = le32_to_cpu(putPaddrLow(pcmd->phys)); bpl->addrHigh = le32_to_cpu(putPaddrHigh(pcmd->phys)); @@ -209,10 +197,12 @@ } /* Save for completion so we can release these resources */ + if (elscmd != ELS_CMD_LS_RJT) elsiocb->context1 = lpfc_nlp_get(ndlp); elsiocb->context2 = pcmd; elsiocb->context3 = pbuflist; elsiocb->retry = retry; + elsiocb->vport = vport; elsiocb->drvrTimeout = (phba->fc_ratov << 1) + LPFC_DRVR_TIMEOUT; if (prsp) { @@ -222,16 +212,16 @@ if (expectRsp) { /* Xmit ELS command to remote NPORT */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0116 Xmit ELS command x%x to remote " - "NPORT x%x I/O tag: x%x, HBA state: x%x\n", - phba->brd_no, elscmd, - did, elsiocb->iotag, phba->hba_state); + "%d (%d):0116 Xmit ELS command x%x to remote " + "NPORT x%x I/O tag: x%x, port state: x%x\n", + phba->brd_no, vport->vpi, elscmd, did, + elsiocb->iotag, vport->port_state); } else { /* Xmit ELS response to remote NPORT */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0117 Xmit ELS response x%x to remote " + "%d (%d):0117 Xmit ELS response x%x to remote " "NPORT x%x I/O tag: x%x, size: x%x\n", - phba->brd_no, elscmd, + phba->brd_no, vport->vpi, elscmd, ndlp->nlp_DID, elsiocb->iotag, cmdSize); } @@ -240,16 +230,79 @@ static int -lpfc_cmpl_els_flogi_fabric(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, - struct serv_parm *sp, IOCB_t *irsp) +lpfc_issue_fabric_reglogin(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *mbox; struct lpfc_dmabuf *mp; + struct lpfc_nodelist *ndlp; + struct serv_parm *sp; int rc; - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_FABRIC; - spin_unlock_irq(phba->host->host_lock); + sp = &phba->fc_fabparam; + ndlp = lpfc_findnode_did(vport, Fabric_DID); + if (!ndlp) + goto fail; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) 
+ goto fail; + + vport->port_state = LPFC_FABRIC_CFG_LINK; + lpfc_config_link(phba, mbox); + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mbox->vport = vport; + + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); + if (rc == MBX_NOT_FINISHED) + goto fail_free_mbox; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + goto fail; + rc = lpfc_reg_login(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox, + 0); + if (rc) + goto fail_free_mbox; + + mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login; + mbox->vport = vport; + mbox->context2 = lpfc_nlp_get(ndlp); + + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); + if (rc == MBX_NOT_FINISHED) + goto fail_issue_reg_login; + + return 0; + +fail_issue_reg_login: + lpfc_nlp_put(ndlp); + mp = (struct lpfc_dmabuf *) mbox->context1; + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); +fail_free_mbox: + mempool_free(mbox, phba->mbox_mem_pool); + +fail: + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0249 Cannot issue Register Fabric login\n", + phba->brd_no, vport->vpi); + return -ENXIO; +} + +static int +lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct serv_parm *sp, IOCB_t *irsp) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *np; + struct lpfc_nodelist *next_np; + + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_FABRIC; + spin_unlock_irq(shost->host_lock); phba->fc_edtov = be32_to_cpu(sp->cmn.e_d_tov); if (sp->cmn.edtovResolution) /* E_D_TOV ticks are in nanoseconds */ @@ -258,20 +311,20 @@ phba->fc_ratov = (be32_to_cpu(sp->cmn.w2.r_a_tov) + 999) / 1000; if (phba->fc_topology == TOPOLOGY_LOOP) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_PUBLIC_LOOP; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_PUBLIC_LOOP; + 
spin_unlock_irq(shost->host_lock); } else { /* * If we are a N-port connected to a Fabric, fixup sparam's so * logins to devices on remote loops work. */ - phba->fc_sparam.cmn.altBbCredit = 1; + vport->fc_sparam.cmn.altBbCredit = 1; } - phba->fc_myDID = irsp->un.ulpWord[4] & Mask_DID; + vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID; memcpy(&ndlp->nlp_portname, &sp->portName, sizeof(struct lpfc_name)); - memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name)); + memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof(struct lpfc_name)); ndlp->nlp_class_sup = 0; if (sp->cls1.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS1; @@ -285,68 +338,85 @@ sp->cmn.bbRcvSizeLsb; memcpy(&phba->fc_fabparam, sp, sizeof(struct serv_parm)); - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mbox) - goto fail; - - phba->hba_state = LPFC_FABRIC_CFG_LINK; - lpfc_config_link(phba, mbox); - mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - - rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); - if (rc == MBX_NOT_FINISHED) - goto fail_free_mbox; + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { + if (sp->cmn.response_multiple_NPort) { + lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT, + "%d:1816 FLOGI NPIV supported, " + "response data 0x%x\n", + phba->brd_no, + sp->cmn.response_multiple_NPort); + phba->link_flag |= LS_NPIV_FAB_SUPPORTED; - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mbox) - goto fail; + } else { + /* Because we asked f/w for NPIV it still expects us + to call reg_vnpid atleast for the physcial host */ + lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT, + "%d:1817 Fabric does not support NPIV " + "- configuring single port mode.\n", + phba->brd_no); + phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED; + } + } - if (lpfc_reg_login(phba, Fabric_DID, (uint8_t *) sp, mbox, 0)) - goto fail_free_mbox; + if ((vport->fc_prevDID != vport->fc_myDID) && + !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) { - mbox->mbox_cmpl = 
lpfc_mbx_cmpl_fabric_reg_login; - mbox->context2 = lpfc_nlp_get(ndlp); + /* If our NportID changed, we need to ensure all + * remaining NPORTs get unreg_login'ed. + */ + list_for_each_entry_safe(np, next_np, + &vport->fc_nodes, nlp_listp) { + if ((np->nlp_state != NLP_STE_NPR_NODE) || + !(np->nlp_flag & NLP_NPR_ADISC)) + continue; + spin_lock_irq(shost->host_lock); + np->nlp_flag &= ~NLP_NPR_ADISC; + spin_unlock_irq(shost->host_lock); + lpfc_unreg_rpi(vport, np); + } + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { + lpfc_mbx_unreg_vpi(vport); + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + } + } - rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); - if (rc == MBX_NOT_FINISHED) - goto fail_issue_reg_login; + ndlp->nlp_sid = irsp->un.ulpWord[4] & Mask_DID; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE); + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED && + vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) { + lpfc_register_new_vport(phba, vport, ndlp); + return 0; + } + lpfc_issue_fabric_reglogin(vport); return 0; - - fail_issue_reg_login: - lpfc_nlp_put(ndlp); - mp = (struct lpfc_dmabuf *) mbox->context1; - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - fail_free_mbox: - mempool_free(mbox, phba->mbox_mem_pool); - fail: - return -ENXIO; } /* * We FLOGIed into an NPort, initiate pt2pt protocol */ static int -lpfc_cmpl_els_flogi_nport(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, +lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct serv_parm *sp) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *mbox; int rc; - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + spin_unlock_irq(shost->host_lock); phba->fc_edtov = FF_DEF_EDTOV; phba->fc_ratov = FF_DEF_RATOV; - rc = 
memcmp(&phba->fc_portname, &sp->portName, - sizeof(struct lpfc_name)); + rc = memcmp(&vport->fc_portname, &sp->portName, + sizeof(vport->fc_portname)); if (rc >= 0) { /* This side will initiate the PLOGI */ - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_PT2PT_PLOGI; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_PT2PT_PLOGI; + spin_unlock_irq(shost->host_lock); /* * N_Port ID cannot be 0, set our to LocalID the other @@ -355,7 +425,7 @@ /* not equal */ if (rc) - phba->fc_myDID = PT2PT_LocalID; + vport->fc_myDID = PT2PT_LocalID; mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) @@ -364,6 +434,7 @@ lpfc_config_link(phba, mbox); mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mbox->vport = vport; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); if (rc == MBX_NOT_FINISHED) { @@ -372,7 +443,7 @@ } lpfc_nlp_put(ndlp); - ndlp = lpfc_findnode_did(phba, PT2PT_RemoteID); + ndlp = lpfc_findnode_did(vport, PT2PT_RemoteID); if (!ndlp) { /* * Cannot find existing Fabric ndlp, so allocate a @@ -382,28 +453,30 @@ if (!ndlp) goto fail; - lpfc_nlp_init(phba, ndlp, PT2PT_RemoteID); + lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID); } memcpy(&ndlp->nlp_portname, &sp->portName, sizeof(struct lpfc_name)); memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof(struct lpfc_name)); - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; + spin_unlock_irq(shost->host_lock); } else { /* This side will wait for the PLOGI */ lpfc_nlp_put(ndlp); } - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_PT2PT; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_PT2PT; + spin_unlock_irq(shost->host_lock); /* Start discovery - this should just do CLEAR_LA */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); return 0; - fail: +fail: return 
-ENXIO; } @@ -411,6 +484,8 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb) { + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp = &rspiocb->iocb; struct lpfc_nodelist *ndlp = cmdiocb->context1; struct lpfc_dmabuf *pcmd = cmdiocb->context2, *prsp; @@ -418,21 +493,25 @@ int rc; /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba)) { + if (lpfc_els_chk_latt(vport)) { lpfc_nlp_put(ndlp); goto out; } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "FLOGI cmpl: status:x%x/x%x state:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + vport->port_state); + if (irsp->ulpStatus) { /* Check for retry */ - if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { - /* ELS command is being retried */ + if (lpfc_els_retry(phba, cmdiocb, rspiocb)) goto out; - } + /* FLOGI failed, so there is no fabric */ - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + spin_unlock_irq(shost->host_lock); /* If private loop, then allow max outstanding els to be * LPFC_MAX_DISC_THREADS (32). 
Scanning in the case of no @@ -443,11 +522,10 @@ } /* FLOGI failure */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_ELS, - "%d:0100 FLOGI failure Data: x%x x%x x%x\n", - phba->brd_no, + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0100 FLOGI failure Data: x%x x%x " + "x%x\n", + phba->brd_no, vport->vpi, irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout); goto flogifail; @@ -463,21 +541,21 @@ /* FLOGI completes successfully */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0101 FLOGI completes sucessfully " + "%d (%d):0101 FLOGI completes sucessfully " "Data: x%x x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, irsp->un.ulpWord[4], sp->cmn.e_d_tov, sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution); - if (phba->hba_state == LPFC_FLOGI) { + if (vport->port_state == LPFC_FLOGI) { /* * If Common Service Parameters indicate Nport * we are point to point, if Fport we are Fabric. */ if (sp->cmn.fPort) - rc = lpfc_cmpl_els_flogi_fabric(phba, ndlp, sp, irsp); + rc = lpfc_cmpl_els_flogi_fabric(vport, ndlp, sp, irsp); else - rc = lpfc_cmpl_els_flogi_nport(phba, ndlp, sp); + rc = lpfc_cmpl_els_flogi_nport(vport, ndlp, sp); if (!rc) goto out; @@ -486,14 +564,12 @@ flogifail: lpfc_nlp_put(ndlp); - if (irsp->ulpStatus != IOSTAT_LOCAL_REJECT || - (irsp->un.ulpWord[4] != IOERR_SLI_ABORTED && - irsp->un.ulpWord[4] != IOERR_SLI_DOWN)) { + if (!lpfc_error_lost_link(irsp)) { /* FLOGI failed, so just use loop map to make discovery list */ - lpfc_disc_list_loopmap(phba); + lpfc_disc_list_loopmap(vport); /* Start discovery */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); } out: @@ -501,9 +577,10 @@ } static int -lpfc_issue_els_flogi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, +lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint8_t retry) { + struct lpfc_hba *phba = vport->phba; struct serv_parm *sp; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; @@ -515,9 +592,10 @@ pring = &phba->sli.ring[LPFC_ELS_RING]; - cmdsize = (sizeof (uint32_t) 
+ sizeof (struct serv_parm)); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, + cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_FLOGI); + if (!elsiocb) return 1; @@ -526,8 +604,8 @@ /* For FLOGI request, remainder of payload is service parameters */ *((uint32_t *) (pcmd)) = ELS_CMD_FLOGI; - pcmd += sizeof (uint32_t); - memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm)); + pcmd += sizeof(uint32_t); + memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); sp = (struct serv_parm *) pcmd; /* Setup CSPs accordingly for Fabric */ @@ -541,16 +619,32 @@ if (sp->cmn.fcphHigh < FC_PH3) sp->cmn.fcphHigh = FC_PH3; + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { + sp->cmn.request_multiple_Nport = 1; + + /* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */ + icmd->ulpCt_h = 1; + icmd->ulpCt_l = 0; + } + + if (phba->fc_topology != TOPOLOGY_LOOP) { + icmd->un.elsreq64.myID = 0; + icmd->un.elsreq64.fl = 1; + } + tmo = phba->fc_ratov; phba->fc_ratov = LPFC_DISC_FLOGI_TMO; - lpfc_set_disctmo(phba); + lpfc_set_disctmo(vport); phba->fc_ratov = tmo; phba->fc_stat.elsXmitFLOGI++; elsiocb->iocb_cmpl = lpfc_cmpl_els_flogi; - spin_lock_irq(phba->host->host_lock); - rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); - spin_unlock_irq(phba->host->host_lock); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue FLOGI: opt:x%x", + phba->sli3_options, 0, 0); + + rc = lpfc_issue_fabric_iocb(phba, elsiocb); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -559,7 +653,7 @@ } int -lpfc_els_abort_flogi(struct lpfc_hba * phba) +lpfc_els_abort_flogi(struct lpfc_hba *phba) { struct lpfc_sli_ring *pring; struct lpfc_iocbq *iocb, *next_iocb; @@ -577,73 +671,99 @@ * Check the txcmplq for an iocb that matches the nport the driver is * searching for. 
*/ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) { icmd = &iocb->iocb; - if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR) { + if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR && + icmd->un.elsreq64.bdl.ulpIoTag32) { ndlp = (struct lpfc_nodelist *)(iocb->context1); - if (ndlp && (ndlp->nlp_DID == Fabric_DID)) + if (ndlp && (ndlp->nlp_DID == Fabric_DID)) { lpfc_sli_issue_abort_iotag(phba, pring, iocb); } } - spin_unlock_irq(phba->host->host_lock); + } + spin_unlock_irq(&phba->hbalock); return 0; } int -lpfc_initial_flogi(struct lpfc_hba *phba) +lpfc_initial_flogi(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* First look for the Fabric ndlp */ - ndlp = lpfc_findnode_did(phba, Fabric_DID); + ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); if (!ndlp) return 0; - lpfc_nlp_init(phba, ndlp, Fabric_DID); + lpfc_nlp_init(vport, ndlp, Fabric_DID); } else { - lpfc_dequeue_node(phba, ndlp); + lpfc_dequeue_node(vport, ndlp); } - if (lpfc_issue_els_flogi(phba, ndlp, 0)) { + if (lpfc_issue_els_flogi(vport, ndlp, 0)) { lpfc_nlp_put(ndlp); } return 1; } +int +lpfc_initial_fdisc(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp; + + /* First look for the Fabric ndlp */ + ndlp = lpfc_findnode_did(vport, Fabric_DID); + if (!ndlp) { + /* Cannot find existing Fabric ndlp, so allocate a new one */ + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) + return 0; + lpfc_nlp_init(vport, ndlp, Fabric_DID); + } else { + lpfc_dequeue_node(vport, ndlp); + } + if (lpfc_issue_els_fdisc(vport, ndlp, 0)) { + lpfc_nlp_put(ndlp); + } + return 1; +} static void -lpfc_more_plogi(struct lpfc_hba * phba) +lpfc_more_plogi(struct lpfc_vport *vport) { int sentplogi; + struct lpfc_hba *phba 
= vport->phba; - if (phba->num_disc_nodes) - phba->num_disc_nodes--; + if (vport->num_disc_nodes) + vport->num_disc_nodes--; /* Continue discovery with PLOGIs to go */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0232 Continue discovery with %d PLOGIs to go " + "%d (%d):0232 Continue discovery with %d PLOGIs to go " "Data: x%x x%x x%x\n", - phba->brd_no, phba->num_disc_nodes, phba->fc_plogi_cnt, - phba->fc_flag, phba->hba_state); + phba->brd_no, vport->vpi, vport->num_disc_nodes, + vport->fc_plogi_cnt, vport->fc_flag, vport->port_state); /* Check to see if there are more PLOGIs to be sent */ - if (phba->fc_flag & FC_NLP_MORE) { - /* go thru NPR list and issue any remaining ELS PLOGIs */ - sentplogi = lpfc_els_disc_plogi(phba); - } + if (vport->fc_flag & FC_NLP_MORE) + /* go thru NPR nodes and issue any remaining ELS PLOGIs */ + sentplogi = lpfc_els_disc_plogi(vport); + return; } static struct lpfc_nodelist * -lpfc_plogi_confirm_nport(struct lpfc_hba *phba, struct lpfc_dmabuf *prsp, +lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, struct lpfc_nodelist *ndlp) { + struct lpfc_vport *vport = ndlp->vport; struct lpfc_nodelist *new_ndlp; - uint32_t *lp; struct serv_parm *sp; - uint8_t name[sizeof (struct lpfc_name)]; + uint8_t name[sizeof(struct lpfc_name)]; uint32_t rc; /* Fabric nodes can have the same WWPN so we don't bother searching @@ -652,50 +772,51 @@ if (ndlp->nlp_type & NLP_FABRIC) return ndlp; - lp = (uint32_t *) prsp->virt; - sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); + sp = (struct serv_parm *) ((uint8_t *) prsp + sizeof(uint32_t)); memset(name, 0, sizeof(struct lpfc_name)); /* Now we find out if the NPort we are logging into, matches the WWPN * we have for that ndlp. If not, we have some work to do. 
*/ - new_ndlp = lpfc_findnode_wwpn(phba, &sp->portName); + new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName); if (new_ndlp == ndlp) return ndlp; if (!new_ndlp) { - rc = - memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)); + rc = memcmp(&ndlp->nlp_portname, name, + sizeof(struct lpfc_name)); if (!rc) return ndlp; new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); if (!new_ndlp) return ndlp; - lpfc_nlp_init(phba, new_ndlp, ndlp->nlp_DID); + lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID); } - lpfc_unreg_rpi(phba, new_ndlp); + lpfc_unreg_rpi(vport, new_ndlp); new_ndlp->nlp_DID = ndlp->nlp_DID; new_ndlp->nlp_prev_state = ndlp->nlp_prev_state; - lpfc_nlp_set_state(phba, new_ndlp, ndlp->nlp_state); + lpfc_nlp_set_state(vport, new_ndlp, ndlp->nlp_state); - /* Move this back to NPR list */ + /* Move this back to NPR state */ if (memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)) == 0) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); else { - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); ndlp->nlp_DID = 0; /* Two ndlps cannot have the same did */ - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); } return new_ndlp; } static void -lpfc_cmpl_els_plogi(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; struct lpfc_nodelist *ndlp; struct lpfc_dmabuf *prsp; @@ -705,32 +826,43 @@ cmdiocb->context_un.rsp_iocb = rspiocb; irsp = &rspiocb->iocb; - ndlp = lpfc_findnode_did(phba, irsp->un.elsreq64.remoteID); - if (!ndlp) + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "PLOGI cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + irsp->un.elsreq64.remoteID); + + ndlp = lpfc_findnode_did(vport, 
irsp->un.elsreq64.remoteID); + if (!ndlp) { + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0136 PLOGI completes to NPort x%x " + "with no ndlp. Data: x%x x%x x%x\n", + phba->brd_no, vport->vpi, irsp->un.elsreq64.remoteID, + irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpIoTag); goto out; + } /* Since ndlp can be freed in the disc state machine, note if this node * is being used during discovery. */ + spin_lock_irq(shost->host_lock); disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC); - spin_lock_irq(phba->host->host_lock); ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); rc = 0; /* PLOGI completes to NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0102 PLOGI completes to NPort x%x " + "%d (%d):0102 PLOGI completes to NPort x%x " "Data: x%x x%x x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, - irsp->un.ulpWord[4], irsp->ulpTimeout, disc, - phba->num_disc_nodes); + phba->brd_no, vport->vpi, ndlp->nlp_DID, + irsp->ulpStatus, irsp->un.ulpWord[4], + irsp->ulpTimeout, disc, vport->num_disc_nodes); /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba)) { - spin_lock_irq(phba->host->host_lock); + if (lpfc_els_chk_latt(vport)) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); goto out; } @@ -743,22 +875,28 @@ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { /* ELS command is being retried */ if (disc) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); } goto out; } /* PLOGI failed */ + if (ndlp->nlp_DID == NameServer_DID) { + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0250 Nameserver login error: " + "0x%x / 0x%x\n", + phba->brd_no, vport->vpi, + irsp->ulpStatus, 
irsp->un.ulpWord[4]); + } + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ - if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && - ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || - (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || - (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) { + if (lpfc_error_lost_link(irsp)) { rc = NLP_STE_FREED_NODE; } else { - rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb, + rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PLOGI); } } else { @@ -766,33 +904,33 @@ prsp = list_entry(((struct lpfc_dmabuf *) cmdiocb->context2)->list.next, struct lpfc_dmabuf, list); - ndlp = lpfc_plogi_confirm_nport(phba, prsp, ndlp); - rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb, + ndlp = lpfc_plogi_confirm_nport(phba, prsp->virt, ndlp); + rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PLOGI); } - if (disc && phba->num_disc_nodes) { + if (disc && vport->num_disc_nodes) { /* Check to see if there are more PLOGIs to be sent */ - lpfc_more_plogi(phba); + lpfc_more_plogi(vport); - if (phba->num_disc_nodes == 0) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_NDISC_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + if (vport->num_disc_nodes == 0) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NDISC_ACTIVE; + spin_unlock_irq(shost->host_lock); - lpfc_can_disctmo(phba); - if (phba->fc_flag & FC_RSCN_MODE) { + lpfc_can_disctmo(vport); + if (vport->fc_flag & FC_RSCN_MODE) { /* * Check to see if more RSCNs came in while * we were processing this one. 
*/ - if ((phba->fc_rscn_id_cnt == 0) && - (!(phba->fc_flag & FC_RSCN_DISCOVERY))) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_RSCN_MODE; - spin_unlock_irq(phba->host->host_lock); + if ((vport->fc_rscn_id_cnt == 0) && + (!(vport->fc_flag & FC_RSCN_DISCOVERY))) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_RSCN_MODE; + spin_unlock_irq(shost->host_lock); } else { - lpfc_els_handle_rscn(phba); + lpfc_els_handle_rscn(vport); } } } @@ -804,8 +942,9 @@ } int -lpfc_issue_els_plogi(struct lpfc_hba * phba, uint32_t did, uint8_t retry) +lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) { + struct lpfc_hba *phba = vport->phba; struct serv_parm *sp; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; @@ -813,12 +952,13 @@ struct lpfc_sli *psli; uint8_t *pcmd; uint16_t cmdsize; + int ret; psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm)); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, NULL, did, + cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, NULL, did, ELS_CMD_PLOGI); if (!elsiocb) return 1; @@ -828,8 +968,8 @@ /* For PLOGI request, remainder of payload is service parameters */ *((uint32_t *) (pcmd)) = ELS_CMD_PLOGI; - pcmd += sizeof (uint32_t); - memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm)); + pcmd += sizeof(uint32_t); + memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); sp = (struct serv_parm *) pcmd; if (sp->cmn.fcphLow < FC_PH_4_3) @@ -838,22 +978,27 @@ if (sp->cmn.fcphHigh < FC_PH3) sp->cmn.fcphHigh = FC_PH3; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue PLOGI: did:x%x", + did, 0, 0); + phba->fc_stat.elsXmitPLOGI++; elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi; - spin_lock_irq(phba->host->host_lock); - if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { - spin_unlock_irq(phba->host->host_lock); + ret = 
lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); + + if (ret == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; } - spin_unlock_irq(phba->host->host_lock); return 0; } static void -lpfc_cmpl_els_prli(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; struct lpfc_sli *psli; struct lpfc_nodelist *ndlp; @@ -864,21 +1009,26 @@ irsp = &(rspiocb->iocb); ndlp = (struct lpfc_nodelist *) cmdiocb->context1; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_PRLI_SND; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "PRLI cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + ndlp->nlp_DID); /* PRLI completes to NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0103 PRLI completes to NPort x%x " + "%d (%d):0103 PRLI completes to NPort x%x " "Data: x%x x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, - irsp->un.ulpWord[4], irsp->ulpTimeout, - phba->num_disc_nodes); + phba->brd_no, vport->vpi, ndlp->nlp_DID, + irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout, + vport->num_disc_nodes); - phba->fc_prli_sent--; + vport->fc_prli_sent--; /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba)) + if (lpfc_els_chk_latt(vport)) goto out; if (irsp->ulpStatus) { @@ -889,18 +1039,16 @@ } /* PRLI failed */ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ - if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && - ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || - (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || - (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) { + if (lpfc_error_lost_link(irsp)) { goto out; } else { - 
lpfc_disc_state_machine(phba, ndlp, cmdiocb, + lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); } } else { /* Good status, call state machine */ - lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_PRLI); } out: @@ -909,9 +1057,11 @@ } int -lpfc_issue_els_prli(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, +lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint8_t retry) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; PRLI *npr; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; @@ -923,8 +1073,8 @@ psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - cmdsize = (sizeof (uint32_t) + sizeof (PRLI)); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, + cmdsize = (sizeof(uint32_t) + sizeof(PRLI)); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_PRLI); if (!elsiocb) return 1; @@ -933,9 +1083,9 @@ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); /* For PRLI request, remainder of payload is service parameters */ - memset(pcmd, 0, (sizeof (PRLI) + sizeof (uint32_t))); + memset(pcmd, 0, (sizeof(PRLI) + sizeof(uint32_t))); *((uint32_t *) (pcmd)) = ELS_CMD_PRLI; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ npr = (PRLI *) pcmd; @@ -955,81 +1105,88 @@ npr->prliType = PRLI_FCP_TYPE; npr->initiatorFunc = 1; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue PRLI: did:x%x", + ndlp->nlp_DID, 0, 0); + phba->fc_stat.elsXmitPRLI++; elsiocb->iocb_cmpl = lpfc_cmpl_els_prli; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_PRLI_SND; + spin_unlock_irq(shost->host_lock); if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_PRLI_SND; - 
spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); lpfc_els_free_iocb(phba, elsiocb); return 1; } - spin_unlock_irq(phba->host->host_lock); - phba->fc_prli_sent++; + vport->fc_prli_sent++; return 0; } static void -lpfc_more_adisc(struct lpfc_hba * phba) +lpfc_more_adisc(struct lpfc_vport *vport) { int sentadisc; + struct lpfc_hba *phba = vport->phba; - if (phba->num_disc_nodes) - phba->num_disc_nodes--; + if (vport->num_disc_nodes) + vport->num_disc_nodes--; /* Continue discovery with ADISCs to go */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0210 Continue discovery with %d ADISCs to go " + "%d (%d):0210 Continue discovery with %d ADISCs to go " "Data: x%x x%x x%x\n", - phba->brd_no, phba->num_disc_nodes, phba->fc_adisc_cnt, - phba->fc_flag, phba->hba_state); + phba->brd_no, vport->vpi, vport->num_disc_nodes, + vport->fc_adisc_cnt, vport->fc_flag, vport->port_state); /* Check to see if there are more ADISCs to be sent */ - if (phba->fc_flag & FC_NLP_MORE) { - lpfc_set_disctmo(phba); - - /* go thru NPR list and issue any remaining ELS ADISCs */ - sentadisc = lpfc_els_disc_adisc(phba); + if (vport->fc_flag & FC_NLP_MORE) { + lpfc_set_disctmo(vport); + /* go thru NPR nodes and issue any remaining ELS ADISCs */ + sentadisc = lpfc_els_disc_adisc(vport); } return; } static void -lpfc_rscn_disc(struct lpfc_hba * phba) +lpfc_rscn_disc(struct lpfc_vport *vport) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + lpfc_can_disctmo(vport); + /* RSCN discovery */ - /* go thru NPR list and issue ELS PLOGIs */ - if (phba->fc_npr_cnt) { - if (lpfc_els_disc_plogi(phba)) + /* go thru NPR nodes and issue ELS PLOGIs */ + if (vport->fc_npr_cnt) + if (lpfc_els_disc_plogi(vport)) return; - } - if (phba->fc_flag & FC_RSCN_MODE) { + + if (vport->fc_flag & FC_RSCN_MODE) { /* Check to see if more RSCNs came in while we were * processing this one. 
*/ - if ((phba->fc_rscn_id_cnt == 0) && - (!(phba->fc_flag & FC_RSCN_DISCOVERY))) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_RSCN_MODE; - spin_unlock_irq(phba->host->host_lock); + if ((vport->fc_rscn_id_cnt == 0) && + (!(vport->fc_flag & FC_RSCN_DISCOVERY))) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_RSCN_MODE; + spin_unlock_irq(shost->host_lock); } else { - lpfc_els_handle_rscn(phba); + lpfc_els_handle_rscn(vport); } } } static void -lpfc_cmpl_els_adisc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; - struct lpfc_sli *psli; struct lpfc_nodelist *ndlp; - LPFC_MBOXQ_t *mbox; - int disc, rc; - - psli = &phba->sli; + int disc; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; @@ -1037,27 +1194,32 @@ irsp = &(rspiocb->iocb); ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "ADISC cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + ndlp->nlp_DID); + /* Since ndlp can be freed in the disc state machine, note if this node * is being used during discovery. 
*/ + spin_lock_irq(shost->host_lock); disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC); - spin_lock_irq(phba->host->host_lock); ndlp->nlp_flag &= ~(NLP_ADISC_SND | NLP_NPR_2B_DISC); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); /* ADISC completes to NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0104 ADISC completes to NPort x%x " + "%d (%d):0104 ADISC completes to NPort x%x " "Data: x%x x%x x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, - irsp->un.ulpWord[4], irsp->ulpTimeout, disc, - phba->num_disc_nodes); + phba->brd_no, vport->vpi, ndlp->nlp_DID, + irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout, + disc, vport->num_disc_nodes); /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba)) { - spin_lock_irq(phba->host->host_lock); + if (lpfc_els_chk_latt(vport)) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); goto out; } @@ -1066,67 +1228,68 @@ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { /* ELS command is being retried */ if (disc) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; - spin_unlock_irq(phba->host->host_lock); - lpfc_set_disctmo(phba); + spin_unlock_irq(shost->host_lock); + lpfc_set_disctmo(vport); } goto out; } /* ADISC failed */ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ - if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) || - ((irsp->un.ulpWord[4] != IOERR_SLI_ABORTED) && - (irsp->un.ulpWord[4] != IOERR_LINK_DOWN) && - (irsp->un.ulpWord[4] != IOERR_SLI_DOWN))) { - lpfc_disc_state_machine(phba, ndlp, cmdiocb, + if (!lpfc_error_lost_link(irsp)) { + lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_ADISC); } } else { /* Good status, call state machine */ - lpfc_disc_state_machine(phba, ndlp, cmdiocb, + lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_ADISC); } - if (disc && 
phba->num_disc_nodes) { + if (disc && vport->num_disc_nodes) { /* Check to see if there are more ADISCs to be sent */ - lpfc_more_adisc(phba); + lpfc_more_adisc(vport); /* Check to see if we are done with ADISC authentication */ - if (phba->num_disc_nodes == 0) { - lpfc_can_disctmo(phba); - /* If we get here, there is nothing left to wait for */ - if ((phba->hba_state < LPFC_HBA_READY) && - (phba->hba_state != LPFC_CLEAR_LA)) { - /* Link up discovery */ - if ((mbox = mempool_alloc(phba->mbox_mem_pool, - GFP_KERNEL))) { - phba->hba_state = LPFC_CLEAR_LA; - lpfc_clear_la(phba, mbox); - mbox->mbox_cmpl = - lpfc_mbx_cmpl_clear_la; - rc = lpfc_sli_issue_mbox - (phba, mbox, - (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) { - mempool_free(mbox, - phba->mbox_mem_pool); - lpfc_disc_flush_list(phba); - psli->ring[(psli->extra_ring)]. - flag &= - ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->fcp_ring)]. - flag &= - ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->next_ring)]. - flag &= - ~LPFC_STOP_IOCB_EVENT; - phba->hba_state = - LPFC_HBA_READY; + if (vport->num_disc_nodes == 0) { + /* If we get here, there is nothing left to ADISC */ + /* + * For NPIV, cmpl_reg_vpi will set port_state to READY, + * and continue discovery. + */ + if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + !(vport->fc_flag & FC_RSCN_MODE)) { + lpfc_issue_reg_vpi(phba, vport); + goto out; + } + /* + * For SLI2, we need to set port_state to READY + * and continue discovery. 
+ */ + if (vport->port_state < LPFC_VPORT_READY) { + /* If we get here, there is nothing to ADISC */ + if (vport->port_type == LPFC_PHYSICAL_PORT) + lpfc_issue_clear_la(phba, vport); + + if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) { + vport->num_disc_nodes = 0; + /* go thru NPR list, issue ELS PLOGIs */ + if (vport->fc_npr_cnt) + lpfc_els_disc_plogi(vport); + + if (!vport->num_disc_nodes) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= + ~FC_NDISC_ACTIVE; + spin_unlock_irq( + shost->host_lock); + lpfc_can_disctmo(vport); } } + vport->port_state = LPFC_VPORT_READY; } else { - lpfc_rscn_disc(phba); + lpfc_rscn_disc(vport); } } } @@ -1136,22 +1299,21 @@ } int -lpfc_issue_els_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, +lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint8_t retry) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; ADISC *ap; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; - struct lpfc_sli_ring *pring; - struct lpfc_sli *psli; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; uint8_t *pcmd; uint16_t cmdsize; - psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - - cmdsize = (sizeof (uint32_t) + sizeof (ADISC)); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, + cmdsize = (sizeof(uint32_t) + sizeof(ADISC)); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_ADISC); if (!elsiocb) return 1; @@ -1161,81 +1323,97 @@ /* For ADISC request, remainder of payload is service parameters */ *((uint32_t *) (pcmd)) = ELS_CMD_ADISC; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); /* Fill in ADISC payload */ ap = (ADISC *) pcmd; ap->hardAL_PA = phba->fc_pref_ALPA; - memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name)); - memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); - ap->DID = be32_to_cpu(phba->fc_myDID); + 
memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name)); + memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); + ap->DID = be32_to_cpu(vport->fc_myDID); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue ADISC: did:x%x", + ndlp->nlp_DID, 0, 0); phba->fc_stat.elsXmitADISC++; elsiocb->iocb_cmpl = lpfc_cmpl_els_adisc; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_ADISC_SND; + spin_unlock_irq(shost->host_lock); if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_ADISC_SND; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); lpfc_els_free_iocb(phba, elsiocb); return 1; } - spin_unlock_irq(phba->host->host_lock); return 0; } static void -lpfc_cmpl_els_logo(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + struct lpfc_vport *vport = ndlp->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; struct lpfc_sli *psli; - struct lpfc_nodelist *ndlp; psli = &phba->sli; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; irsp = &(rspiocb->iocb); - ndlp = (struct lpfc_nodelist *) cmdiocb->context1; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_LOGO_SND; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "LOGO cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + ndlp->nlp_DID); /* LOGO completes to NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0105 LOGO completes to NPort x%x " + "%d (%d):0105 LOGO completes to NPort x%x " "Data: x%x 
x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, - irsp->un.ulpWord[4], irsp->ulpTimeout, - phba->num_disc_nodes); + phba->brd_no, vport->vpi, ndlp->nlp_DID, + irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout, + vport->num_disc_nodes); /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba)) + if (lpfc_els_chk_latt(vport)) + goto out; + + if (ndlp->nlp_flag & NLP_TARGET_REMOVE) { + /* NLP_EVT_DEVICE_RM should unregister the RPI + * which should abort all outstanding IOs. + */ + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_DEVICE_RM); goto out; + } if (irsp->ulpStatus) { /* Check for retry */ - if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { + if (lpfc_els_retry(phba, cmdiocb, rspiocb)) /* ELS command is being retried */ goto out; - } /* LOGO failed */ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ - if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && - ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || - (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || - (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) { + if (lpfc_error_lost_link(irsp)) goto out; - } else { - lpfc_disc_state_machine(phba, ndlp, cmdiocb, + else + lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO); - } } else { /* Good status, call state machine. * This will unregister the rpi if needed. 
*/ - lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO); + lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_LOGO); } out: @@ -1244,21 +1422,24 @@ } int -lpfc_issue_els_logo(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, +lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint8_t retry) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; struct lpfc_sli_ring *pring; struct lpfc_sli *psli; uint8_t *pcmd; uint16_t cmdsize; + int rc; psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; - cmdsize = (2 * sizeof (uint32_t)) + sizeof (struct lpfc_name); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, + cmdsize = (2 * sizeof(uint32_t)) + sizeof(struct lpfc_name); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_LOGO); if (!elsiocb) return 1; @@ -1266,53 +1447,66 @@ icmd = &elsiocb->iocb; pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_LOGO; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); /* Fill in LOGO payload */ - *((uint32_t *) (pcmd)) = be32_to_cpu(phba->fc_myDID); - pcmd += sizeof (uint32_t); - memcpy(pcmd, &phba->fc_portname, sizeof (struct lpfc_name)); + *((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID); + pcmd += sizeof(uint32_t); + memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name)); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue LOGO: did:x%x", + ndlp->nlp_DID, 0, 0); phba->fc_stat.elsXmitLOGO++; elsiocb->iocb_cmpl = lpfc_cmpl_els_logo; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_LOGO_SND; - if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { + spin_unlock_irq(shost->host_lock); + rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); + + if (rc == IOCB_ERROR) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= 
~NLP_LOGO_SND; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); lpfc_els_free_iocb(phba, elsiocb); return 1; } - spin_unlock_irq(phba->host->host_lock); return 0; } static void -lpfc_cmpl_els_cmd(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_els_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { + struct lpfc_vport *vport = cmdiocb->vport; IOCB_t *irsp; irsp = &rspiocb->iocb; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "ELS cmd cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + irsp->un.elsreq64.remoteID); + /* ELS cmd tag completes */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_ELS, - "%d:0106 ELS cmd tag x%x completes Data: x%x x%x x%x\n", - phba->brd_no, + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0106 ELS cmd tag x%x completes Data: x%x x%x " + "x%x\n", + phba->brd_no, vport->vpi, irsp->ulpIoTag, irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout); /* Check to see if link went down during discovery */ - lpfc_els_chk_latt(phba); + lpfc_els_chk_latt(vport); lpfc_els_free_iocb(phba, cmdiocb); return; } int -lpfc_issue_els_scr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry) +lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) { + struct lpfc_hba *phba = vport->phba; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; struct lpfc_sli_ring *pring; @@ -1323,15 +1517,16 @@ psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - cmdsize = (sizeof (uint32_t) + sizeof (SCR)); + cmdsize = (sizeof(uint32_t) + sizeof(SCR)); ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); if (!ndlp) return 1; - lpfc_nlp_init(phba, ndlp, nportid); + lpfc_nlp_init(vport, ndlp, nportid); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_SCR); + if (!elsiocb) { lpfc_nlp_put(ndlp); return 1; @@ 
-1341,29 +1536,31 @@ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_SCR; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); /* For SCR, remainder of payload is SCR parameter page */ - memset(pcmd, 0, sizeof (SCR)); + memset(pcmd, 0, sizeof(SCR)); ((SCR *) pcmd)->Function = SCR_FUNC_FULL; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue SCR: did:x%x", + ndlp->nlp_DID, 0, 0); + phba->fc_stat.elsXmitSCR++; elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; - spin_lock_irq(phba->host->host_lock); if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { - spin_unlock_irq(phba->host->host_lock); lpfc_nlp_put(ndlp); lpfc_els_free_iocb(phba, elsiocb); return 1; } - spin_unlock_irq(phba->host->host_lock); lpfc_nlp_put(ndlp); return 0; } static int -lpfc_issue_els_farpr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry) +lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) { + struct lpfc_hba *phba = vport->phba; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; struct lpfc_sli_ring *pring; @@ -1377,13 +1574,14 @@ psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - cmdsize = (sizeof (uint32_t) + sizeof (FARP)); + cmdsize = (sizeof(uint32_t) + sizeof(FARP)); ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); if (!ndlp) return 1; - lpfc_nlp_init(phba, ndlp, nportid); - elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, + lpfc_nlp_init(vport, ndlp, nportid); + + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_RNID); if (!elsiocb) { lpfc_nlp_put(ndlp); @@ -1394,44 +1592,71 @@ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_FARPR; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); /* Fill in FARPR payload */ fp = (FARP *) (pcmd); - memset(fp, 0, sizeof (FARP)); + memset(fp, 0, sizeof(FARP)); lp = (uint32_t *) pcmd; *lp++ = be32_to_cpu(nportid); - *lp++ = 
be32_to_cpu(phba->fc_myDID); + *lp++ = be32_to_cpu(vport->fc_myDID); fp->Rflags = 0; fp->Mflags = (FARP_MATCH_PORT | FARP_MATCH_NODE); - memcpy(&fp->RportName, &phba->fc_portname, sizeof (struct lpfc_name)); - memcpy(&fp->RnodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); - if ((ondlp = lpfc_findnode_did(phba, nportid))) { + memcpy(&fp->RportName, &vport->fc_portname, sizeof(struct lpfc_name)); + memcpy(&fp->RnodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); + ondlp = lpfc_findnode_did(vport, nportid); + if (ondlp) { memcpy(&fp->OportName, &ondlp->nlp_portname, - sizeof (struct lpfc_name)); + sizeof(struct lpfc_name)); memcpy(&fp->OnodeName, &ondlp->nlp_nodename, - sizeof (struct lpfc_name)); + sizeof(struct lpfc_name)); } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue FARPR: did:x%x", + ndlp->nlp_DID, 0, 0); + phba->fc_stat.elsXmitFARPR++; elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; - spin_lock_irq(phba->host->host_lock); if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { - spin_unlock_irq(phba->host->host_lock); lpfc_nlp_put(ndlp); lpfc_els_free_iocb(phba, elsiocb); return 1; } - spin_unlock_irq(phba->host->host_lock); lpfc_nlp_put(ndlp); return 0; } +static void +lpfc_end_rscn(struct lpfc_vport *vport) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + if (vport->fc_flag & FC_RSCN_MODE) { + /* + * Check to see if more RSCNs came in while we were + * processing this one. 
+ */ + if (vport->fc_rscn_id_cnt || + (vport->fc_flag & FC_RSCN_DISCOVERY) != 0) + lpfc_els_handle_rscn(vport); + else { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_RSCN_MODE; + spin_unlock_irq(shost->host_lock); + } + } +} + void -lpfc_cancel_retry_delay_tmo(struct lpfc_hba *phba, struct lpfc_nodelist * nlp) +lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + spin_lock_irq(shost->host_lock); nlp->nlp_flag &= ~NLP_DELAY_TMO; + spin_unlock_irq(shost->host_lock); del_timer_sync(&nlp->nlp_delayfunc); nlp->nlp_last_elscmd = 0; @@ -1439,30 +1664,21 @@ list_del_init(&nlp->els_retry_evt.evt_listp); if (nlp->nlp_flag & NLP_NPR_2B_DISC) { + spin_lock_irq(shost->host_lock); nlp->nlp_flag &= ~NLP_NPR_2B_DISC; - if (phba->num_disc_nodes) { + spin_unlock_irq(shost->host_lock); + if (vport->num_disc_nodes) { /* Check to see if there are more * PLOGIs to be sent */ - lpfc_more_plogi(phba); + lpfc_more_plogi(vport); - if (phba->num_disc_nodes == 0) { - phba->fc_flag &= ~FC_NDISC_ACTIVE; - lpfc_can_disctmo(phba); - if (phba->fc_flag & FC_RSCN_MODE) { - /* - * Check to see if more RSCNs - * came in while we were - * processing this one. 
- */ - if((phba->fc_rscn_id_cnt==0) && - !(phba->fc_flag & FC_RSCN_DISCOVERY)) { - phba->fc_flag &= ~FC_RSCN_MODE; - } - else { - lpfc_els_handle_rscn(phba); - } - } + if (vport->num_disc_nodes == 0) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NDISC_ACTIVE; + spin_unlock_irq(shost->host_lock); + lpfc_can_disctmo(vport); + lpfc_end_rscn(vport); } } } @@ -1472,18 +1688,19 @@ void lpfc_els_retry_delay(unsigned long ptr) { - struct lpfc_nodelist *ndlp; - struct lpfc_hba *phba; - unsigned long iflag; - struct lpfc_work_evt *evtp; + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr; + struct lpfc_vport *vport = ndlp->vport; + struct lpfc_hba *phba = vport->phba; + unsigned long flags; + struct lpfc_work_evt *evtp = &ndlp->els_retry_evt; - ndlp = (struct lpfc_nodelist *)ptr; - phba = ndlp->nlp_phba; + ndlp = (struct lpfc_nodelist *) ptr; + phba = ndlp->vport->phba; evtp = &ndlp->els_retry_evt; - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, flags); if (!list_empty(&evtp->evt_listp)) { - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, flags); return; } @@ -1491,33 +1708,31 @@ evtp->evt = LPFC_EVT_ELS_RETRY; list_add_tail(&evtp->evt_listp, &phba->work_list); if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, flags); return; } void lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp) { - struct lpfc_hba *phba; - uint32_t cmd; - uint32_t did; - uint8_t retry; + struct lpfc_vport *vport = ndlp->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + uint32_t cmd, did, retry; - phba = ndlp->nlp_phba; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); did = ndlp->nlp_DID; cmd = ndlp->nlp_last_elscmd; ndlp->nlp_last_elscmd = 0; if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { - spin_unlock_irq(phba->host->host_lock); + 
spin_unlock_irq(shost->host_lock); return; } ndlp->nlp_flag &= ~NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); /* * If a discovery event readded nlp_delayfunc after timer * firing and before processing the timer, cancel the @@ -1528,57 +1743,54 @@ switch (cmd) { case ELS_CMD_FLOGI: - lpfc_issue_els_flogi(phba, ndlp, retry); + lpfc_issue_els_flogi(vport, ndlp, retry); break; case ELS_CMD_PLOGI: - if(!lpfc_issue_els_plogi(phba, ndlp->nlp_DID, retry)) { + if (!lpfc_issue_els_plogi(vport, ndlp->nlp_DID, retry)) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); } break; case ELS_CMD_ADISC: - if (!lpfc_issue_els_adisc(phba, ndlp, retry)) { + if (!lpfc_issue_els_adisc(vport, ndlp, retry)) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); } break; case ELS_CMD_PRLI: - if (!lpfc_issue_els_prli(phba, ndlp, retry)) { + if (!lpfc_issue_els_prli(vport, ndlp, retry)) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); } break; case ELS_CMD_LOGO: - if (!lpfc_issue_els_logo(phba, ndlp, retry)) { + if (!lpfc_issue_els_logo(vport, ndlp, retry)) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); } break; + case ELS_CMD_FDISC: + lpfc_issue_els_fdisc(vport, ndlp, retry); + break; } return; } static int -lpfc_els_retry(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { - IOCB_t *irsp; - struct lpfc_dmabuf *pcmd; - struct lpfc_nodelist *ndlp; + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = 
lpfc_shost_from_vport(vport); + IOCB_t *irsp = &rspiocb->iocb; + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; uint32_t *elscmd; struct ls_rjt stat; - int retry, maxretry; - int delay; - uint32_t cmd; + int retry = 0, maxretry = lpfc_max_els_tries, delay = 0; + uint32_t cmd = 0; uint32_t did; - retry = 0; - delay = 0; - maxretry = lpfc_max_els_tries; - irsp = &rspiocb->iocb; - ndlp = (struct lpfc_nodelist *) cmdiocb->context1; - pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; - cmd = 0; /* Note: context2 may be 0 for internal driver abort * of delays ELS command. @@ -1594,11 +1806,15 @@ else { /* We should only hit this case for retrying PLOGI */ did = irsp->un.elsreq64.remoteID; - ndlp = lpfc_findnode_did(phba, did); + ndlp = lpfc_findnode_did(vport, did); if (!ndlp && (cmd != ELS_CMD_PLOGI)) return 1; } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Retry ELS: wd7:x%x wd4:x%x did:x%x", + *(((uint32_t *) irsp) + 7), irsp->un.ulpWord[4], ndlp->nlp_DID); + switch (irsp->ulpStatus) { case IOSTAT_FCP_RSP_ERROR: case IOSTAT_REMOTE_STOP: @@ -1607,25 +1823,37 @@ case IOSTAT_LOCAL_REJECT: switch ((irsp->un.ulpWord[4] & 0xff)) { case IOERR_LOOP_OPEN_FAILURE: - if (cmd == ELS_CMD_PLOGI) { - if (cmdiocb->retry == 0) { - delay = 1; - } - } + if (cmd == ELS_CMD_PLOGI && cmdiocb->retry == 0) + delay = 1000; retry = 1; break; - case IOERR_SEQUENCE_TIMEOUT: + case IOERR_ILLEGAL_COMMAND: + if ((phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) && + (cmd == ELS_CMD_FDISC)) { + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0124 FDISC failed (3/6) retrying...\n", + phba->brd_no, vport->vpi); + lpfc_mbx_unreg_vpi(vport); retry = 1; + /* Always retry for this case */ + cmdiocb->retry = 0; + } break; case IOERR_NO_RESOURCES: - if (cmd == ELS_CMD_PLOGI) { - delay = 1; - } + retry = 1; + if (cmdiocb->retry > 100) + delay = 100; + maxretry = 250; + break; + + case 
IOERR_ILLEGAL_FRAME: + delay = 100; retry = 1; break; + case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: retry = 1; break; @@ -1655,27 +1883,57 @@ if (stat.un.b.lsRjtRsnCodeExp == LSEXP_CMD_IN_PROGRESS) { if (cmd == ELS_CMD_PLOGI) { - delay = 1; + delay = 1000; maxretry = 48; } retry = 1; break; } if (cmd == ELS_CMD_PLOGI) { - delay = 1; + delay = 1000; maxretry = lpfc_max_els_tries + 1; retry = 1; break; } + if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + (cmd == ELS_CMD_FDISC) && + (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){ + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0125 FDISC Failed (x%x)." + " Fabric out of resources\n", + phba->brd_no, vport->vpi, stat.un.lsRjtError); + lpfc_vport_set_state(vport, + FC_VPORT_NO_FABRIC_RSCS); + } break; case LSRJT_LOGICAL_BSY: - if (cmd == ELS_CMD_PLOGI) { - delay = 1; + if ((cmd == ELS_CMD_PLOGI) || + (cmd == ELS_CMD_PRLI)) { + delay = 1000; maxretry = 48; + } else if (cmd == ELS_CMD_FDISC) { + /* Always retry for this case */ + cmdiocb->retry = 0; } retry = 1; break; + + case LSRJT_LOGICAL_ERR: + case LSRJT_PROTOCOL_ERR: + if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + (cmd == ELS_CMD_FDISC) && + ((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) || + (stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID)) + ) { + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0123 FDISC Failed (x%x)." 
+ " Fabric Detected Bad WWN\n", + phba->brd_no, vport->vpi, stat.un.lsRjtError); + lpfc_vport_set_state(vport, + FC_VPORT_FABRIC_REJ_WWN); + } + break; } break; @@ -1695,21 +1953,27 @@ retry = 0; } + if ((vport->load_flag & FC_UNLOADING) != 0) + retry = 0; + if (retry) { /* Retry ELS command to remote NPORT */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0107 Retry ELS command x%x to remote " + "%d (%d):0107 Retry ELS command x%x to remote " "NPORT x%x Data: x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, cmd, did, cmdiocb->retry, delay); - if ((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) { + if (((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) && + ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) || + ((irsp->un.ulpWord[4] & 0xff) != IOERR_NO_RESOURCES))) { + /* Don't reset timer for no resources */ + /* If discovery / RSCN timer is running, reset it */ - if (timer_pending(&phba->fc_disctmo) || - (phba->fc_flag & FC_RSCN_MODE)) { - lpfc_set_disctmo(phba); - } + if (timer_pending(&vport->fc_disctmo) || + (vport->fc_flag & FC_RSCN_MODE)) + lpfc_set_disctmo(vport); } phba->fc_stat.elsXmitRetry++; @@ -1717,50 +1981,62 @@ phba->fc_stat.elsDelayRetry++; ndlp->nlp_retry = cmdiocb->retry; - mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ); + /* delay is specified in milliseconds */ + mod_timer(&ndlp->nlp_delayfunc, + jiffies + msecs_to_jiffies(delay)); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; + spin_unlock_irq(shost->host_lock); ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + if (cmd == ELS_CMD_PRLI) + lpfc_nlp_set_state(vport, ndlp, + NLP_STE_REG_LOGIN_ISSUE); + else + lpfc_nlp_set_state(vport, ndlp, + NLP_STE_NPR_NODE); ndlp->nlp_last_elscmd = cmd; return 1; } switch (cmd) { case ELS_CMD_FLOGI: - lpfc_issue_els_flogi(phba, ndlp, cmdiocb->retry); + lpfc_issue_els_flogi(vport, ndlp, cmdiocb->retry); + return 1; + case ELS_CMD_FDISC: + lpfc_issue_els_fdisc(vport, ndlp, cmdiocb->retry); return 
1; case ELS_CMD_PLOGI: if (ndlp) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); } - lpfc_issue_els_plogi(phba, did, cmdiocb->retry); + lpfc_issue_els_plogi(vport, did, cmdiocb->retry); return 1; case ELS_CMD_ADISC: ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); - lpfc_issue_els_adisc(phba, ndlp, cmdiocb->retry); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); + lpfc_issue_els_adisc(vport, ndlp, cmdiocb->retry); return 1; case ELS_CMD_PRLI: ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE); - lpfc_issue_els_prli(phba, ndlp, cmdiocb->retry); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); + lpfc_issue_els_prli(vport, ndlp, cmdiocb->retry); return 1; case ELS_CMD_LOGO: ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - lpfc_issue_els_logo(phba, ndlp, cmdiocb->retry); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + lpfc_issue_els_logo(vport, ndlp, cmdiocb->retry); return 1; } } /* No retry ELS command to remote NPORT */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0108 No retry ELS command x%x to remote NPORT x%x " - "Data: x%x\n", - phba->brd_no, + "%d (%d):0108 No retry ELS command x%x to remote " + "NPORT x%x Data: x%x\n", + phba->brd_no, vport->vpi, cmd, did, cmdiocb->retry); return 0; @@ -1795,33 +2071,36 @@ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); kfree(buf_ptr); } - spin_lock_irq(phba->host->host_lock); lpfc_sli_release_iocbq(phba, elsiocb); - spin_unlock_irq(phba->host->host_lock); return 0; } static void -lpfc_cmpl_els_logo_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { - struct lpfc_nodelist *ndlp; + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) 
cmdiocb->context1; + struct lpfc_vport *vport = cmdiocb->vport; + IOCB_t *irsp; - ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + irsp = &rspiocb->iocb; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "ACC LOGO cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID); /* ACC to LOGO completes to NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0109 ACC to LOGO completes to NPort x%x " + "%d (%d):0109 ACC to LOGO completes to NPort x%x " "Data: x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_state, ndlp->nlp_rpi); + phba->brd_no, vport->vpi, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); switch (ndlp->nlp_state) { case NLP_STE_UNUSED_NODE: /* node is just allocated */ - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; case NLP_STE_NPR_NODE: /* NPort Recovery mode */ - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); break; default: break; @@ -1830,24 +2109,38 @@ return; } +void +lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) +{ + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + + pmb->context1 = NULL; + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + mempool_free(pmb, phba->mbox_mem_pool); + lpfc_nlp_put(ndlp); + return; +} + static void -lpfc_cmpl_els_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, +lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb) { + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + struct lpfc_vport *vport = ndlp ? ndlp->vport : NULL; + struct Scsi_Host *shost = vport ? 
lpfc_shost_from_vport(vport) : NULL; IOCB_t *irsp; - struct lpfc_nodelist *ndlp; LPFC_MBOXQ_t *mbox = NULL; - struct lpfc_dmabuf *mp; + struct lpfc_dmabuf *mp = NULL; irsp = &rspiocb->iocb; - ndlp = (struct lpfc_nodelist *) cmdiocb->context1; if (cmdiocb->context_un.mbox) mbox = cmdiocb->context_un.mbox; - /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba) || !ndlp) { + if (!ndlp || lpfc_els_chk_latt(vport)) { if (mbox) { mp = (struct lpfc_dmabuf *) mbox->context1; if (mp) { @@ -1859,11 +2152,16 @@ goto out; } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "ACC cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], + irsp->un.rcvels.remoteID); + /* ELS response tag completes */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0110 ELS response tag x%x completes " + "%d (%d):0110 ELS response tag x%x completes " "Data: x%x x%x x%x x%x x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, cmdiocb->iocb.ulpIoTag, rspiocb->iocb.ulpStatus, rspiocb->iocb.un.ulpWord[4], rspiocb->iocb.ulpTimeout, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, @@ -1872,11 +2170,19 @@ if (mbox) { if ((rspiocb->iocb.ulpStatus == 0) && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) { - lpfc_unreg_rpi(phba, ndlp); - mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; + lpfc_unreg_rpi(vport, ndlp); mbox->context2 = lpfc_nlp_get(ndlp); + mbox->vport = vport; + if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) { + mbox->mbox_flag |= LPFC_MBX_IMED_UNREG; + mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; + } + else { + mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE); + lpfc_nlp_set_state(vport, ndlp, + NLP_STE_REG_LOGIN_ISSUE); + } if (lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) { @@ -1886,17 +2192,13 @@ /* NOTE: we should have messages for unsuccessful reglogin */ } else { - /* Do not call NO_LIST for lpfc_els_abort'ed ELS cmds */ - if 
(!((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && - ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || - (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || - (irsp->un.ulpWord[4] == IOERR_SLI_DOWN)))) { - if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) { - lpfc_drop_node(phba, ndlp); + /* Do not drop node for lpfc_els_abort'ed ELS cmds */ + if (!lpfc_error_lost_link(irsp) && + ndlp->nlp_flag & NLP_ACC_REGLOGIN) { + lpfc_drop_node(vport, ndlp); ndlp = NULL; } } - } mp = (struct lpfc_dmabuf *) mbox->context1; if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -1906,19 +2208,21 @@ } out: if (ndlp) { - spin_lock_irq(phba->host->host_lock); - ndlp->nlp_flag &= ~NLP_ACC_REGLOGIN; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); + spin_unlock_irq(shost->host_lock); } lpfc_els_free_iocb(phba, cmdiocb); return; } int -lpfc_els_rsp_acc(struct lpfc_hba * phba, uint32_t flag, - struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp, - LPFC_MBOXQ_t * mbox, uint8_t newnode) +lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, + struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp, + LPFC_MBOXQ_t *mbox, uint8_t newnode) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; IOCB_t *icmd; IOCB_t *oldcmd; struct lpfc_iocbq *elsiocb; @@ -1935,22 +2239,29 @@ switch (flag) { case ELS_CMD_ACC: - cmdsize = sizeof (uint32_t); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, + cmdsize = sizeof(uint32_t); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ndlp->nlp_DID, ELS_CMD_ACC); if (!elsiocb) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_LOGO_ACC; + spin_unlock_irq(shost->host_lock); return 1; } + icmd = &elsiocb->iocb; icmd->ulpContext = oldcmd->ulpContext; /* Xri */ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_ACC; - pcmd += sizeof (uint32_t); + pcmd += 
sizeof(uint32_t); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue ACC: did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); break; case ELS_CMD_PLOGI: - cmdsize = (sizeof (struct serv_parm) + sizeof (uint32_t)); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, + cmdsize = (sizeof(struct serv_parm) + sizeof(uint32_t)); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ndlp->nlp_DID, ELS_CMD_ACC); if (!elsiocb) return 1; @@ -1963,12 +2274,16 @@ elsiocb->context_un.mbox = mbox; *((uint32_t *) (pcmd)) = ELS_CMD_ACC; - pcmd += sizeof (uint32_t); - memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm)); + pcmd += sizeof(uint32_t); + memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue ACC PLOGI: did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); break; case ELS_CMD_PRLO: - cmdsize = sizeof (uint32_t) + sizeof (PRLO); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, + cmdsize = sizeof(uint32_t) + sizeof(PRLO); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ndlp->nlp_DID, ELS_CMD_PRLO); if (!elsiocb) return 1; @@ -1978,10 +2293,14 @@ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt); memcpy(pcmd, ((struct lpfc_dmabuf *) oldiocb->context2)->virt, - sizeof (uint32_t) + sizeof (PRLO)); + sizeof(uint32_t) + sizeof(PRLO)); *((uint32_t *) (pcmd)) = ELS_CMD_PRLO_ACC; els_pkt_ptr = (ELS_PKT *) pcmd; els_pkt_ptr->un.prlo.acceptRspCode = PRLO_REQ_EXECUTED; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue ACC PRLO: did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); break; default: return 1; @@ -1994,25 +2313,23 @@ /* Xmit ELS ACC response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0128 Xmit ELS ACC response tag x%x, XRI: x%x, " + "%d (%d):0128 Xmit ELS ACC response tag x%x, XRI: x%x, " "DID: x%x, nlp_flag: x%x nlp_state: x%x RPI: x%x\n", - phba->brd_no, elsiocb->iotag, + 
phba->brd_no, vport->vpi, elsiocb->iotag, elsiocb->iocb.ulpContext, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); if (ndlp->nlp_flag & NLP_LOGO_ACC) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_LOGO_ACC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); elsiocb->iocb_cmpl = lpfc_cmpl_els_logo_acc; } else { - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; } phba->fc_stat.elsXmitACC++; - spin_lock_irq(phba->host->host_lock); rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); - spin_unlock_irq(phba->host->host_lock); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -2021,9 +2338,11 @@ } int -lpfc_els_rsp_reject(struct lpfc_hba * phba, uint32_t rejectError, - struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp) +lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError, + struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp, + LPFC_MBOXQ_t *mbox) { + struct lpfc_hba *phba = vport->phba; IOCB_t *icmd; IOCB_t *oldcmd; struct lpfc_iocbq *elsiocb; @@ -2036,9 +2355,9 @@ psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - cmdsize = 2 * sizeof (uint32_t); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, - ndlp, ndlp->nlp_DID, ELS_CMD_LS_RJT); + cmdsize = 2 * sizeof(uint32_t); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, + ndlp->nlp_DID, ELS_CMD_LS_RJT); if (!elsiocb) return 1; @@ -2048,22 +2367,30 @@ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_LS_RJT; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); *((uint32_t *) (pcmd)) = rejectError; + if (mbox) { + elsiocb->context_un.mbox = mbox; + elsiocb->context1 = lpfc_nlp_get(ndlp); + } + /* Xmit ELS RJT response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0129 Xmit ELS RJT x%x response tag x%x xri x%x, " - "did x%x, 
nlp_flag x%x, nlp_state x%x, rpi x%x\n", - phba->brd_no, rejectError, elsiocb->iotag, + "%d (%d):0129 Xmit ELS RJT x%x response tag x%x " + "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, " + "rpi x%x\n", + phba->brd_no, vport->vpi, rejectError, elsiocb->iotag, elsiocb->iocb.ulpContext, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue LS_RJT: did:x%x flg:x%x err:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, rejectError); + phba->fc_stat.elsXmitLSRJT++; - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; - spin_lock_irq(phba->host->host_lock); + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); - spin_unlock_irq(phba->host->host_lock); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -2072,25 +2399,22 @@ } int -lpfc_els_rsp_adisc_acc(struct lpfc_hba * phba, - struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp) +lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, + struct lpfc_nodelist *ndlp) { + struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; ADISC *ap; - IOCB_t *icmd; - IOCB_t *oldcmd; + IOCB_t *icmd, *oldcmd; struct lpfc_iocbq *elsiocb; - struct lpfc_sli_ring *pring; - struct lpfc_sli *psli; uint8_t *pcmd; uint16_t cmdsize; int rc; - psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - - cmdsize = sizeof (uint32_t) + sizeof (ADISC); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, - ndlp, ndlp->nlp_DID, ELS_CMD_ACC); + cmdsize = sizeof(uint32_t) + sizeof(ADISC); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, + ndlp->nlp_DID, ELS_CMD_ACC); if (!elsiocb) return 1; @@ -2100,28 +2424,30 @@ /* Xmit ADISC ACC response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0130 Xmit ADISC ACC response iotag x%x xri: " + "%d (%d):0130 Xmit ADISC ACC response iotag x%x xri: 
" "x%x, did x%x, nlp_flag x%x, nlp_state x%x rpi x%x\n", - phba->brd_no, elsiocb->iotag, + phba->brd_no, vport->vpi, elsiocb->iotag, elsiocb->iocb.ulpContext, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_ACC; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); ap = (ADISC *) (pcmd); ap->hardAL_PA = phba->fc_pref_ALPA; - memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name)); - memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); - ap->DID = be32_to_cpu(phba->fc_myDID); + memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name)); + memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); + ap->DID = be32_to_cpu(vport->fc_myDID); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue ACC ADISC: did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); phba->fc_stat.elsXmitACC++; - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; - spin_lock_irq(phba->host->host_lock); + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); - spin_unlock_irq(phba->host->host_lock); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -2130,9 +2456,10 @@ } int -lpfc_els_rsp_prli_acc(struct lpfc_hba *phba, struct lpfc_iocbq *oldiocb, +lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp) { + struct lpfc_hba *phba = vport->phba; PRLI *npr; lpfc_vpd_t *vpd; IOCB_t *icmd; @@ -2147,8 +2474,8 @@ psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ - cmdsize = sizeof (uint32_t) + sizeof (PRLI); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ndlp, + cmdsize = sizeof(uint32_t) + sizeof(PRLI); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ndlp->nlp_DID, (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK))); if (!elsiocb) return 1; @@ -2159,19 +2486,19 @@ /* 
Xmit PRLI ACC response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0131 Xmit PRLI ACC response tag x%x xri x%x, " + "%d (%d):0131 Xmit PRLI ACC response tag x%x xri x%x, " "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", - phba->brd_no, elsiocb->iotag, + phba->brd_no, vport->vpi, elsiocb->iotag, elsiocb->iocb.ulpContext, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ - memset(pcmd, 0, sizeof (PRLI)); + memset(pcmd, 0, sizeof(PRLI)); npr = (PRLI *) pcmd; vpd = &phba->vpd; @@ -2193,12 +2520,14 @@ npr->prliType = PRLI_FCP_TYPE; npr->initiatorFunc = 1; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue ACC PRLI: did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); + phba->fc_stat.elsXmitACC++; - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; - spin_lock_irq(phba->host->host_lock); rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); - spin_unlock_irq(phba->host->host_lock); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -2207,12 +2536,12 @@ } static int -lpfc_els_rsp_rnid_acc(struct lpfc_hba *phba, uint8_t format, +lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format, struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp) { + struct lpfc_hba *phba = vport->phba; RNID *rn; - IOCB_t *icmd; - IOCB_t *oldcmd; + IOCB_t *icmd, *oldcmd; struct lpfc_iocbq *elsiocb; struct lpfc_sli_ring *pring; struct lpfc_sli *psli; @@ -2223,13 +2552,13 @@ psli = &phba->sli; pring = &psli->ring[LPFC_ELS_RING]; - cmdsize = sizeof (uint32_t) + sizeof (uint32_t) - + (2 * sizeof (struct lpfc_name)); + cmdsize = sizeof(uint32_t) + sizeof(uint32_t) + + (2 * sizeof(struct lpfc_name)); if (format) - cmdsize += sizeof (RNID_TOP_DISC); 
+ cmdsize += sizeof(RNID_TOP_DISC); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, - ndlp, ndlp->nlp_DID, ELS_CMD_ACC); + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, + ndlp->nlp_DID, ELS_CMD_ACC); if (!elsiocb) return 1; @@ -2239,30 +2568,30 @@ /* Xmit RNID ACC response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0132 Xmit RNID ACC response tag x%x " + "%d (%d):0132 Xmit RNID ACC response tag x%x " "xri x%x\n", - phba->brd_no, elsiocb->iotag, + phba->brd_no, vport->vpi, elsiocb->iotag, elsiocb->iocb.ulpContext); pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_ACC; - pcmd += sizeof (uint32_t); + pcmd += sizeof(uint32_t); - memset(pcmd, 0, sizeof (RNID)); + memset(pcmd, 0, sizeof(RNID)); rn = (RNID *) (pcmd); rn->Format = format; - rn->CommonLen = (2 * sizeof (struct lpfc_name)); - memcpy(&rn->portName, &phba->fc_portname, sizeof (struct lpfc_name)); - memcpy(&rn->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); + rn->CommonLen = (2 * sizeof(struct lpfc_name)); + memcpy(&rn->portName, &vport->fc_portname, sizeof(struct lpfc_name)); + memcpy(&rn->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); switch (format) { case 0: rn->SpecificLen = 0; break; case RNID_TOPOLOGY_DISC: - rn->SpecificLen = sizeof (RNID_TOP_DISC); + rn->SpecificLen = sizeof(RNID_TOP_DISC); memcpy(&rn->un.topologyDisc.portName, - &phba->fc_portname, sizeof (struct lpfc_name)); + &vport->fc_portname, sizeof(struct lpfc_name)); rn->un.topologyDisc.unitType = RNID_HBA; rn->un.topologyDisc.physPort = 0; rn->un.topologyDisc.attachedNodes = 0; @@ -2273,15 +2602,17 @@ break; } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, + "Issue ACC RNID: did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); + phba->fc_stat.elsXmitACC++; - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; lpfc_nlp_put(ndlp); elsiocb->context1 = NULL; /* Don't need ndlp for 
cmpl, * it could be freed */ - spin_lock_irq(phba->host->host_lock); rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); - spin_unlock_irq(phba->host->host_lock); if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -2290,168 +2621,153 @@ } int -lpfc_els_disc_adisc(struct lpfc_hba *phba) +lpfc_els_disc_adisc(struct lpfc_vport *vport) { - int sentadisc; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp, *next_ndlp; + int sentadisc = 0; - sentadisc = 0; /* go thru NPR nodes and issue any remaining ELS ADISCs */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state == NLP_STE_NPR_NODE && (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 && (ndlp->nlp_flag & NLP_NPR_ADISC) != 0) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); - lpfc_issue_els_adisc(phba, ndlp, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); + lpfc_issue_els_adisc(vport, ndlp, 0); sentadisc++; - phba->num_disc_nodes++; - if (phba->num_disc_nodes >= - phba->cfg_discovery_threads) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_NLP_MORE; - spin_unlock_irq(phba->host->host_lock); + vport->num_disc_nodes++; + if (vport->num_disc_nodes >= + vport->phba->cfg_discovery_threads) { + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_NLP_MORE; + spin_unlock_irq(shost->host_lock); break; } } } if (sentadisc == 0) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_NLP_MORE; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NLP_MORE; + spin_unlock_irq(shost->host_lock); } return sentadisc; } int -lpfc_els_disc_plogi(struct lpfc_hba 
* phba) +lpfc_els_disc_plogi(struct lpfc_vport *vport) { - int sentplogi; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp, *next_ndlp; + int sentplogi = 0; - sentplogi = 0; - /* go thru NPR list and issue any remaining ELS PLOGIs */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { + /* go thru NPR nodes and issue any remaining ELS PLOGIs */ + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state == NLP_STE_NPR_NODE && (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 && (ndlp->nlp_flag & NLP_DELAY_TMO) == 0 && (ndlp->nlp_flag & NLP_NPR_ADISC) == 0) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); sentplogi++; - phba->num_disc_nodes++; - if (phba->num_disc_nodes >= - phba->cfg_discovery_threads) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_NLP_MORE; - spin_unlock_irq(phba->host->host_lock); + vport->num_disc_nodes++; + if (vport->num_disc_nodes >= + vport->phba->cfg_discovery_threads) { + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_NLP_MORE; + spin_unlock_irq(shost->host_lock); break; } } } if (sentplogi == 0) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_NLP_MORE; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NLP_MORE; + spin_unlock_irq(shost->host_lock); } return sentplogi; } -int -lpfc_els_flush_rscn(struct lpfc_hba * phba) +void +lpfc_els_flush_rscn(struct lpfc_vport *vport) { - struct lpfc_dmabuf *mp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; int i; - for (i = 0; i < phba->fc_rscn_id_cnt; i++) { - mp = phba->fc_rscn_id_list[i]; - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - phba->fc_rscn_id_list[i] = NULL; 
- } - phba->fc_rscn_id_cnt = 0; - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY); - spin_unlock_irq(phba->host->host_lock); - lpfc_can_disctmo(phba); - return 0; + for (i = 0; i < vport->fc_rscn_id_cnt; i++) { + lpfc_in_buf_free(phba, vport->fc_rscn_id_list[i]); + vport->fc_rscn_id_list[i] = NULL; + } + spin_lock_irq(shost->host_lock); + vport->fc_rscn_id_cnt = 0; + vport->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY); + spin_unlock_irq(shost->host_lock); + lpfc_can_disctmo(vport); } int -lpfc_rscn_payload_check(struct lpfc_hba * phba, uint32_t did) +lpfc_rscn_payload_check(struct lpfc_vport *vport, uint32_t did) { D_ID ns_did; D_ID rscn_did; - struct lpfc_dmabuf *mp; uint32_t *lp; - uint32_t payload_len, cmd, i, match; + uint32_t payload_len, i; + struct lpfc_hba *phba = vport->phba; ns_did.un.word = did; - match = 0; /* Never match fabric nodes for RSCNs */ if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) - return(0); + return 0; /* If we are doing a FULL RSCN rediscovery, match everything */ - if (phba->fc_flag & FC_RSCN_DISCOVERY) { + if (vport->fc_flag & FC_RSCN_DISCOVERY) return did; - } - for (i = 0; i < phba->fc_rscn_id_cnt; i++) { - mp = phba->fc_rscn_id_list[i]; - lp = (uint32_t *) mp->virt; - cmd = *lp++; - payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */ - payload_len -= sizeof (uint32_t); /* take off word 0 */ + for (i = 0; i < vport->fc_rscn_id_cnt; i++) { + lp = vport->fc_rscn_id_list[i]->virt; + payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK); + payload_len -= sizeof(uint32_t); /* take off word 0 */ while (payload_len) { - rscn_did.un.word = *lp++; - rscn_did.un.word = be32_to_cpu(rscn_did.un.word); - payload_len -= sizeof (uint32_t); + rscn_did.un.word = be32_to_cpu(*lp++); + payload_len -= sizeof(uint32_t); switch (rscn_did.un.b.resv) { case 0: /* Single N_Port ID effected */ - if (ns_did.un.word == rscn_did.un.word) { - match = did; - } + if (ns_did.un.word == rscn_did.un.word) + return 
did; break; case 1: /* Whole N_Port Area effected */ if ((ns_did.un.b.domain == rscn_did.un.b.domain) && (ns_did.un.b.area == rscn_did.un.b.area)) - { - match = did; - } + return did; break; case 2: /* Whole N_Port Domain effected */ if (ns_did.un.b.domain == rscn_did.un.b.domain) - { - match = did; - } - break; - case 3: /* Whole Fabric effected */ - match = did; + return did; break; default: - /* Unknown Identifier in RSCN list */ + /* Unknown Identifier in RSCN node */ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0217 Unknown Identifier in " - "RSCN payload Data: x%x\n", - phba->brd_no, rscn_did.un.word); - break; - } - if (match) { - break; + "%d (%d):0217 Unknown " + "Identifier in RSCN payload " + "Data: x%x\n", + phba->brd_no, vport->vpi, + rscn_did.un.word); + case 3: /* Whole Fabric effected */ + return did; } } } - return match; + return 0; } static int -lpfc_rscn_recovery_check(struct lpfc_hba *phba) +lpfc_rscn_recovery_check(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp = NULL; @@ -2459,12 +2775,12 @@ * them to NPR state. */ - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state == NLP_STE_UNUSED_NODE || - lpfc_rscn_payload_check(phba, ndlp->nlp_DID) == 0) + lpfc_rscn_payload_check(vport, ndlp->nlp_DID) == 0) continue; - lpfc_disc_state_machine(phba, ndlp, NULL, + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); /* @@ -2472,175 +2788,248 @@ * recovery event. 
*/ if (ndlp->nlp_flag & NLP_DELAY_TMO) - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); } return 0; } static int -lpfc_els_rcv_rscn(struct lpfc_hba * phba, - struct lpfc_iocbq * cmdiocb, - struct lpfc_nodelist * ndlp, uint8_t newnode) +lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp, uint8_t newnode) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd; - uint32_t *lp; + struct lpfc_vport *next_vport; + uint32_t *lp, *datap; IOCB_t *icmd; - uint32_t payload_len, cmd; + uint32_t payload_len, length, nportid, *cmd; + int rscn_cnt = vport->fc_rscn_id_cnt; + int rscn_id = 0, hba_id = 0; int i; icmd = &cmdiocb->iocb; pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; lp = (uint32_t *) pcmd->virt; - cmd = *lp++; - payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */ - payload_len -= sizeof (uint32_t); /* take off word 0 */ - cmd &= ELS_CMD_MASK; + payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK); + payload_len -= sizeof(uint32_t); /* take off word 0 */ /* RSCN received */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0214 RSCN received Data: x%x x%x x%x x%x\n", - phba->brd_no, - phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt); + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0214 RSCN received Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, vport->fc_flag, payload_len, + *lp, rscn_cnt); for (i = 0; i < payload_len/sizeof(uint32_t); i++) - fc_host_post_event(phba->host, fc_get_event_number(), + fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_RSCN, lp[i]); /* If we are about to begin discovery, just ACC the RSCN. * Discovery processing will satisfy it. 
*/ - if (phba->hba_state <= LPFC_NS_QRY) { - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, + if (vport->port_state <= LPFC_NS_QRY) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RSCN ignore: did:x%x/ste:x%x flg:x%x", + ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag); + + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode); return 0; } + /* If this RSCN just contains NPortIDs for other vports on this HBA, + * just ACC and ignore it. + */ + if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + !(phba->cfg_peer_port_login)) { + i = payload_len; + datap = lp; + while (i > 0) { + nportid = *datap++; + nportid = ((be32_to_cpu(nportid)) & Mask_DID); + i -= sizeof(uint32_t); + rscn_id++; + list_for_each_entry(next_vport, &phba->port_list, + listentry) { + if (nportid == next_vport->fc_myDID) { + hba_id++; + break; + } + } + } + if (rscn_id == hba_id) { + /* ALL NPortIDs in RSCN are on HBA */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0214 Ignore RSCN Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, vport->fc_flag, payload_len, + *lp, rscn_cnt); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RSCN vport: did:x%x/ste:x%x flg:x%x", + ndlp->nlp_DID, vport->port_state, + ndlp->nlp_flag); + + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, + ndlp, NULL, newnode); + return 0; + } + } + /* If we are already processing an RSCN, save the received * RSCN payload buffer, cmdiocb->context2 to process later. 
*/ - if (phba->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) { - if ((phba->fc_rscn_id_cnt < FC_MAX_HOLD_RSCN) && - !(phba->fc_flag & FC_RSCN_DISCOVERY)) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_RSCN_MODE; - spin_unlock_irq(phba->host->host_lock); - phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd; - + if (vport->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RSCN defer: did:x%x/ste:x%x flg:x%x", + ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag); + + vport->fc_flag |= FC_RSCN_DEFERRED; + if ((rscn_cnt < FC_MAX_HOLD_RSCN) && + !(vport->fc_flag & FC_RSCN_DISCOVERY)) { + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_RSCN_MODE; + spin_unlock_irq(shost->host_lock); + if (rscn_cnt) { + cmd = vport->fc_rscn_id_list[rscn_cnt-1]->virt; + length = be32_to_cpu(*cmd & ~ELS_CMD_MASK); + } + if ((rscn_cnt) && + (payload_len + length <= LPFC_BPL_SIZE)) { + *cmd &= ELS_CMD_MASK; + *cmd |= be32_to_cpu(payload_len + length); + memcpy(((uint8_t *)cmd) + length, lp, + payload_len); + } else { + vport->fc_rscn_id_list[rscn_cnt] = pcmd; + vport->fc_rscn_id_cnt++; /* If we zero, cmdiocb->context2, the calling * routine will not try to free it. 
*/ cmdiocb->context2 = NULL; + } /* Deferred RSCN */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0235 Deferred RSCN " + "%d (%d):0235 Deferred RSCN " "Data: x%x x%x x%x\n", - phba->brd_no, phba->fc_rscn_id_cnt, - phba->fc_flag, phba->hba_state); + phba->brd_no, vport->vpi, + vport->fc_rscn_id_cnt, vport->fc_flag, + vport->port_state); } else { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_RSCN_DISCOVERY; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_RSCN_DISCOVERY; + spin_unlock_irq(shost->host_lock); /* ReDiscovery RSCN */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0234 ReDiscovery RSCN " + "%d (%d):0234 ReDiscovery RSCN " "Data: x%x x%x x%x\n", - phba->brd_no, phba->fc_rscn_id_cnt, - phba->fc_flag, phba->hba_state); + phba->brd_no, vport->vpi, + vport->fc_rscn_id_cnt, vport->fc_flag, + vport->port_state); } /* Send back ACC */ - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode); /* send RECOVERY event for ALL nodes that match RSCN payload */ - lpfc_rscn_recovery_check(phba); + lpfc_rscn_recovery_check(vport); + vport->fc_flag &= ~FC_RSCN_DEFERRED; return 0; } - phba->fc_flag |= FC_RSCN_MODE; - phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RSCN: did:x%x/ste:x%x flg:x%x", + ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag); + + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_RSCN_MODE; + spin_unlock_irq(shost->host_lock); + vport->fc_rscn_id_list[vport->fc_rscn_id_cnt++] = pcmd; /* * If we zero, cmdiocb->context2, the calling routine will * not try to free it. 
*/ cmdiocb->context2 = NULL; - lpfc_set_disctmo(phba); + lpfc_set_disctmo(vport); /* Send back ACC */ - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode); /* send RECOVERY event for ALL nodes that match RSCN payload */ - lpfc_rscn_recovery_check(phba); + lpfc_rscn_recovery_check(vport); - return lpfc_els_handle_rscn(phba); + return lpfc_els_handle_rscn(vport); } int -lpfc_els_handle_rscn(struct lpfc_hba * phba) +lpfc_els_handle_rscn(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp; + struct lpfc_hba *phba = vport->phba; + + /* Ignore RSCN if the port is being torn down. */ + if (vport->load_flag & FC_UNLOADING) { + lpfc_els_flush_rscn(vport); + return 0; + } /* Start timer for RSCN processing */ - lpfc_set_disctmo(phba); + lpfc_set_disctmo(vport); /* RSCN processed */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0215 RSCN processed Data: x%x x%x x%x x%x\n", - phba->brd_no, - phba->fc_flag, 0, phba->fc_rscn_id_cnt, - phba->hba_state); + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0215 RSCN processed Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, + vport->fc_flag, 0, vport->fc_rscn_id_cnt, + vport->port_state); /* To process RSCN, first compare RSCN data with NameServer */ - phba->fc_ns_retry = 0; - ndlp = lpfc_findnode_did(phba, NameServer_DID); + vport->fc_ns_retry = 0; + ndlp = lpfc_findnode_did(vport, NameServer_DID); if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { /* Good ndlp, issue CT Request to NameServer */ - if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == 0) { + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0) == 0) /* Wait for NameServer query cmpl before we can continue */ return 1; - } } else { /* If login to NameServer does not exist, issue one */ /* Good status, issue PLOGI to NameServer */ - ndlp = lpfc_findnode_did(phba, NameServer_DID); - if (ndlp) { + ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (ndlp) /* 
Wait for NameServer login cmpl before we can continue */ return 1; - } + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); if (!ndlp) { - lpfc_els_flush_rscn(phba); + lpfc_els_flush_rscn(vport); return 0; } else { - lpfc_nlp_init(phba, ndlp, NameServer_DID); + lpfc_nlp_init(vport, ndlp, NameServer_DID); ndlp->nlp_type |= NLP_FABRIC; ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, NameServer_DID, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + lpfc_issue_els_plogi(vport, NameServer_DID, 0); /* Wait for NameServer login cmpl before we can continue */ return 1; } } - lpfc_els_flush_rscn(phba); + lpfc_els_flush_rscn(vport); return 0; } static int -lpfc_els_rcv_flogi(struct lpfc_hba * phba, - struct lpfc_iocbq * cmdiocb, - struct lpfc_nodelist * ndlp, uint8_t newnode) +lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp, uint8_t newnode) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; uint32_t *lp = (uint32_t *) pcmd->virt; IOCB_t *icmd = &cmdiocb->iocb; @@ -2655,7 +3044,7 @@ /* FLOGI received */ - lpfc_set_disctmo(phba); + lpfc_set_disctmo(vport); if (phba->fc_topology == TOPOLOGY_LOOP) { /* We should never receive a FLOGI in loop mode, ignore it */ @@ -2664,33 +3053,34 @@ /* An FLOGI ELS command was received from DID in Loop Mode */ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "%d:0113 An FLOGI ELS command x%x was received " - "from DID x%x in Loop Mode\n", - phba->brd_no, cmd, did); + "%d (%d):0113 An FLOGI ELS command x%x was " + "received from DID x%x in Loop Mode\n", + phba->brd_no, vport->vpi, cmd, did); return 1; } did = Fabric_DID; - if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3))) { + if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3))) { /* For a FLOGI we accept, then if our portname is greater * then the 
remote portname we initiate Nport login. */ - rc = memcmp(&phba->fc_portname, &sp->portName, - sizeof (struct lpfc_name)); + rc = memcmp(&vport->fc_portname, &sp->portName, + sizeof(struct lpfc_name)); if (!rc) { - if ((mbox = mempool_alloc(phba->mbox_mem_pool, - GFP_KERNEL)) == 0) { + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) return 1; - } + lpfc_linkdown(phba); lpfc_init_link(phba, mbox, phba->cfg_topology, phba->cfg_link_speed); mbox->mb.un.varInitLnk.lipsr_AL_PA = 0; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mbox->vport = vport; rc = lpfc_sli_issue_mbox (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); lpfc_set_loopback_flag(phba); @@ -2699,31 +3089,34 @@ } return 1; } else if (rc > 0) { /* greater than */ - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_PT2PT_PLOGI; - spin_unlock_irq(phba->host->host_lock); - } - phba->fc_flag |= FC_PT2PT; - phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_PT2PT_PLOGI; + spin_unlock_irq(shost->host_lock); + } + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_PT2PT; + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + spin_unlock_irq(shost->host_lock); } else { /* Reject this request because invalid parameters */ stat.un.b.lsRjtRsvd0 = 0; stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, + NULL); return 1; } /* Send back ACC */ - lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode); + lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode); return 0; } static int -lpfc_els_rcv_rnid(struct lpfc_hba * phba, - struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp) +lpfc_els_rcv_rnid(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp) { struct lpfc_dmabuf *pcmd; uint32_t *lp; 
@@ -2746,7 +3139,7 @@ case 0: case RNID_TOPOLOGY_DISC: /* Send back ACC */ - lpfc_els_rsp_rnid_acc(phba, rn->Format, cmdiocb, ndlp); + lpfc_els_rsp_rnid_acc(vport, rn->Format, cmdiocb, ndlp); break; default: /* Reject this request because format not supported */ @@ -2754,13 +3147,14 @@ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, + NULL); } return 0; } static int -lpfc_els_rcv_lirr(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, +lpfc_els_rcv_lirr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct lpfc_nodelist *ndlp) { struct ls_rjt stat; @@ -2770,15 +3164,15 @@ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); return 0; } static void lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli; - struct lpfc_sli_ring *pring; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; MAILBOX_t *mb; IOCB_t *icmd; RPS_RSP *rps_rsp; @@ -2788,8 +3182,6 @@ uint16_t xri, status; uint32_t cmdsize; - psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; mb = &pmb->mb; ndlp = (struct lpfc_nodelist *) pmb->context2; @@ -2804,7 +3196,8 @@ cmdsize = sizeof(RPS_RSP) + sizeof(uint32_t); mempool_free(pmb, phba->mbox_mem_pool); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, lpfc_max_els_tries, ndlp, + elsiocb = lpfc_prep_els_iocb(phba->pport, 0, cmdsize, + lpfc_max_els_tries, ndlp, ndlp->nlp_DID, ELS_CMD_ACC); lpfc_nlp_put(ndlp); if (!elsiocb) @@ -2815,14 +3208,14 @@ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); *((uint32_t *) (pcmd)) = ELS_CMD_ACC; - pcmd += sizeof 
(uint32_t); /* Skip past command */ + pcmd += sizeof(uint32_t); /* Skip past command */ rps_rsp = (RPS_RSP *)pcmd; if (phba->fc_topology != TOPOLOGY_LOOP) status = 0x10; else status = 0x8; - if (phba->fc_flag & FC_FABRIC) + if (phba->pport->fc_flag & FC_FABRIC) status |= 0x4; rps_rsp->rsvd1 = 0; @@ -2836,25 +3229,25 @@ /* Xmit ELS RPS ACC response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0118 Xmit ELS RPS ACC response tag x%x xri x%x, " - "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", - phba->brd_no, elsiocb->iotag, + "%d (%d):0118 Xmit ELS RPS ACC response tag x%x " + "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, " + "rpi x%x\n", + phba->brd_no, ndlp->vport->vpi, elsiocb->iotag, elsiocb->iocb.ulpContext, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; phba->fc_stat.elsXmitACC++; - - if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) lpfc_els_free_iocb(phba, elsiocb); - } return; } static int -lpfc_els_rcv_rps(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_nodelist * ndlp) +lpfc_els_rcv_rps(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp) { + struct lpfc_hba *phba = vport->phba; uint32_t *lp; uint8_t flag; LPFC_MBOXQ_t *mbox; @@ -2868,7 +3261,8 @@ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, + NULL); } pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; @@ -2878,19 +3272,24 @@ if ((flag == 0) || ((flag == 1) && (be32_to_cpu(rps->un.portNum) == 0)) || - ((flag == 2) && (memcmp(&rps->un.portName, &phba->fc_portname, - sizeof (struct lpfc_name)) == 0))) { - if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) { 
+ ((flag == 2) && (memcmp(&rps->un.portName, &vport->fc_portname, + sizeof(struct lpfc_name)) == 0))) { + + printk("Fix me....\n"); + dump_stack(); + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC); + if (mbox) { lpfc_read_lnk_stat(phba, mbox); mbox->context1 = - (void *)((unsigned long)cmdiocb->iocb.ulpContext); + (void *)((unsigned long) cmdiocb->iocb.ulpContext); mbox->context2 = lpfc_nlp_get(ndlp); + mbox->vport = vport; mbox->mbox_cmpl = lpfc_els_rsp_rps_acc; if (lpfc_sli_issue_mbox (phba, mbox, - (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) { + (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) /* Mbox completion will send ELS Response */ return 0; - } + lpfc_nlp_put(ndlp); mempool_free(mbox, phba->mbox_mem_pool); } @@ -2899,27 +3298,25 @@ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); return 0; } static int -lpfc_els_rsp_rpl_acc(struct lpfc_hba * phba, uint16_t cmdsize, - struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp) +lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize, + struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp) { - IOCB_t *icmd; - IOCB_t *oldcmd; + struct lpfc_hba *phba = vport->phba; + IOCB_t *icmd, *oldcmd; RPL_RSP rpl_rsp; struct lpfc_iocbq *elsiocb; - struct lpfc_sli_ring *pring; - struct lpfc_sli *psli; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; uint8_t *pcmd; - psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ + elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, + ndlp->nlp_DID, ELS_CMD_ACC); - elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, - ndlp, ndlp->nlp_DID, ELS_CMD_ACC); if (!elsiocb) return 1; @@ -2929,7 +3326,7 @@ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt); 
*((uint32_t *) (pcmd)) = ELS_CMD_ACC; - pcmd += sizeof (uint16_t); + pcmd += sizeof(uint16_t); *((uint16_t *)(pcmd)) = be16_to_cpu(cmdsize); pcmd += sizeof(uint16_t); @@ -2937,8 +3334,8 @@ rpl_rsp.listLen = be32_to_cpu(1); rpl_rsp.index = 0; rpl_rsp.port_num_blk.portNum = 0; - rpl_rsp.port_num_blk.portID = be32_to_cpu(phba->fc_myDID); - memcpy(&rpl_rsp.port_num_blk.portName, &phba->fc_portname, + rpl_rsp.port_num_blk.portID = be32_to_cpu(vport->fc_myDID); + memcpy(&rpl_rsp.port_num_blk.portName, &vport->fc_portname, sizeof(struct lpfc_name)); memcpy(pcmd, &rpl_rsp, cmdsize - sizeof(uint32_t)); @@ -2946,13 +3343,14 @@ /* Xmit ELS RPL ACC response tag */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0120 Xmit ELS RPL ACC response tag x%x xri x%x, " - "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", - phba->brd_no, elsiocb->iotag, + "%d (%d):0120 Xmit ELS RPL ACC response tag x%x " + "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, " + "rpi x%x\n", + phba->brd_no, vport->vpi, elsiocb->iotag, elsiocb->iocb.ulpContext, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); - elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; phba->fc_stat.elsXmitACC++; if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { @@ -2963,8 +3361,8 @@ } static int -lpfc_els_rcv_rpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_nodelist * ndlp) +lpfc_els_rcv_rpl(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp) { struct lpfc_dmabuf *pcmd; uint32_t *lp; @@ -2979,7 +3377,8 @@ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, + NULL); } pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; @@ -2996,15 +3395,16 @@ } else { cmdsize = sizeof(uint32_t) + maxsize * sizeof(uint32_t); } - 
lpfc_els_rsp_rpl_acc(phba, cmdsize, cmdiocb, ndlp); + lpfc_els_rsp_rpl_acc(vport, cmdsize, cmdiocb, ndlp); return 0; } static int -lpfc_els_rcv_farp(struct lpfc_hba * phba, - struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp) +lpfc_els_rcv_farp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp) { + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd; uint32_t *lp; IOCB_t *icmd; @@ -3020,11 +3420,9 @@ fp = (FARP *) lp; /* FARP-REQ received from DID */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_ELS, - "%d:0601 FARP-REQ received from DID x%x\n", - phba->brd_no, did); + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0601 FARP-REQ received from DID x%x\n", + phba->brd_no, vport->vpi, did); /* We will only support match on WWPN or WWNN */ if (fp->Mflags & ~(FARP_MATCH_NODE | FARP_MATCH_PORT)) { @@ -3034,15 +3432,15 @@ cnt = 0; /* If this FARP command is searching for my portname */ if (fp->Mflags & FARP_MATCH_PORT) { - if (memcmp(&fp->RportName, &phba->fc_portname, - sizeof (struct lpfc_name)) == 0) + if (memcmp(&fp->RportName, &vport->fc_portname, + sizeof(struct lpfc_name)) == 0) cnt = 1; } /* If this FARP command is searching for my nodename */ if (fp->Mflags & FARP_MATCH_NODE) { - if (memcmp(&fp->RnodeName, &phba->fc_nodename, - sizeof (struct lpfc_name)) == 0) + if (memcmp(&fp->RnodeName, &vport->fc_nodename, + sizeof(struct lpfc_name)) == 0) cnt = 1; } @@ -3052,28 +3450,28 @@ /* Log back into the node before sending the FARP. 
*/ if (fp->Rflags & FARP_REQUEST_PLOGI) { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); } /* Send a FARP response to that node */ - if (fp->Rflags & FARP_REQUEST_FARPR) { - lpfc_issue_els_farpr(phba, did, 0); - } + if (fp->Rflags & FARP_REQUEST_FARPR) + lpfc_issue_els_farpr(vport, did, 0); } } return 0; } static int -lpfc_els_rcv_farpr(struct lpfc_hba * phba, - struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp) +lpfc_els_rcv_farpr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp) { struct lpfc_dmabuf *pcmd; uint32_t *lp; IOCB_t *icmd; uint32_t cmd, did; + struct lpfc_hba *phba = vport->phba; icmd = &cmdiocb->iocb; did = icmd->un.elsreq64.remoteID; @@ -3082,21 +3480,18 @@ cmd = *lp++; /* FARP-RSP received from DID */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_ELS, - "%d:0600 FARP-RSP received from DID x%x\n", - phba->brd_no, did); - + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0600 FARP-RSP received from DID x%x\n", + phba->brd_no, vport->vpi, did); /* ACCEPT the Farp resp request */ - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); return 0; } static int -lpfc_els_rcv_fan(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_nodelist * fan_ndlp) +lpfc_els_rcv_fan(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *fan_ndlp) { struct lpfc_dmabuf *pcmd; uint32_t *lp; @@ -3104,10 +3499,12 @@ uint32_t cmd, did; FAN *fp; struct lpfc_nodelist *ndlp, *next_ndlp; + struct lpfc_hba *phba = vport->phba; /* FAN received */ - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "%d:0265 FAN received\n", - phba->brd_no); + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0265 FAN received\n", + phba->brd_no, vport->vpi); icmd = &cmdiocb->iocb; 
did = icmd->un.elsreq64.remoteID; @@ -3115,11 +3512,11 @@ lp = (uint32_t *)pcmd->virt; cmd = *lp++; - fp = (FAN *)lp; + fp = (FAN *) lp; /* FAN received; Fan does not have a reply sequence */ - if (phba->hba_state == LPFC_LOCAL_CFG_LINK) { + if (phba->pport->port_state == LPFC_LOCAL_CFG_LINK) { if ((memcmp(&phba->fc_fabparam.nodeName, &fp->FnodeName, sizeof(struct lpfc_name)) != 0) || (memcmp(&phba->fc_fabparam.portName, &fp->FportName, @@ -3130,7 +3527,7 @@ */ list_for_each_entry_safe(ndlp, next_ndlp, - &phba->fc_nodes, nlp_listp) { + &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state != NLP_STE_NPR_NODE) continue; if (ndlp->nlp_type & NLP_FABRIC) { @@ -3138,24 +3535,24 @@ * Clean up old Fabric, Nameserver and * other NLP_FABRIC logins */ - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) { /* Fail outstanding I/O now since this * device is marked for PLOGI */ - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); } } - phba->hba_state = LPFC_FLOGI; - lpfc_set_disctmo(phba); - lpfc_initial_flogi(phba); + vport->port_state = LPFC_FLOGI; + lpfc_set_disctmo(vport); + lpfc_initial_flogi(vport); return 0; } /* Discovery not needed, * move the nodes to their original state. 
*/ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state != NLP_STE_NPR_NODE) continue; @@ -3163,13 +3560,13 @@ switch (ndlp->nlp_prev_state) { case NLP_STE_UNMAPPED_NODE: ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); break; case NLP_STE_MAPPED_NODE: ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, + lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); break; @@ -3179,7 +3576,7 @@ } /* Start discovery - this should just do CLEAR_LA */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); } return 0; } @@ -3187,42 +3584,42 @@ void lpfc_els_timeout(unsigned long ptr) { - struct lpfc_hba *phba; + struct lpfc_vport *vport = (struct lpfc_vport *) ptr; + struct lpfc_hba *phba = vport->phba; unsigned long iflag; - phba = (struct lpfc_hba *)ptr; - if (phba == 0) - return; - spin_lock_irqsave(phba->host->host_lock, iflag); - if (!(phba->work_hba_events & WORKER_ELS_TMO)) { - phba->work_hba_events |= WORKER_ELS_TMO; + spin_lock_irqsave(&vport->work_port_lock, iflag); + if ((vport->work_port_events & WORKER_ELS_TMO) == 0) { + vport->work_port_events |= WORKER_ELS_TMO; + spin_unlock_irqrestore(&vport->work_port_lock, iflag); + + spin_lock_irqsave(&phba->hbalock, iflag); if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, iflag); } - spin_unlock_irqrestore(phba->host->host_lock, iflag); + else + spin_unlock_irqrestore(&vport->work_port_lock, iflag); return; } void -lpfc_els_timeout_handler(struct lpfc_hba *phba) +lpfc_els_timeout_handler(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ring *pring; struct lpfc_iocbq *tmp_iocb, *piocb; IOCB_t *cmd = NULL; struct lpfc_dmabuf *pcmd; - uint32_t *elscmd; - uint32_t els_command=0; + uint32_t els_command = 0; uint32_t timeout; - 
uint32_t remote_ID; + uint32_t remote_ID = 0xffffffff; - if (phba == 0) - return; - spin_lock_irq(phba->host->host_lock); /* If the timer is already canceled do nothing */ - if (!(phba->work_hba_events & WORKER_ELS_TMO)) { - spin_unlock_irq(phba->host->host_lock); + if ((vport->work_port_events & WORKER_ELS_TMO) == 0) { return; } + spin_lock_irq(&phba->hbalock); timeout = (uint32_t)(phba->fc_ratov << 1); pring = &phba->sli.ring[LPFC_ELS_RING]; @@ -3230,63 +3627,70 @@ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { cmd = &piocb->iocb; - if ((piocb->iocb_flag & LPFC_IO_LIBDFC) || - (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN) || - (piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)) { + if ((piocb->iocb_flag & LPFC_IO_LIBDFC) != 0 || + piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || + piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN) continue; - } + + if (piocb->vport != vport) + continue; + pcmd = (struct lpfc_dmabuf *) piocb->context2; - if (pcmd) { - elscmd = (uint32_t *) (pcmd->virt); - els_command = *elscmd; - } + if (pcmd) + els_command = *(uint32_t *) (pcmd->virt); - if ((els_command == ELS_CMD_FARP) - || (els_command == ELS_CMD_FARPR)) { + if (els_command == ELS_CMD_FARP || + els_command == ELS_CMD_FARPR || + els_command == ELS_CMD_FDISC) + continue; + + if (vport != piocb->vport) continue; - } if (piocb->drvrTimeout > 0) { - if (piocb->drvrTimeout >= timeout) { + if (piocb->drvrTimeout >= timeout) piocb->drvrTimeout -= timeout; - } else { + else piocb->drvrTimeout = 0; - } continue; } - if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) { + remote_ID = 0xffffffff; + if (cmd->ulpCommand != CMD_GEN_REQUEST64_CR) + remote_ID = cmd->un.elsreq64.remoteID; + else { struct lpfc_nodelist *ndlp; - ndlp = __lpfc_findnode_rpi(phba, cmd->ulpContext); + ndlp = __lpfc_findnode_rpi(vport, cmd->ulpContext); + if (ndlp) remote_ID = ndlp->nlp_DID; - } else { - remote_ID = cmd->un.elsreq64.remoteID; } - lpfc_printf_log(phba, - KERN_ERR, - LOG_ELS, - "%d:0127 ELS timeout Data: 
x%x x%x x%x x%x\n", - phba->brd_no, els_command, + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0127 ELS timeout Data: x%x x%x x%x " + "x%x\n", + phba->brd_no, vport->vpi, els_command, remote_ID, cmd->ulpCommand, cmd->ulpIoTag); lpfc_sli_issue_abort_iotag(phba, pring, piocb); } - if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt) - mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout); + spin_unlock_irq(&phba->hbalock); - spin_unlock_irq(phba->host->host_lock); + if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt) + mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout); } void -lpfc_els_flush_cmd(struct lpfc_hba *phba) +lpfc_els_flush_cmd(struct lpfc_vport *vport) { LIST_HEAD(completions); + struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; struct lpfc_iocbq *tmp_iocb, *piocb; IOCB_t *cmd = NULL; - spin_lock_irq(phba->host->host_lock); + lpfc_fabric_abort_vport(vport); + + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) { cmd = &piocb->iocb; @@ -3301,271 +3705,1042 @@ cmd->ulpCommand == CMD_ABORT_XRI_CN) continue; + if (piocb->vport != vport) + continue; + list_move_tail(&piocb->list, &completions); pring->txq_cnt--; - } list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { - cmd = &piocb->iocb; - if (piocb->iocb_flag & LPFC_IO_LIBDFC) { continue; } + if (piocb->vport != vport) + continue; + lpfc_sli_issue_abort_iotag(phba, pring, piocb); } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); - while(!list_empty(&completions)) { + while (!list_empty(&completions)) { piocb = list_get_first(&completions, struct lpfc_iocbq, list); cmd = &piocb->iocb; - list_del(&piocb->list); + list_del_init(&piocb->list); - if (piocb->iocb_cmpl) { + if (!piocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, piocb); + else { cmd->ulpStatus = IOSTAT_LOCAL_REJECT; cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; (piocb->iocb_cmpl) (phba, piocb, piocb); - } else - 
lpfc_sli_release_iocbq(phba, piocb); + } } return; } -void -lpfc_els_unsol_event(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, struct lpfc_iocbq * elsiocb) +static void +lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_vport *vport, struct lpfc_iocbq *elsiocb) { - struct lpfc_sli *psli; struct lpfc_nodelist *ndlp; - struct lpfc_dmabuf *mp; - uint32_t *lp; - IOCB_t *icmd; struct ls_rjt stat; - uint32_t cmd; - uint32_t did; - uint32_t newnode; - uint32_t drop_cmd = 0; /* by default do NOT drop received cmd */ - uint32_t rjt_err = 0; - - psli = &phba->sli; - icmd = &elsiocb->iocb; - - if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) && - ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) { - /* Not enough posted buffers; Try posting more buffers */ - phba->fc_stat.NoRcvBuf++; - lpfc_post_buffer(phba, pring, 0, 1); - return; - } - - /* If there are no BDEs associated with this IOCB, - * there is nothing to do. - */ - if (icmd->ulpBdeCount == 0) - return; + uint32_t *payload; + uint32_t cmd, did, newnode, rjt_err = 0; + IOCB_t *icmd = &elsiocb->iocb; - /* type of ELS cmd is first 32bit word in packet */ - mp = lpfc_sli_ringpostbuf_get(phba, pring, getPaddr(icmd->un. - cont64[0]. - addrHigh, - icmd->un. 
- cont64[0].addrLow)); - if (mp == 0) { - drop_cmd = 1; + if (vport == NULL || elsiocb->context2 == NULL) goto dropit; - } newnode = 0; - lp = (uint32_t *) mp->virt; - cmd = *lp++; - lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], 1, 1); + payload = ((struct lpfc_dmabuf *)elsiocb->context2)->virt; + cmd = *payload; + if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) == 0) + lpfc_post_buffer(phba, pring, 1, 1); + did = icmd->un.rcvels.remoteID; if (icmd->ulpStatus) { - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - drop_cmd = 1; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV Unsol ELS: status:x%x/x%x did:x%x", + icmd->ulpStatus, icmd->un.ulpWord[4], did); goto dropit; } /* Check to see if link went down during discovery */ - if (lpfc_els_chk_latt(phba)) { - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - drop_cmd = 1; + if (lpfc_els_chk_latt(vport)) goto dropit; - } - did = icmd->un.rcvels.remoteID; - ndlp = lpfc_findnode_did(phba, did); + /* Ignore traffic received during vport shutdown. 
*/ + if (vport->load_flag & FC_UNLOADING) + goto dropit; + + ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); - if (!ndlp) { - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - drop_cmd = 1; + if (!ndlp) goto dropit; - } - lpfc_nlp_init(phba, ndlp, did); + lpfc_nlp_init(vport, ndlp, did); newnode = 1; if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) { ndlp->nlp_type |= NLP_FABRIC; } - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); } phba->fc_stat.elsRcvFrame++; if (elsiocb->context1) lpfc_nlp_put(elsiocb->context1); elsiocb->context1 = lpfc_nlp_get(ndlp); - elsiocb->context2 = mp; + elsiocb->vport = vport; if ((cmd & ELS_CMD_MASK) == ELS_CMD_RSCN) { cmd &= ELS_CMD_MASK; } /* ELS command received from NPORT */ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "%d:0112 ELS command x%x received from NPORT x%x " - "Data: x%x\n", phba->brd_no, cmd, did, phba->hba_state); + "%d (%d):0112 ELS command x%x received from NPORT x%x " + "Data: x%x\n", phba->brd_no, vport->vpi, cmd, did, + vport->port_state); switch (cmd) { case ELS_CMD_PLOGI: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV PLOGI: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvPLOGI++; - if (phba->hba_state < LPFC_DISC_AUTH) { - rjt_err = 1; + ndlp = lpfc_plogi_confirm_nport(phba, payload, ndlp); + + if (vport->port_state < LPFC_DISC_AUTH) { + rjt_err = LSRJT_UNABLE_TPC; break; } - ndlp = lpfc_plogi_confirm_nport(phba, mp, ndlp); - lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PLOGI); + lpfc_disc_state_machine(vport, ndlp, elsiocb, + NLP_EVT_RCV_PLOGI); + break; case ELS_CMD_FLOGI: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV FLOGI: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvFLOGI++; - 
lpfc_els_rcv_flogi(phba, elsiocb, ndlp, newnode); + lpfc_els_rcv_flogi(vport, elsiocb, ndlp, newnode); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; case ELS_CMD_LOGO: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV LOGO: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvLOGO++; - if (phba->hba_state < LPFC_DISC_AUTH) { - rjt_err = 1; + if (vport->port_state < LPFC_DISC_AUTH) { + rjt_err = LSRJT_UNABLE_TPC; break; } - lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_LOGO); + lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); break; case ELS_CMD_PRLO: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV PRLO: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvPRLO++; - if (phba->hba_state < LPFC_DISC_AUTH) { - rjt_err = 1; + if (vport->port_state < LPFC_DISC_AUTH) { + rjt_err = LSRJT_UNABLE_TPC; break; } - lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLO); + lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); break; case ELS_CMD_RSCN: phba->fc_stat.elsRcvRSCN++; - lpfc_els_rcv_rscn(phba, elsiocb, ndlp, newnode); + lpfc_els_rcv_rscn(vport, elsiocb, ndlp, newnode); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; case ELS_CMD_ADISC: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV ADISC: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvADISC++; - if (phba->hba_state < LPFC_DISC_AUTH) { - rjt_err = 1; + if (vport->port_state < LPFC_DISC_AUTH) { + rjt_err = LSRJT_UNABLE_TPC; break; } - lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_ADISC); + lpfc_disc_state_machine(vport, ndlp, elsiocb, + NLP_EVT_RCV_ADISC); break; case ELS_CMD_PDISC: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV PDISC: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + 
phba->fc_stat.elsRcvPDISC++; - if (phba->hba_state < LPFC_DISC_AUTH) { - rjt_err = 1; + if (vport->port_state < LPFC_DISC_AUTH) { + rjt_err = LSRJT_UNABLE_TPC; break; } - lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PDISC); + lpfc_disc_state_machine(vport, ndlp, elsiocb, + NLP_EVT_RCV_PDISC); break; case ELS_CMD_FARPR: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV FARPR: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvFARPR++; - lpfc_els_rcv_farpr(phba, elsiocb, ndlp); + lpfc_els_rcv_farpr(vport, elsiocb, ndlp); break; case ELS_CMD_FARP: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV FARP: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvFARP++; - lpfc_els_rcv_farp(phba, elsiocb, ndlp); + lpfc_els_rcv_farp(vport, elsiocb, ndlp); break; case ELS_CMD_FAN: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV FAN: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvFAN++; - lpfc_els_rcv_fan(phba, elsiocb, ndlp); + lpfc_els_rcv_fan(vport, elsiocb, ndlp); break; case ELS_CMD_PRLI: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV PRLI: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvPRLI++; - if (phba->hba_state < LPFC_DISC_AUTH) { - rjt_err = 1; + if (vport->port_state < LPFC_DISC_AUTH) { + rjt_err = LSRJT_UNABLE_TPC; break; } - lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLI); + lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); break; case ELS_CMD_LIRR: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV LIRR: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvLIRR++; - lpfc_els_rcv_lirr(phba, elsiocb, ndlp); + lpfc_els_rcv_lirr(vport, elsiocb, ndlp); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; case ELS_CMD_RPS: + 
lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RPS: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvRPS++; - lpfc_els_rcv_rps(phba, elsiocb, ndlp); + lpfc_els_rcv_rps(vport, elsiocb, ndlp); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; case ELS_CMD_RPL: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RPL: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvRPL++; - lpfc_els_rcv_rpl(phba, elsiocb, ndlp); + lpfc_els_rcv_rpl(vport, elsiocb, ndlp); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; case ELS_CMD_RNID: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV RNID: did:x%x/ste:x%x flg:x%x", + did, vport->port_state, ndlp->nlp_flag); + phba->fc_stat.elsRcvRNID++; - lpfc_els_rcv_rnid(phba, elsiocb, ndlp); + lpfc_els_rcv_rnid(vport, elsiocb, ndlp); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; default: + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, + "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", + cmd, did, vport->port_state); + /* Unsupported ELS command, reject */ - rjt_err = 1; + rjt_err = LSRJT_INVALID_CMD; /* Unknown ELS command received from NPORT */ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "%d:0115 Unknown ELS command x%x received from " - "NPORT x%x\n", phba->brd_no, cmd, did); + "%d (%d):0115 Unknown ELS command x%x " + "received from NPORT x%x\n", + phba->brd_no, vport->vpi, cmd, did); if (newnode) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); break; } /* check if need to LS_RJT received ELS cmd */ if (rjt_err) { - stat.un.b.lsRjtRsvd0 = 0; - stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + memset(&stat, 0, sizeof(stat)); + stat.un.b.lsRjtRsnCode = rjt_err; stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; - stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, elsiocb, ndlp); + 
lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, + NULL); + if (newnode) + lpfc_drop_node(vport, ndlp); } - lpfc_nlp_put(elsiocb->context1); - elsiocb->context1 = NULL; - if (elsiocb->context2) { - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - } + return; + dropit: - /* check if need to drop received ELS cmd */ - if (drop_cmd == 1) { lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "%d:0111 Dropping received ELS cmd " - "Data: x%x x%x x%x\n", phba->brd_no, + "%d (%d):0111 Dropping received ELS cmd " + "Data: x%x x%x x%x\n", + phba->brd_no, vport ? vport->vpi : 0xffff, icmd->ulpStatus, icmd->un.ulpWord[4], icmd->ulpTimeout); phba->fc_stat.elsRcvDrop++; +} + +static struct lpfc_vport * +lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi) +{ + struct lpfc_vport *vport; + + list_for_each_entry(vport, &phba->port_list, listentry) { + if (vport->vpi == vpi) + return vport; + } + return NULL; +} + +void +lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *elsiocb) +{ + struct lpfc_vport *vport = phba->pport; + IOCB_t *icmd = &elsiocb->iocb; + dma_addr_t paddr; + struct lpfc_dmabuf *bdeBuf1 = elsiocb->context2; + struct lpfc_dmabuf *bdeBuf2 = elsiocb->context3; + + elsiocb->context2 = NULL; + elsiocb->context3 = NULL; + + if (icmd->ulpStatus == IOSTAT_NEED_BUFFER) { + lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ); + } else if (icmd->ulpStatus == IOSTAT_LOCAL_REJECT && + (icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING) { + phba->fc_stat.NoRcvBuf++; + /* Not enough posted buffers; Try posting more buffers */ + if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)) + lpfc_post_buffer(phba, pring, 0, 1); + return; + } + + if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + (icmd->ulpCommand == CMD_IOCB_RCV_ELS64_CX || + icmd->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) { + if (icmd->unsli3.rcvsli3.vpi == 0xffff) + vport = phba->pport; + else { + uint16_t vpi = icmd->unsli3.rcvsli3.vpi; + vport = 
lpfc_find_vport_by_vpid(phba, vpi); + } + } + /* If there are no BDEs associated + * with this IOCB, there is nothing to do. + */ + if (icmd->ulpBdeCount == 0) + return; + + /* type of ELS cmd is first 32bit word + * in packet + */ + if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + elsiocb->context2 = bdeBuf1; + } else { + paddr = getPaddr(icmd->un.cont64[0].addrHigh, + icmd->un.cont64[0].addrLow); + elsiocb->context2 = lpfc_sli_ringpostbuf_get(phba, pring, + paddr); + } + + lpfc_els_unsol_buffer(phba, pring, vport, elsiocb); + /* + * The different unsolicited event handlers would tell us + * if they are done with "mp" by setting context2 to NULL. + */ + lpfc_nlp_put(elsiocb->context1); + elsiocb->context1 = NULL; + if (elsiocb->context2) { + lpfc_in_buf_free(phba, (struct lpfc_dmabuf *)elsiocb->context2); + elsiocb->context2 = NULL; + } + + /* RCV_ELS64_CX provide for 2 BDEs - process 2nd if included */ + if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) && + icmd->ulpBdeCount == 2) { + elsiocb->context2 = bdeBuf2; + lpfc_els_unsol_buffer(phba, pring, vport, elsiocb); + /* free mp if we are done with it */ + if (elsiocb->context2) { + lpfc_in_buf_free(phba, elsiocb->context2); + elsiocb->context2 = NULL; + } + } +} + +void +lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) +{ + struct lpfc_nodelist *ndlp, *ndlp_fdmi; + + ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (!ndlp) { + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + if (phba->fc_topology == TOPOLOGY_LOOP) { + lpfc_disc_start(vport); + return; + } + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0251 NameServer login: no memory\n", + phba->brd_no, vport->vpi); + return; + } + lpfc_nlp_init(vport, ndlp, NameServer_DID); + ndlp->nlp_type |= NLP_FABRIC; + } + + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + + if (lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0)) { + lpfc_vport_set_state(vport, 
FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0252 Cannot issue NameServer login\n", + phba->brd_no, vport->vpi); + return; + } + + if (phba->cfg_fdmi_on) { + ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool, + GFP_KERNEL); + if (ndlp_fdmi) { + lpfc_nlp_init(vport, ndlp_fdmi, FDMI_DID); + ndlp_fdmi->nlp_type |= NLP_FABRIC; + ndlp_fdmi->nlp_state = + NLP_STE_PLOGI_ISSUE; + lpfc_issue_els_plogi(vport, ndlp_fdmi->nlp_DID, + 0); + } + } + return; +} + +static void +lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) +{ + struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + MAILBOX_t *mb = &pmb->mb; + + vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI; + lpfc_nlp_put(ndlp); + + if (mb->mbxStatus) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "%d (%d):0915 Register VPI failed: 0x%x\n", + phba->brd_no, vport->vpi, mb->mbxStatus); + + switch (mb->mbxStatus) { + case 0x11: /* unsupported feature */ + case 0x9603: /* max_vpi exceeded */ + /* giving up on vport registration */ + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + spin_unlock_irq(shost->host_lock); + lpfc_can_disctmo(vport); + break; + default: + /* Try to recover from this error */ + lpfc_mbx_unreg_vpi(vport); + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + lpfc_initial_fdisc(vport); + break; + } + + } else { + if (vport == phba->pport) + lpfc_issue_fabric_reglogin(vport); + else + lpfc_do_scr_ns_plogi(phba, vport); } + mempool_free(pmb, phba->mbox_mem_pool); return; } + +void +lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp) +{ + LPFC_MBOXQ_t *mbox; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mbox) { + lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, mbox); + mbox->vport = vport; + mbox->context2 = 
lpfc_nlp_get(ndlp); + mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport; + if (lpfc_sli_issue_mbox(phba, mbox, + MBX_NOWAIT | MBX_STOP_IOCB) + == MBX_NOT_FINISHED) { + mempool_free(mbox, phba->mbox_mem_pool); + vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI; + + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "%d (%d):0253 Register VPI: Cannot send mbox\n", + phba->brd_no, vport->vpi); + } + } else { + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "%d (%d):0254 Register VPI: no memory\n", + phba->brd_no, vport->vpi); + + vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI; + lpfc_nlp_put(ndlp); + } +} + +static void +lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + struct lpfc_nodelist *np; + struct lpfc_nodelist *next_np; + IOCB_t *irsp = &rspiocb->iocb; + struct lpfc_iocbq *piocb; + + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0123 FDISC completes. x%x/x%x prevDID: x%x\n", + phba->brd_no, vport->vpi, + irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID); + + /* Since all FDISCs are being single threaded, we + * must reset the discovery timer for ALL vports + * waiting to send FDISC when one completes. + */ + list_for_each_entry(piocb, &phba->fabric_iocb_list, list) { + lpfc_set_disctmo(piocb->vport); + } + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "FDISC cmpl: status:x%x/x%x prevdid:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID); + + if (irsp->ulpStatus) { + /* Check for retry */ + if (lpfc_els_retry(phba, cmdiocb, rspiocb)) + goto out; + + /* FDISC failed */ + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0124 FDISC failed. 
(%d/%d)\n", + phba->brd_no, vport->vpi, + irsp->ulpStatus, irsp->un.ulpWord[4]); + + if (vport->fc_vport->vport_state == FC_VPORT_INITIALIZING) + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + + lpfc_nlp_put(ndlp); + /* giving up on FDISC. Cancel discovery timer */ + lpfc_can_disctmo(vport); + } else { + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_FABRIC; + if (vport->phba->fc_topology == TOPOLOGY_LOOP) + vport->fc_flag |= FC_PUBLIC_LOOP; + spin_unlock_irq(shost->host_lock); + + vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID; + lpfc_vport_set_state(vport, FC_VPORT_ACTIVE); + if ((vport->fc_prevDID != vport->fc_myDID) && + !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) { + /* If our NportID changed, we need to ensure all + * remaining NPORTs get unreg_login'ed so we can + * issue unreg_vpi. + */ + list_for_each_entry_safe(np, next_np, + &vport->fc_nodes, nlp_listp) { + if (np->nlp_state != NLP_STE_NPR_NODE + || !(np->nlp_flag & NLP_NPR_ADISC)) + continue; + spin_lock_irq(shost->host_lock); + np->nlp_flag &= ~NLP_NPR_ADISC; + spin_unlock_irq(shost->host_lock); + lpfc_unreg_rpi(vport, np); + } + lpfc_mbx_unreg_vpi(vport); + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + } + + if (vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) + lpfc_register_new_vport(phba, vport, ndlp); + else + lpfc_do_scr_ns_plogi(phba, vport); + + lpfc_nlp_put(ndlp); /* Free Fabric ndlp for vports */ + } + +out: + lpfc_els_free_iocb(phba, cmdiocb); +} + +int +lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint8_t retry) +{ + struct lpfc_hba *phba = vport->phba; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; + struct serv_parm *sp; + uint8_t *pcmd; + uint16_t cmdsize; + int did = ndlp->nlp_DID; + int rc; + + cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, did, + ELS_CMD_FDISC); + if (!elsiocb) { + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d 
(%d):0255 Issue FDISC: no IOCB\n", + phba->brd_no, vport->vpi); + return 1; + } + + icmd = &elsiocb->iocb; + icmd->un.elsreq64.myID = 0; + icmd->un.elsreq64.fl = 1; + + /* For FDISC, Let FDISC rsp set the NPortID for this VPI */ + icmd->ulpCt_h = 1; + icmd->ulpCt_l = 0; + + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + *((uint32_t *) (pcmd)) = ELS_CMD_FDISC; + pcmd += sizeof(uint32_t); /* CSP Word 1 */ + memcpy(pcmd, &vport->phba->pport->fc_sparam, sizeof(struct serv_parm)); + sp = (struct serv_parm *) pcmd; + /* Setup CSPs accordingly for Fabric */ + sp->cmn.e_d_tov = 0; + sp->cmn.w2.r_a_tov = 0; + sp->cls1.classValid = 0; + sp->cls2.seqDelivery = 1; + sp->cls3.seqDelivery = 1; + + pcmd += sizeof(uint32_t); /* CSP Word 2 */ + pcmd += sizeof(uint32_t); /* CSP Word 3 */ + pcmd += sizeof(uint32_t); /* CSP Word 4 */ + pcmd += sizeof(uint32_t); /* Port Name */ + memcpy(pcmd, &vport->fc_portname, 8); + pcmd += sizeof(uint32_t); /* Node Name */ + pcmd += sizeof(uint32_t); /* Node Name */ + memcpy(pcmd, &vport->fc_nodename, 8); + + lpfc_set_disctmo(vport); + + phba->fc_stat.elsXmitFDISC++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_fdisc; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue FDISC: did:x%x", + did, 0, 0); + + rc = lpfc_issue_fabric_iocb(phba, elsiocb); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0256 Issue FDISC: Cannot send IOCB\n", + phba->brd_no, vport->vpi); + + return 1; + } + lpfc_vport_set_state(vport, FC_VPORT_INITIALIZING); + vport->port_state = LPFC_FDISC; + return 0; +} + +static void +lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + IOCB_t *irsp; + + irsp = &rspiocb->iocb; + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "LOGO npiv cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, 
irsp->un.ulpWord[4], irsp->un.rcvels.remoteID); + + lpfc_els_free_iocb(phba, cmdiocb); + vport->unreg_vpi_cmpl = VPORT_ERROR; +} + +int +lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; + uint8_t *pcmd; + uint16_t cmdsize; + + cmdsize = 2 * sizeof(uint32_t) + sizeof(struct lpfc_name); + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, 0, ndlp, ndlp->nlp_DID, + ELS_CMD_LOGO); + if (!elsiocb) + return 1; + + icmd = &elsiocb->iocb; + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + *((uint32_t *) (pcmd)) = ELS_CMD_LOGO; + pcmd += sizeof(uint32_t); + + /* Fill in LOGO payload */ + *((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID); + pcmd += sizeof(uint32_t); + memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name)); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue LOGO npiv did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); + + elsiocb->iocb_cmpl = lpfc_cmpl_els_npiv_logo; + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_LOGO_SND; + spin_unlock_irq(shost->host_lock); + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_LOGO_SND; + spin_unlock_irq(shost->host_lock); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } + return 0; +} + +void +lpfc_fabric_block_timeout(unsigned long ptr) +{ + struct lpfc_hba *phba = (struct lpfc_hba *) ptr; + unsigned long iflags; + uint32_t tmo_posted; + spin_lock_irqsave(&phba->pport->work_port_lock, iflags); + tmo_posted = phba->pport->work_port_events & WORKER_FABRIC_BLOCK_TMO; + if (!tmo_posted) + phba->pport->work_port_events |= WORKER_FABRIC_BLOCK_TMO; + spin_unlock_irqrestore(&phba->pport->work_port_lock, iflags); + + if (!tmo_posted) { + 
spin_lock_irqsave(&phba->hbalock, iflags); + if (phba->work_wait) + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, iflags); + } +} + +static void +lpfc_resume_fabric_iocbs(struct lpfc_hba *phba) +{ + struct lpfc_iocbq *iocb; + unsigned long iflags; + int ret; + struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + IOCB_t *cmd; + +repeat: + iocb = NULL; + spin_lock_irqsave(&phba->hbalock, iflags); + /* Post any pending iocb to the SLI layer */ + if (atomic_read(&phba->fabric_iocb_count) == 0) { + list_remove_head(&phba->fabric_iocb_list, iocb, typeof(*iocb), + list); + if (iocb) + atomic_inc(&phba->fabric_iocb_count); + } + spin_unlock_irqrestore(&phba->hbalock, iflags); + if (iocb) { + iocb->fabric_iocb_cmpl = iocb->iocb_cmpl; + iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb; + iocb->iocb_flag |= LPFC_IO_FABRIC; + + lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD, + "Fabric sched1: ste:x%x", + iocb->vport->port_state, 0, 0); + + ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0); + + if (ret == IOCB_ERROR) { + iocb->iocb_cmpl = iocb->fabric_iocb_cmpl; + iocb->fabric_iocb_cmpl = NULL; + iocb->iocb_flag &= ~LPFC_IO_FABRIC; + cmd = &iocb->iocb; + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + iocb->iocb_cmpl(phba, iocb, iocb); + + atomic_dec(&phba->fabric_iocb_count); + goto repeat; + } + } + + return; +} + +void +lpfc_unblock_fabric_iocbs(struct lpfc_hba *phba) +{ + clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); + + lpfc_resume_fabric_iocbs(phba); + return; +} + +static void +lpfc_block_fabric_iocbs(struct lpfc_hba *phba) +{ + int blocked; + + blocked = test_and_set_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); + /* Start a timer to unblock fabric + * iocbs after 100ms + */ + if (!blocked) + mod_timer(&phba->fabric_block_timer, jiffies + HZ/10 ); + + return; +} + +static void +lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct ls_rjt 
stat; + + if ((cmdiocb->iocb_flag & LPFC_IO_FABRIC) != LPFC_IO_FABRIC) + BUG(); + + switch (rspiocb->iocb.ulpStatus) { + case IOSTAT_NPORT_RJT: + case IOSTAT_FABRIC_RJT: + if (rspiocb->iocb.un.ulpWord[4] & RJT_UNAVAIL_TEMP) { + lpfc_block_fabric_iocbs(phba); + } + break; + + case IOSTAT_NPORT_BSY: + case IOSTAT_FABRIC_BSY: + lpfc_block_fabric_iocbs(phba); + break; + + case IOSTAT_LS_RJT: + stat.un.lsRjtError = + be32_to_cpu(rspiocb->iocb.un.ulpWord[4]); + if ((stat.un.b.lsRjtRsnCode == LSRJT_UNABLE_TPC) || + (stat.un.b.lsRjtRsnCode == LSRJT_LOGICAL_BSY)) + lpfc_block_fabric_iocbs(phba); + break; + } + + if (atomic_read(&phba->fabric_iocb_count) == 0) + BUG(); + + cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl; + cmdiocb->fabric_iocb_cmpl = NULL; + cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC; + cmdiocb->iocb_cmpl(phba, cmdiocb, rspiocb); + + atomic_dec(&phba->fabric_iocb_count); + if (!test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags)) { + /* Post any pending iocbs to HBA */ + lpfc_resume_fabric_iocbs(phba); + } +} + +int +lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb) +{ + unsigned long iflags; + struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + int ready; + int ret; + + if (atomic_read(&phba->fabric_iocb_count) > 1) + BUG(); + + spin_lock_irqsave(&phba->hbalock, iflags); + ready = atomic_read(&phba->fabric_iocb_count) == 0 && + !test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); + + spin_unlock_irqrestore(&phba->hbalock, iflags); + if (ready) { + iocb->fabric_iocb_cmpl = iocb->iocb_cmpl; + iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb; + iocb->iocb_flag |= LPFC_IO_FABRIC; + + lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD, + "Fabric sched2: ste:x%x", + iocb->vport->port_state, 0, 0); + + atomic_inc(&phba->fabric_iocb_count); + ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0); + + if (ret == IOCB_ERROR) { + iocb->iocb_cmpl = iocb->fabric_iocb_cmpl; + iocb->fabric_iocb_cmpl = NULL; + iocb->iocb_flag &= ~LPFC_IO_FABRIC; + 
atomic_dec(&phba->fabric_iocb_count); + } + } else { + spin_lock_irqsave(&phba->hbalock, iflags); + list_add_tail(&iocb->list, &phba->fabric_iocb_list); + spin_unlock_irqrestore(&phba->hbalock, iflags); + ret = IOCB_SUCCESS; + } + return ret; +} + + +void lpfc_fabric_abort_vport(struct lpfc_vport *vport) +{ + LIST_HEAD(completions); + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *tmp_iocb, *piocb; + IOCB_t *cmd; + + spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, + list) { + + if (piocb->vport != vport) + continue; + + list_move_tail(&piocb->list, &completions); + } + spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&completions)) { + piocb = list_get_first(&completions, struct lpfc_iocbq, list); + list_del_init(&piocb->list); + + cmd = &piocb->iocb; + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (piocb->iocb_cmpl) (phba, piocb, piocb); + } +} + +void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp) +{ + LIST_HEAD(completions); + struct lpfc_hba *phba = ndlp->vport->phba; + struct lpfc_iocbq *tmp_iocb, *piocb; + struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + IOCB_t *cmd; + + spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, + list) { + if ((lpfc_check_sli_ndlp(phba, pring, piocb, ndlp))) { + + list_move_tail(&piocb->list, &completions); + } + } + spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&completions)) { + piocb = list_get_first(&completions, struct lpfc_iocbq, list); + list_del_init(&piocb->list); + + cmd = &piocb->iocb; + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (piocb->iocb_cmpl) (phba, piocb, piocb); + } +} + +void lpfc_fabric_abort_hba(struct lpfc_hba *phba) +{ + LIST_HEAD(completions); + struct lpfc_iocbq *piocb; + IOCB_t *cmd; + + spin_lock_irq(&phba->hbalock); + list_splice_init(&phba->fabric_iocb_list, &completions); + 
spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&completions)) { + piocb = list_get_first(&completions, struct lpfc_iocbq, list); + list_del_init(&piocb->list); + + cmd = &piocb->iocb; + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (piocb->iocb_cmpl) (phba, piocb, piocb); + } +} + + +void lpfc_fabric_abort_flogi(struct lpfc_hba *phba) +{ + LIST_HEAD(completions); + struct lpfc_iocbq *tmp_iocb, *piocb; + IOCB_t *cmd; + struct lpfc_nodelist *ndlp; + + spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, + list) { + + cmd = &piocb->iocb; + ndlp = (struct lpfc_nodelist *) piocb->context1; + if (cmd->ulpCommand == CMD_ELS_REQUEST64_CR && + ndlp != NULL && + ndlp->nlp_DID == Fabric_DID) + list_move_tail(&piocb->list, &completions); + } + spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&completions)) { + piocb = list_get_first(&completions, struct lpfc_iocbq, list); + list_del_init(&piocb->list); + + cmd = &piocb->iocb; + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (piocb->iocb_cmpl) (phba, piocb, piocb); + } +} + + diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hbadisc.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hbadisc.c 2007-12-21 15:36:12.000000000 -0500 @@ -36,6 +36,8 @@ #include "lpfc.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" +#include "lpfc_vport.h" +#include "lpfc_debugfs.h" /* AlpaArray for assignment of scsid for scan-down and bind_method */ static uint8_t lpfcAlpaArray[] = { @@ -54,7 +56,7 @@ 0x10, 0x0F, 0x08, 0x04, 0x02, 0x01 }; -static void lpfc_disc_timeout_handler(struct lpfc_hba *); +static void lpfc_disc_timeout_handler(struct lpfc_vport *); void lpfc_terminate_rport_io(struct fc_rport *rport) @@ -74,14 +76,16 @@ return; } - phba = ndlp->nlp_phba; + phba = 
ndlp->vport->phba; + + lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT, + "rport terminate: sid:x%x did:x%x flg:x%x", + ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag); - spin_lock_irq(phba->host->host_lock); if (ndlp->nlp_sid != NLP_NO_SID) { lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); } - spin_unlock_irq(phba->host->host_lock); return; } @@ -94,28 +98,98 @@ { struct lpfc_rport_data *rdata; struct lpfc_nodelist * ndlp; - uint8_t *name; - int warn_on = 0; + struct lpfc_vport *vport; struct lpfc_hba *phba; + struct completion devloss_compl; + struct lpfc_work_evt *evtp; rdata = rport->dd_data; ndlp = rdata->pnode; if (!ndlp) { - if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) + if (rport->scsi_target_id != -1) { printk(KERN_ERR "Cannot find remote node" " for rport in dev_loss_tmo_callbk x%x\n", rport->port_id); + } return; } - if (ndlp->nlp_state == NLP_STE_MAPPED_NODE) + vport = ndlp->vport; + phba = vport->phba; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, + "rport devlosscb: sid:x%x did:x%x flg:x%x", + ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag); + + init_completion(&devloss_compl); + evtp = &ndlp->dev_loss_evt; + + if (!list_empty(&evtp->evt_listp)) + return; + + spin_lock_irq(&phba->hbalock); + evtp->evt_arg1 = ndlp; + evtp->evt_arg2 = &devloss_compl; + evtp->evt = LPFC_EVT_DEV_LOSS; + list_add_tail(&evtp->evt_listp, &phba->work_list); + if (phba->work_wait) + wake_up(phba->work_wait); + + spin_unlock_irq(&phba->hbalock); + + wait_for_completion(&devloss_compl); + + return; +} + +/* + * This function is called from the worker thread when dev_loss_tmo + * expire. 
+ */ +void +lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) +{ + struct lpfc_rport_data *rdata; + struct fc_rport *rport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + uint8_t *name; + int warn_on = 0; + + rport = ndlp->rport; + + if (!rport) return; - name = (uint8_t *)&ndlp->nlp_portname; - phba = ndlp->nlp_phba; + rdata = rport->dd_data; + name = (uint8_t *) &ndlp->nlp_portname; + vport = ndlp->vport; + phba = vport->phba; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, + "rport devlosstmo:did:x%x type:x%x id:x%x", + ndlp->nlp_DID, ndlp->nlp_type, rport->scsi_target_id); - spin_lock_irq(phba->host->host_lock); + if (!(vport->load_flag & FC_UNLOADING) && + ndlp->nlp_state == NLP_STE_MAPPED_NODE) + return; + + if (ndlp->nlp_type & NLP_FABRIC) { + int put_node; + int put_rport; + + /* We will clean up these Nodes in linkup */ + put_node = rdata->pnode != NULL; + put_rport = ndlp->rport != NULL; + rdata->pnode = NULL; + ndlp->rport = NULL; + if (put_node) + lpfc_nlp_put(ndlp); + if (put_rport) + put_device(&rport->dev); + return; + } if (ndlp->nlp_sid != NLP_NO_SID) { warn_on = 1; @@ -123,76 +197,114 @@ lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); } - if (phba->fc_flag & FC_UNLOADING) + if (vport->load_flag & FC_UNLOADING) warn_on = 0; - spin_unlock_irq(phba->host->host_lock); - if (warn_on) { lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0203 Devloss timeout on " + "%d (%d):0203 Devloss timeout on " "WWPN %x:%x:%x:%x:%x:%x:%x:%x " "NPort x%x Data: x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, *name, *(name+1), *(name+2), *(name+3), *(name+4), *(name+5), *(name+6), *(name+7), ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); } else { lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0204 Devloss timeout on " + "%d (%d):0204 Devloss timeout on " "WWPN %x:%x:%x:%x:%x:%x:%x:%x " "NPort x%x Data: x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, 
*name, *(name+1), *(name+2), *(name+3), *(name+4), *(name+5), *(name+6), *(name+7), ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); } - if (!(phba->fc_flag & FC_UNLOADING) && + if (!(vport->load_flag & FC_UNLOADING) && !(ndlp->nlp_flag & NLP_DELAY_TMO) && !(ndlp->nlp_flag & NLP_NPR_2B_DISC) && (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE)) - lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM); + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); else { + int put_node; + int put_rport; + + put_node = rdata->pnode != NULL; + put_rport = ndlp->rport != NULL; rdata->pnode = NULL; ndlp->rport = NULL; + if (put_node) lpfc_nlp_put(ndlp); + if (put_rport) put_device(&rport->dev); } +} + +void +lpfc_worker_wake_up(struct lpfc_hba *phba) +{ + wake_up(phba->work_wait); return; } static void -lpfc_work_list_done(struct lpfc_hba * phba) +lpfc_work_list_done(struct lpfc_hba *phba) { struct lpfc_work_evt *evtp = NULL; struct lpfc_nodelist *ndlp; + struct lpfc_vport *vport; int free_evt; - spin_lock_irq(phba->host->host_lock); - while(!list_empty(&phba->work_list)) { + spin_lock_irq(&phba->hbalock); + while (!list_empty(&phba->work_list)) { list_remove_head((&phba->work_list), evtp, typeof(*evtp), evt_listp); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); free_evt = 1; switch (evtp->evt) { + case LPFC_EVT_DEV_LOSS_DELAY: + free_evt = 0; /* evt is part of ndlp */ + ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); + vport = ndlp->vport; + if (!vport) + break; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, + "rport devlossdly:did:x%x flg:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, 0); + + if (!(vport->load_flag & FC_UNLOADING) && + !(ndlp->nlp_flag & NLP_DELAY_TMO) && + !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) { + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); + } + break; case LPFC_EVT_ELS_RETRY: - ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); + ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); 
lpfc_els_retry_delay_handler(ndlp); + free_evt = 0; /* evt is part of ndlp */ + break; + case LPFC_EVT_DEV_LOSS: + ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); + lpfc_nlp_get(ndlp); + lpfc_dev_loss_tmo_handler(ndlp); free_evt = 0; + complete((struct completion *)(evtp->evt_arg2)); + lpfc_nlp_put(ndlp); break; case LPFC_EVT_ONLINE: - if (phba->hba_state < LPFC_LINK_DOWN) - *(int *)(evtp->evt_arg1) = lpfc_online(phba); + if (phba->link_state < LPFC_LINK_DOWN) + *(int *) (evtp->evt_arg1) = lpfc_online(phba); else - *(int *)(evtp->evt_arg1) = 0; + *(int *) (evtp->evt_arg1) = 0; complete((struct completion *)(evtp->evt_arg2)); break; case LPFC_EVT_OFFLINE_PREP: - if (phba->hba_state >= LPFC_LINK_DOWN) + if (phba->link_state >= LPFC_LINK_DOWN) lpfc_offline_prep(phba); *(int *)(evtp->evt_arg1) = 0; complete((struct completion *)(evtp->evt_arg2)); @@ -218,33 +330,31 @@ case LPFC_EVT_KILL: lpfc_offline(phba); *(int *)(evtp->evt_arg1) - = (phba->stopped) ? 0 : lpfc_sli_brdkill(phba); + = (phba->pport->stopped) + ? 
0 : lpfc_sli_brdkill(phba); lpfc_unblock_mgmt_io(phba); complete((struct completion *)(evtp->evt_arg2)); break; } if (free_evt) kfree(evtp); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); } -static void -lpfc_work_done(struct lpfc_hba * phba) +void +lpfc_work_done(struct lpfc_hba *phba) { struct lpfc_sli_ring *pring; - int i; - uint32_t ha_copy; - uint32_t control; - uint32_t work_hba_events; + uint32_t ha_copy, status, control, work_port_events; + struct lpfc_vport *vport; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); ha_copy = phba->work_ha; phba->work_ha = 0; - work_hba_events=phba->work_hba_events; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); if (ha_copy & HA_ERATT) lpfc_handle_eratt(phba); @@ -255,66 +365,111 @@ if (ha_copy & HA_LATT) lpfc_handle_latt(phba); - if (work_hba_events & WORKER_DISC_TMO) - lpfc_disc_timeout_handler(phba); + spin_lock_irq(&phba->hbalock); + list_for_each_entry(vport, &phba->port_list, listentry) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + if (!scsi_host_get(shost)) { + continue; + } + spin_unlock_irq(&phba->hbalock); + work_port_events = vport->work_port_events; + + if (work_port_events & WORKER_DISC_TMO) + lpfc_disc_timeout_handler(vport); - if (work_hba_events & WORKER_ELS_TMO) - lpfc_els_timeout_handler(phba); + if (work_port_events & WORKER_ELS_TMO) + lpfc_els_timeout_handler(vport); - if (work_hba_events & WORKER_MBOX_TMO) + if (work_port_events & WORKER_HB_TMO) + lpfc_hb_timeout_handler(phba); + + if (work_port_events & WORKER_MBOX_TMO) lpfc_mbox_timeout_handler(phba); - if (work_hba_events & WORKER_FDMI_TMO) - lpfc_fdmi_tmo_handler(phba); + if (work_port_events & WORKER_FABRIC_BLOCK_TMO) + lpfc_unblock_fabric_iocbs(phba); + + if (work_port_events & WORKER_FDMI_TMO) + lpfc_fdmi_timeout_handler(vport); - spin_lock_irq(phba->host->host_lock); - 
phba->work_hba_events &= ~work_hba_events; - spin_unlock_irq(phba->host->host_lock); - - for (i = 0; i < phba->sli.num_rings; i++, ha_copy >>= 4) { - pring = &phba->sli.ring[i]; - if ((ha_copy & HA_RXATT) + if (work_port_events & WORKER_RAMP_DOWN_QUEUE) + lpfc_ramp_down_queue_handler(phba); + + if (work_port_events & WORKER_RAMP_UP_QUEUE) + lpfc_ramp_up_queue_handler(phba); + + spin_lock_irq(&vport->work_port_lock); + vport->work_port_events &= ~work_port_events; + spin_unlock_irq(&vport->work_port_lock); + scsi_host_put(shost); + spin_lock_irq(&phba->hbalock); + } + spin_unlock_irq(&phba->hbalock); + + pring = &phba->sli.ring[LPFC_ELS_RING]; + status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING))); + status >>= (4*LPFC_ELS_RING); + if ((status & HA_RXMASK) || (pring->flag & LPFC_DEFERRED_RING_EVENT)) { if (pring->flag & LPFC_STOP_IOCB_MASK) { pring->flag |= LPFC_DEFERRED_RING_EVENT; } else { lpfc_sli_handle_slow_ring_event(phba, pring, - (ha_copy & + (status & HA_RXMASK)); pring->flag &= ~LPFC_DEFERRED_RING_EVENT; } /* * Turn on Ring interrupts */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); control = readl(phba->HCregaddr); - control |= (HC_R0INT_ENA << i); + if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) { + control |= (HC_R0INT_ENA << LPFC_ELS_RING); writel(control, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - spin_unlock_irq(phba->host->host_lock); } + spin_unlock_irq(&phba->hbalock); } - - lpfc_work_list_done (phba); - + lpfc_work_list_done(phba); } static int -check_work_wait_done(struct lpfc_hba *phba) { +check_work_wait_done(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport; + struct lpfc_sli_ring *pring; + int rc = 0; - spin_lock_irq(phba->host->host_lock); - if (phba->work_ha || - phba->work_hba_events || - (!list_empty(&phba->work_list)) || + spin_lock_irq(&phba->hbalock); + list_for_each_entry(vport, &phba->port_list, listentry) { + if (vport->work_port_events) { + rc = 1; + goto exit; + } + } + + if 
(phba->work_ha || (!list_empty(&phba->work_list)) || kthread_should_stop()) { - spin_unlock_irq(phba->host->host_lock); - return 1; - } else { - spin_unlock_irq(phba->host->host_lock); - return 0; + rc = 1; + goto exit; } + + pring = &phba->sli.ring[LPFC_ELS_RING]; + if (pring->flag & LPFC_DEFERRED_RING_EVENT) + rc = 1; +exit: + if (rc) + phba->work_found++; + else + phba->work_found = 0; + + spin_unlock_irq(&phba->hbalock); + return rc; } + int lpfc_do_work(void *p) { @@ -324,11 +479,13 @@ set_user_nice(current, -20); phba->work_wait = &work_waitq; + phba->work_found = 0; while (1) { rc = wait_event_interruptible(work_waitq, check_work_wait_done(phba)); + BUG_ON(rc); if (kthread_should_stop()) @@ -336,6 +493,17 @@ lpfc_work_done(phba); + /* If there is alot of slow ring work, like during link up + * check_work_wait_done() may cause this thread to not give + * up the CPU for very long periods of time. This may cause + * soft lockups or other problems. To avoid these situations + * give up the CPU here after LPFC_MAX_WORKER_ITERATION + * consecutive iterations. + */ + if (phba->work_found >= LPFC_MAX_WORKER_ITERATION) { + phba->work_found = 0; + schedule(); + } } phba->work_wait = NULL; return 0; @@ -347,16 +515,17 @@ * embedding it in the IOCB. 
*/ int -lpfc_workq_post_event(struct lpfc_hba * phba, void *arg1, void *arg2, +lpfc_workq_post_event(struct lpfc_hba *phba, void *arg1, void *arg2, uint32_t evt) { struct lpfc_work_evt *evtp; + unsigned long flags; /* * All Mailbox completions and LPFC_ELS_RING rcv ring IOCB events will * be queued to worker thread for processing */ - evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_KERNEL); + evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_ATOMIC); if (!evtp) return 0; @@ -364,136 +533,210 @@ evtp->evt_arg2 = arg2; evtp->evt = evt; - spin_lock_irq(phba->host->host_lock); + spin_lock_irqsave(&phba->hbalock, flags); list_add_tail(&evtp->evt_listp, &phba->work_list); if (phba->work_wait) - wake_up(phba->work_wait); - spin_unlock_irq(phba->host->host_lock); + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, flags); return 1; } -int -lpfc_linkdown(struct lpfc_hba *phba) +void +lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove) { - struct lpfc_sli *psli; + struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp, *next_ndlp; - LPFC_MBOXQ_t *mb; int rc; - psli = &phba->sli; - /* sysfs or selective reset may call this routine to clean up */ - if (phba->hba_state >= LPFC_LINK_DOWN) { - if (phba->hba_state == LPFC_LINK_DOWN) - return 0; + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + continue; + + if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) + lpfc_unreg_rpi(vport, ndlp); - spin_lock_irq(phba->host->host_lock); - phba->hba_state = LPFC_LINK_DOWN; - spin_unlock_irq(phba->host->host_lock); + /* Leave Fabric nodes alone on link down */ + if (!remove && ndlp->nlp_type & NLP_FABRIC) + continue; + rc = lpfc_disc_state_machine(vport, ndlp, NULL, + remove + ? 
NLP_EVT_DEVICE_RM + : NLP_EVT_DEVICE_RECOVERY); + } + if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) { + lpfc_mbx_unreg_vpi(vport); + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; } +} + +static void +lpfc_linkdown_port(struct lpfc_vport *vport) +{ + struct lpfc_nodelist *ndlp, *next_ndlp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - fc_host_post_event(phba->host, fc_get_event_number(), - FCH_EVT_LINKDOWN, 0); + fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKDOWN, 0); - /* Clean up any firmware default rpi's */ - if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { - lpfc_unreg_did(phba, 0xffffffff, mb); - mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl; - if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB)) - == MBX_NOT_FINISHED) { - mempool_free( mb, phba->mbox_mem_pool); - } - } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Link Down: state:x%x rtry:x%x flg:x%x", + vport->port_state, vport->fc_ns_retry, vport->fc_flag); /* Cleanup any outstanding RSCN activity */ - lpfc_els_flush_rscn(phba); + lpfc_els_flush_rscn(vport); /* Cleanup any outstanding ELS commands */ - lpfc_els_flush_cmd(phba); + lpfc_els_flush_cmd(vport); + + lpfc_cleanup_rpis(vport, 0); - /* - * Issue a LINK DOWN event to all nodes. - */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { /* free any ndlp's on unused list */ + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) + /* free any ndlp's in unused state */ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) - lpfc_drop_node(phba, ndlp); - else /* otherwise, force node recovery. 
*/ - rc = lpfc_disc_state_machine(phba, ndlp, NULL, - NLP_EVT_DEVICE_RECOVERY); + lpfc_drop_node(vport, ndlp); + + /* Turn off discovery timer if its running */ + lpfc_can_disctmo(vport); +} + +int +lpfc_linkdown(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport = phba->pport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_vport *port_iterator; + LPFC_MBOXQ_t *mb; + + if (phba->link_state == LPFC_LINK_DOWN) { + return 0; + } + spin_lock_irq(&phba->hbalock); + if (phba->link_state > LPFC_LINK_DOWN) { + phba->link_state = LPFC_LINK_DOWN; + phba->pport->fc_flag &= ~FC_LBIT; + } + spin_unlock_irq(&phba->hbalock); + + list_for_each_entry(port_iterator, &phba->port_list, listentry) { + + /* Issue a LINK DOWN event to all nodes */ + lpfc_linkdown_port(port_iterator); + } + + /* Clean up any firmware default rpi's */ + mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mb) { + lpfc_unreg_did(phba, 0xffff, 0xffffffff, mb); + mb->vport = vport; + mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB)) + == MBX_NOT_FINISHED) { + mempool_free(mb, phba->mbox_mem_pool); + } } /* Setup myDID for link up if we are in pt2pt mode */ - if (phba->fc_flag & FC_PT2PT) { - phba->fc_myDID = 0; - if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { + if (phba->pport->fc_flag & FC_PT2PT) { + phba->pport->fc_myDID = 0; + mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mb) { lpfc_config_link(phba, mb); - mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl; - if (lpfc_sli_issue_mbox - (phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB)) + mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mb->vport = vport; + if (lpfc_sli_issue_mbox(phba, mb, + (MBX_NOWAIT | MBX_STOP_IOCB)) == MBX_NOT_FINISHED) { - mempool_free( mb, phba->mbox_mem_pool); + mempool_free(mb, phba->mbox_mem_pool); } } - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); - spin_unlock_irq(phba->host->host_lock); + 
spin_lock_irq(shost->host_lock); + phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); + spin_unlock_irq(shost->host_lock); } - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_LBIT; - spin_unlock_irq(phba->host->host_lock); - - /* Turn off discovery timer if its running */ - lpfc_can_disctmo(phba); - /* Must process IOCBs on all rings to handle ABORTed I/Os */ return 0; } -static int -lpfc_linkup(struct lpfc_hba *phba) +static void +lpfc_linkup_cleanup_nodes(struct lpfc_vport *vport) { - struct lpfc_nodelist *ndlp, *next_ndlp; - - fc_host_post_event(phba->host, fc_get_event_number(), - FCH_EVT_LINKUP, 0); - - spin_lock_irq(phba->host->host_lock); - phba->hba_state = LPFC_LINK_UP; - phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | - FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY); - phba->fc_flag |= FC_NDISC_ACTIVE; - phba->fc_ns_retry = 0; - spin_unlock_irq(phba->host->host_lock); + struct lpfc_nodelist *ndlp; + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + continue; - if (phba->fc_flag & FC_LBIT) { - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { - if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) { if (ndlp->nlp_type & NLP_FABRIC) { - /* - * On Linkup its safe to clean up the - * ndlp from Fabric connections. + /* On Linkup its safe to clean up the ndlp + * from Fabric connections. */ - lpfc_nlp_set_state(phba, ndlp, - NLP_STE_UNUSED_NODE); + if (ndlp->nlp_DID != Fabric_DID) + lpfc_unreg_rpi(vport, ndlp); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) { - /* - * Fail outstanding IO now since - * device is marked for PLOGI. + /* Fail outstanding IO now since device is + * marked for PLOGI. 
*/ - lpfc_unreg_rpi(phba, ndlp); - } - } + lpfc_unreg_rpi(vport, ndlp); } } +} - /* free any ndlp's on unused list */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, - nlp_listp) { +static void +lpfc_linkup_port(struct lpfc_vport *vport) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp, *next_ndlp; + struct lpfc_hba *phba = vport->phba; + + if ((vport->load_flag & FC_UNLOADING) != 0) + return; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Link Up: top:x%x speed:x%x flg:x%x", + phba->fc_topology, phba->fc_linkspeed, phba->link_flag); + + /* If NPIV is not enabled, only bring the physical port up */ + if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + (vport != phba->pport)) + return; + + fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKUP, 0); + + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | + FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY); + vport->fc_flag |= FC_NDISC_ACTIVE; + vport->fc_ns_retry = 0; + spin_unlock_irq(shost->host_lock); + + if (vport->fc_flag & FC_LBIT) + lpfc_linkup_cleanup_nodes(vport); + + /* free any ndlp's in unused state */ + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); +} + +static int +lpfc_linkup(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport; + + phba->link_state = LPFC_LINK_UP; + + /* Unblock fabric iocbs if they are blocked */ + clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); + del_timer_sync(&phba->fabric_block_timer); + + list_for_each_entry(vport, &phba->port_list, listentry) { + lpfc_linkup_port(vport); } + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) + lpfc_issue_clear_la(phba, phba->pport); return 0; } @@ -505,14 +748,14 @@ * handed off to the SLI layer. 
*/ void -lpfc_mbx_cmpl_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli; - MAILBOX_t *mb; + struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_sli *psli = &phba->sli; + MAILBOX_t *mb = &pmb->mb; uint32_t control; - psli = &phba->sli; - mb = &pmb->mb; /* Since we don't do discovery right now, turn these off here */ psli->ring[psli->extra_ring].flag &= ~LPFC_STOP_IOCB_EVENT; psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT; @@ -522,69 +765,74 @@ if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) { /* CLEAR_LA mbox error state */ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, - "%d:0320 CLEAR_LA mbxStatus error x%x hba " + "%d (%d):0320 CLEAR_LA mbxStatus error x%x hba " "state x%x\n", - phba->brd_no, mb->mbxStatus, phba->hba_state); + phba->brd_no, vport->vpi, mb->mbxStatus, + vport->port_state); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; goto out; } - if (phba->fc_flag & FC_ABORT_DISCOVERY) - goto out; + if (vport->port_type == LPFC_PHYSICAL_PORT) + phba->link_state = LPFC_HBA_READY; - phba->num_disc_nodes = 0; - /* go thru NPR list and issue ELS PLOGIs */ - if (phba->fc_npr_cnt) { - lpfc_els_disc_plogi(phba); - } + spin_lock_irq(&phba->hbalock); + psli->sli_flag |= LPFC_PROCESS_LA; + control = readl(phba->HCregaddr); + control |= HC_LAINT_ENA; + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ + spin_unlock_irq(&phba->hbalock); + return; + + vport->num_disc_nodes = 0; + /* go thru NPR nodes and issue ELS PLOGIs */ + if (vport->fc_npr_cnt) + lpfc_els_disc_plogi(vport); - if (!phba->num_disc_nodes) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_NDISC_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + if (!vport->num_disc_nodes) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NDISC_ACTIVE; + spin_unlock_irq(shost->host_lock); } - 
phba->hba_state = LPFC_HBA_READY; + vport->port_state = LPFC_VPORT_READY; out: /* Device Discovery completes */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0225 Device Discovery completes\n", - phba->brd_no); - - mempool_free( pmb, phba->mbox_mem_pool); - - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_ABORT_DISCOVERY; - if (phba->fc_flag & FC_ESTABLISH_LINK) { - phba->fc_flag &= ~FC_ESTABLISH_LINK; - } - spin_unlock_irq(phba->host->host_lock); + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0225 Device Discovery completes\n", + phba->brd_no, vport->vpi); + + mempool_free(pmb, phba->mbox_mem_pool); + + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~(FC_ABORT_DISCOVERY | FC_ESTABLISH_LINK); + spin_unlock_irq(shost->host_lock); del_timer_sync(&phba->fc_estabtmo); - lpfc_can_disctmo(phba); + lpfc_can_disctmo(vport); /* turn on Link Attention interrupts */ - spin_lock_irq(phba->host->host_lock); + + spin_lock_irq(&phba->hbalock); psli->sli_flag |= LPFC_PROCESS_LA; control = readl(phba->HCregaddr); control |= HC_LAINT_ENA; writel(control, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return; } + static void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli = &phba->sli; - int rc; + struct lpfc_vport *vport = pmb->vport; if (pmb->mb.mbxStatus) goto out; @@ -592,127 +840,110 @@ mempool_free(pmb, phba->mbox_mem_pool); if (phba->fc_topology == TOPOLOGY_LOOP && - phba->fc_flag & FC_PUBLIC_LOOP && - !(phba->fc_flag & FC_LBIT)) { + vport->fc_flag & FC_PUBLIC_LOOP && + !(vport->fc_flag & FC_LBIT)) { /* Need to wait for FAN - use discovery timer - * for timeout. hba_state is identically + * for timeout. port_state is identically * LPFC_LOCAL_CFG_LINK while waiting for FAN */ - lpfc_set_disctmo(phba); + lpfc_set_disctmo(vport); return; } - /* Start discovery by sending a FLOGI. 
hba_state is identically + /* Start discovery by sending a FLOGI. port_state is identically * LPFC_FLOGI while waiting for FLOGI cmpl */ - phba->hba_state = LPFC_FLOGI; - lpfc_set_disctmo(phba); - lpfc_initial_flogi(phba); + if (vport->port_state != LPFC_FLOGI) { + vport->port_state = LPFC_FLOGI; + lpfc_set_disctmo(vport); + lpfc_initial_flogi(vport); + } return; out: lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, - "%d:0306 CONFIG_LINK mbxStatus error x%x " + "%d (%d):0306 CONFIG_LINK mbxStatus error x%x " "HBA state x%x\n", - phba->brd_no, pmb->mb.mbxStatus, phba->hba_state); + phba->brd_no, vport->vpi, pmb->mb.mbxStatus, + vport->port_state); - lpfc_linkdown(phba); + mempool_free(pmb, phba->mbox_mem_pool); - phba->hba_state = LPFC_HBA_ERROR; + lpfc_linkdown(phba); lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0200 CONFIG_LINK bad hba state x%x\n", - phba->brd_no, phba->hba_state); + "%d (%d):0200 CONFIG_LINK bad hba state x%x\n", + phba->brd_no, vport->vpi, vport->port_state); - lpfc_clear_la(phba, pmb); - pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la; - rc = lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) { - mempool_free(pmb, phba->mbox_mem_pool); - lpfc_disc_flush_list(phba); - psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - phba->hba_state = LPFC_HBA_READY; - } + lpfc_issue_clear_la(phba, vport); return; } static void -lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli = &phba->sli; MAILBOX_t *mb = &pmb->mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1; + struct lpfc_vport *vport = pmb->vport; /* Check for error */ if (mb->mbxStatus) { /* READ_SPARAM mbox error state */ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, - "%d:0319 READ_SPARAM mbxStatus error 
x%x " + "%d (%d):0319 READ_SPARAM mbxStatus error x%x " "hba state x%x>\n", - phba->brd_no, mb->mbxStatus, phba->hba_state); + phba->brd_no, vport->vpi, mb->mbxStatus, + vport->port_state); lpfc_linkdown(phba); - phba->hba_state = LPFC_HBA_ERROR; goto out; } - memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt, + memcpy((uint8_t *) &vport->fc_sparam, (uint8_t *) mp->virt, sizeof (struct serv_parm)); if (phba->cfg_soft_wwnn) - u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn); + u64_to_wwn(phba->cfg_soft_wwnn, + vport->fc_sparam.nodeName.u.wwn); if (phba->cfg_soft_wwpn) - u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn); - memcpy((uint8_t *) & phba->fc_nodename, - (uint8_t *) & phba->fc_sparam.nodeName, - sizeof (struct lpfc_name)); - memcpy((uint8_t *) & phba->fc_portname, - (uint8_t *) & phba->fc_sparam.portName, - sizeof (struct lpfc_name)); + u64_to_wwn(phba->cfg_soft_wwpn, + vport->fc_sparam.portName.u.wwn); + memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, + sizeof(vport->fc_nodename)); + memcpy(&vport->fc_portname, &vport->fc_sparam.portName, + sizeof(vport->fc_portname)); + if (vport->port_type == LPFC_PHYSICAL_PORT) { + memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn)); + memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn)); + } + lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); - mempool_free( pmb, phba->mbox_mem_pool); + mempool_free(pmb, phba->mbox_mem_pool); return; out: pmb->context1 = NULL; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); - if (phba->hba_state != LPFC_CLEAR_LA) { - lpfc_clear_la(phba, pmb); - pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la; - if (lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB)) - == MBX_NOT_FINISHED) { - mempool_free( pmb, phba->mbox_mem_pool); - lpfc_disc_flush_list(phba); - psli->ring[(psli->extra_ring)].flag &= - ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->fcp_ring)].flag &= - ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->next_ring)].flag &= - 
~LPFC_STOP_IOCB_EVENT; - phba->hba_state = LPFC_HBA_READY; - } - } else { - mempool_free( pmb, phba->mbox_mem_pool); - } + lpfc_issue_clear_la(phba, vport); + mempool_free(pmb, phba->mbox_mem_pool); return; } static void lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la) { - int i; + struct lpfc_vport *vport = phba->pport; LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox; + int i; struct lpfc_dmabuf *mp; int rc; sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); switch (la->UlnkSpeed) { case LA_1GHZ_LINK: phba->fc_linkspeed = LA_1GHZ_LINK; @@ -732,14 +963,16 @@ } phba->fc_topology = la->topology; + phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED; if (phba->fc_topology == TOPOLOGY_LOOP) { - /* Get Loop Map information */ + phba->sli3_options &= ~LPFC_SLI3_NPIV_ENABLED; + /* Get Loop Map information */ if (la->il) - phba->fc_flag |= FC_LBIT; + vport->fc_flag |= FC_LBIT; - phba->fc_myDID = la->granted_AL_PA; + vport->fc_myDID = la->granted_AL_PA; i = la->un.lilpBde64.tus.f.bdeSize; if (i == 0) { @@ -781,14 +1014,20 @@ } } } else { - phba->fc_myDID = phba->fc_pref_DID; - phba->fc_flag |= FC_LBIT; + if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) { + if (phba->max_vpi && phba->cfg_npiv_enable && + (phba->sli_rev == 3)) + phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED; + } + vport->fc_myDID = phba->fc_pref_DID; + vport->fc_flag |= FC_LBIT; } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); lpfc_linkup(phba); if (sparam_mbox) { - lpfc_read_sparam(phba, sparam_mbox); + lpfc_read_sparam(phba, sparam_mbox, 0); + sparam_mbox->vport = vport; sparam_mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam; rc = lpfc_sli_issue_mbox(phba, sparam_mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); @@ -799,36 +1038,48 @@ mempool_free(sparam_mbox, phba->mbox_mem_pool); if (cfglink_mbox) mempool_free(cfglink_mbox, phba->mbox_mem_pool); - 
return; + goto out; } } if (cfglink_mbox) { - phba->hba_state = LPFC_LOCAL_CFG_LINK; + vport->port_state = LPFC_LOCAL_CFG_LINK; lpfc_config_link(phba, cfglink_mbox); + cfglink_mbox->vport = vport; cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link; rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) + if (rc != MBX_NOT_FINISHED) + return; mempool_free(cfglink_mbox, phba->mbox_mem_pool); } +out: + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "%d (%d):0263 Discovery Mailbox error: state: 0x%x : %p %p\n", + phba->brd_no, vport->vpi, + vport->port_state, sparam_mbox, cfglink_mbox); + + lpfc_issue_clear_la(phba, vport); + return; } static void -lpfc_mbx_issue_link_down(struct lpfc_hba *phba) { +lpfc_mbx_issue_link_down(struct lpfc_hba *phba) +{ uint32_t control; struct lpfc_sli *psli = &phba->sli; lpfc_linkdown(phba); /* turn on Link Attention interrupts - no CLEAR_LA needed */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); psli->sli_flag |= LPFC_PROCESS_LA; control = readl(phba->HCregaddr); control |= HC_LAINT_ENA; writel(control, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); } /* @@ -838,22 +1089,21 @@ * handed off to the SLI layer. 
*/ void -lpfc_mbx_cmpl_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_read_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { + struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); READ_LA_VAR *la; MAILBOX_t *mb = &pmb->mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); /* Check for error */ if (mb->mbxStatus) { - lpfc_printf_log(phba, - KERN_INFO, - LOG_LINK_EVENT, + lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT, "%d:1307 READ_LA mbox error x%x state x%x\n", - phba->brd_no, - mb->mbxStatus, phba->hba_state); + phba->brd_no, mb->mbxStatus, vport->port_state); lpfc_mbx_issue_link_down(phba); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; goto lpfc_mbx_cmpl_read_la_free_mbuf; } @@ -861,27 +1111,26 @@ memcpy(&phba->alpa_map[0], mp->virt, 128); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); if (la->pb) - phba->fc_flag |= FC_BYPASSED_MODE; + vport->fc_flag |= FC_BYPASSED_MODE; else - phba->fc_flag &= ~FC_BYPASSED_MODE; - spin_unlock_irq(phba->host->host_lock); + vport->fc_flag &= ~FC_BYPASSED_MODE; + spin_unlock_irq(shost->host_lock); if (((phba->fc_eventTag + 1) < la->eventTag) || (phba->fc_eventTag == la->eventTag)) { phba->fc_stat.LinkMultiEvent++; - if (la->attType == AT_LINK_UP) { + if (la->attType == AT_LINK_UP) if (phba->fc_eventTag != 0) lpfc_linkdown(phba); } - } phba->fc_eventTag = la->eventTag; if (la->attType == AT_LINK_UP) { phba->fc_stat.LinkUp++; - if (phba->fc_flag & FC_LOOPBACK_MODE) { + if (phba->link_flag & LS_LOOPBACK_MODE) { lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT, "%d:1306 Link Up Event in loop back mode " "x%x received Data: x%x x%x x%x x%x\n", @@ -903,7 +1152,7 @@ "%d:1305 Link Down Event x%x received " "Data: x%x x%x x%x\n", phba->brd_no, la->eventTag, phba->fc_eventTag, - phba->hba_state, phba->fc_flag); + phba->pport->port_state, vport->fc_flag); lpfc_mbx_issue_link_down(phba); } @@ -921,31 +1170,115 @@ 
* handed off to the SLI layer. */ void -lpfc_mbx_cmpl_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli; - MAILBOX_t *mb; - struct lpfc_dmabuf *mp; - struct lpfc_nodelist *ndlp; - - psli = &phba->sli; - mb = &pmb->mb; - - ndlp = (struct lpfc_nodelist *) pmb->context2; - mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_vport *vport = pmb->vport; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; pmb->context1 = NULL; /* Good status, call state machine */ - lpfc_disc_state_machine(phba, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN); + lpfc_disc_state_machine(vport, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN); lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); - mempool_free( pmb, phba->mbox_mem_pool); + mempool_free(pmb, phba->mbox_mem_pool); lpfc_nlp_put(ndlp); return; } +static void +lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) +{ + MAILBOX_t *mb = &pmb->mb; + struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + switch (mb->mbxStatus) { + case 0x0011: + case 0x0020: + case 0x9700: + lpfc_printf_log(phba, KERN_INFO, LOG_NODE, + "%d (%d):0911 cmpl_unreg_vpi, " + "mb status = 0x%x\n", + phba->brd_no, vport->vpi, mb->mbxStatus); + break; + } + vport->unreg_vpi_cmpl = VPORT_OK; + mempool_free(pmb, phba->mbox_mem_pool); + /* + * This shost reference might have been taken at the beginning of + * lpfc_vport_delete() + */ + if (vport->load_flag & FC_UNLOADING) + scsi_host_put(shost); +} + +void +lpfc_mbx_unreg_vpi(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + int rc; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return; + + lpfc_unreg_vpi(phba, vport->vpi, mbox); + mbox->vport = vport; + mbox->mbox_cmpl = lpfc_mbx_cmpl_unreg_vpi; + rc = 
lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT, + "%d (%d):1800 Could not issue unreg_vpi\n", + phba->brd_no, vport->vpi); + mempool_free(mbox, phba->mbox_mem_pool); + vport->unreg_vpi_cmpl = VPORT_ERROR; + } +} + +static void +lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) +{ + struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + MAILBOX_t *mb = &pmb->mb; + + switch (mb->mbxStatus) { + case 0x0011: + case 0x9601: + case 0x9602: + lpfc_printf_log(phba, KERN_INFO, LOG_NODE, + "%d (%d):0912 cmpl_reg_vpi, mb status = 0x%x\n", + phba->brd_no, vport->vpi, mb->mbxStatus); + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + spin_unlock_irq(shost->host_lock); + vport->fc_myDID = 0; + goto out; + } + + vport->num_disc_nodes = 0; + /* go thru NPR list and issue ELS PLOGIs */ + if (vport->fc_npr_cnt) + lpfc_els_disc_plogi(vport); + + if (!vport->num_disc_nodes) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NDISC_ACTIVE; + spin_unlock_irq(shost->host_lock); + lpfc_can_disctmo(vport); + } + vport->port_state = LPFC_VPORT_READY; + +out: + mempool_free(pmb, phba->mbox_mem_pool); + return; +} + /* * This routine handles processing a Fabric REG_LOGIN mailbox * command upon completion. It is setup in the LPFC_MBOXQ @@ -953,20 +1286,14 @@ * handed off to the SLI layer. 
*/ void -lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli; - MAILBOX_t *mb; - struct lpfc_dmabuf *mp; + struct lpfc_vport *vport = pmb->vport; + struct lpfc_vport *next_vport; + MAILBOX_t *mb = &pmb->mb; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); struct lpfc_nodelist *ndlp; - struct lpfc_nodelist *ndlp_fdmi; - - - psli = &phba->sli; - mb = &pmb->mb; - ndlp = (struct lpfc_nodelist *) pmb->context2; - mp = (struct lpfc_dmabuf *) (pmb->context1); pmb->context1 = NULL; pmb->context2 = NULL; @@ -977,60 +1304,46 @@ mempool_free(pmb, phba->mbox_mem_pool); lpfc_nlp_put(ndlp); - /* FLOGI failed, so just use loop map to make discovery list */ - lpfc_disc_list_loopmap(phba); + if (phba->fc_topology == TOPOLOGY_LOOP) { + /* FLOGI failed, use loop map to make discovery list */ + lpfc_disc_list_loopmap(vport); /* Start discovery */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); + return; + } + + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "%d (%d):0258 Register Fabric login error: 0x%x\n", + phba->brd_no, vport->vpi, mb->mbxStatus); + return; } ndlp->nlp_rpi = mb->un.varWords[0]; ndlp->nlp_type |= NLP_FABRIC; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); lpfc_nlp_put(ndlp); /* Drop the reference from the mbox */ - if (phba->hba_state == LPFC_FABRIC_CFG_LINK) { - /* This NPort has been assigned an NPort_ID by the fabric as a - * result of the completed fabric login. Issue a State Change - * Registration (SCR) ELS request to the fabric controller - * (SCR_DID) so that this NPort gets RSCN events from the - * fabric. 
- */ - lpfc_issue_els_scr(phba, SCR_DID, 0); + if (vport->port_state == LPFC_FABRIC_CFG_LINK) { + list_for_each_entry(next_vport, &phba->port_list, listentry) { + if (next_vport->port_type == LPFC_PHYSICAL_PORT) + continue; - ndlp = lpfc_findnode_did(phba, NameServer_DID); - if (!ndlp) { - /* Allocate a new node instance. If the pool is empty, - * start the discovery process and skip the Nameserver - * login process. This is attempted again later on. - * Otherwise, issue a Port Login (PLOGI) to NameServer. - */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); - if (!ndlp) { - lpfc_disc_start(phba); - lpfc_mbuf_free(phba, mp->virt, mp->phys); - kfree(mp); - mempool_free(pmb, phba->mbox_mem_pool); - return; - } else { - lpfc_nlp_init(phba, ndlp, NameServer_DID); - ndlp->nlp_type |= NLP_FABRIC; - } - } - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, NameServer_DID, 0); - if (phba->cfg_fdmi_on) { - ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool, - GFP_KERNEL); - if (ndlp_fdmi) { - lpfc_nlp_init(phba, ndlp_fdmi, FDMI_DID); - ndlp_fdmi->nlp_type |= NLP_FABRIC; - ndlp_fdmi->nlp_state = NLP_STE_PLOGI_ISSUE; - lpfc_issue_els_plogi(phba, FDMI_DID, 0); + if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) + lpfc_initial_fdisc(next_vport); + else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { + lpfc_vport_set_state(vport, + FC_VPORT_NO_FABRIC_SUPP); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0259 No NPIV Fabric " + "support\n", + phba->brd_no, vport->vpi); } } + lpfc_do_scr_ns_plogi(phba, vport); } lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -1046,32 +1359,36 @@ * handed off to the SLI layer. 
*/ void -lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli; - MAILBOX_t *mb; - struct lpfc_dmabuf *mp; - struct lpfc_nodelist *ndlp; - - psli = &phba->sli; - mb = &pmb->mb; - - ndlp = (struct lpfc_nodelist *) pmb->context2; - mp = (struct lpfc_dmabuf *) (pmb->context1); + MAILBOX_t *mb = &pmb->mb; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_vport *vport = pmb->vport; if (mb->mbxStatus) { +out: lpfc_nlp_put(ndlp); lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); mempool_free(pmb, phba->mbox_mem_pool); - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); - /* RegLogin failed, so just use loop map to make discovery - list */ - lpfc_disc_list_loopmap(phba); + if (phba->fc_topology == TOPOLOGY_LOOP) { + /* + * RegLogin failed, use loop map to make discovery + * list + */ + lpfc_disc_list_loopmap(vport); /* Start discovery */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); + return; + } + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0260 Register NameServer error: 0x%x\n", + phba->brd_no, vport->vpi, mb->mbxStatus); return; } @@ -1079,37 +1396,43 @@ ndlp->nlp_rpi = mb->un.varWords[0]; ndlp->nlp_type |= NLP_FABRIC; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - if (phba->hba_state < LPFC_HBA_READY) { - /* Link up discovery requires Fabrib registration. */ - lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RNN_ID); - lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RSNN_NN); - lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFT_ID); - lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFF_ID); + if (vport->port_state < LPFC_VPORT_READY) { + /* Link up discovery requires Fabric registration. */ + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, 0); /* Do this first! 
*/ + lpfc_ns_cmd(vport, SLI_CTNS_RNN_ID, 0, 0); + lpfc_ns_cmd(vport, SLI_CTNS_RSNN_NN, 0, 0); + lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0); + lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0); + + /* Issue SCR just before NameServer GID_FT Query */ + lpfc_issue_els_scr(vport, SCR_DID, 0); } - phba->fc_ns_retry = 0; + vport->fc_ns_retry = 0; /* Good status, issue CT Request to NameServer */ - if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT)) { + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0)) { /* Cannot issue NameServer Query, so finish up discovery */ - lpfc_disc_start(phba); + goto out; } lpfc_nlp_put(ndlp); lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); - mempool_free( pmb, phba->mbox_mem_pool); + mempool_free(pmb, phba->mbox_mem_pool); return; } static void -lpfc_register_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct fc_rport *rport; struct lpfc_rport_data *rdata; struct fc_rport_identifiers rport_ids; + struct lpfc_hba *phba = vport->phba; /* Remote port has reappeared. Re-register w/ FC transport */ rport_ids.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); @@ -1125,10 +1448,15 @@ * registered the port. 
*/ if (ndlp->rport && ndlp->rport->dd_data && - *(struct lpfc_rport_data **) ndlp->rport->dd_data) { + ((struct lpfc_rport_data *) ndlp->rport->dd_data)->pnode == ndlp) { lpfc_nlp_put(ndlp); } - ndlp->rport = rport = fc_remote_port_add(phba->host, 0, &rport_ids); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, + "rport add: did:x%x flg:x%x type x%x", + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); + + ndlp->rport = rport = fc_remote_port_add(shost, 0, &rport_ids); if (!rport || !get_device(&rport->dev)) { dev_printk(KERN_WARNING, &phba->pcidev->dev, "Warning: fc_remote_port_add failed\n"); @@ -1154,22 +1482,17 @@ (rport->scsi_target_id < LPFC_MAX_TARGET)) { ndlp->nlp_sid = rport->scsi_target_id; } - return; } static void -lpfc_unregister_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp) { struct fc_rport *rport = ndlp->rport; - struct lpfc_rport_data *rdata = rport->dd_data; - if (rport->scsi_target_id == -1) { - ndlp->rport = NULL; - rdata->pnode = NULL; - lpfc_nlp_put(ndlp); - put_device(&rport->dev); - } + lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT, + "rport delete: did:x%x flg:x%x type x%x", + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); fc_remote_port_delete(rport); @@ -1177,42 +1500,46 @@ } static void -lpfc_nlp_counters(struct lpfc_hba *phba, int state, int count) +lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count) { - spin_lock_irq(phba->host->host_lock); + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + spin_lock_irq(shost->host_lock); switch (state) { case NLP_STE_UNUSED_NODE: - phba->fc_unused_cnt += count; + vport->fc_unused_cnt += count; break; case NLP_STE_PLOGI_ISSUE: - phba->fc_plogi_cnt += count; + vport->fc_plogi_cnt += count; break; case NLP_STE_ADISC_ISSUE: - phba->fc_adisc_cnt += count; + vport->fc_adisc_cnt += count; break; case NLP_STE_REG_LOGIN_ISSUE: - phba->fc_reglogin_cnt += count; + vport->fc_reglogin_cnt += count; 
break; case NLP_STE_PRLI_ISSUE: - phba->fc_prli_cnt += count; + vport->fc_prli_cnt += count; break; case NLP_STE_UNMAPPED_NODE: - phba->fc_unmap_cnt += count; + vport->fc_unmap_cnt += count; break; case NLP_STE_MAPPED_NODE: - phba->fc_map_cnt += count; + vport->fc_map_cnt += count; break; case NLP_STE_NPR_NODE: - phba->fc_npr_cnt += count; + vport->fc_npr_cnt += count; break; } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); } static void -lpfc_nlp_state_cleanup(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, +lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int old_state, int new_state) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + if (new_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; @@ -1226,21 +1553,20 @@ /* Transport interface */ if (ndlp->rport && (old_state == NLP_STE_MAPPED_NODE || old_state == NLP_STE_UNMAPPED_NODE)) { - phba->nport_event_cnt++; - lpfc_unregister_remote_port(phba, ndlp); + vport->phba->nport_event_cnt++; + lpfc_unregister_remote_port(ndlp); } if (new_state == NLP_STE_MAPPED_NODE || new_state == NLP_STE_UNMAPPED_NODE) { - phba->nport_event_cnt++; + vport->phba->nport_event_cnt++; /* * Tell the fc transport about the port, if we haven't * already. 
If we have, and it's a scsi entity, be * sure to unblock any attached scsi devices */ - lpfc_register_remote_port(phba, ndlp); + lpfc_register_remote_port(vport, ndlp); } - /* * if we added to Mapped list, but the remote port * registration failed or assigned a target id outside @@ -1251,10 +1577,10 @@ (!ndlp->rport || ndlp->rport->scsi_target_id == -1 || ndlp->rport->scsi_target_id >= LPFC_MAX_TARGET)) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_TGT_NO_SCSIID; - spin_unlock_irq(phba->host->host_lock); - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + spin_unlock_irq(shost->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); } } @@ -1280,61 +1606,74 @@ } void -lpfc_nlp_set_state(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int state) +lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + int state) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); int old_state = ndlp->nlp_state; char name1[16], name2[16]; - lpfc_printf_log(phba, KERN_INFO, LOG_NODE, - "%d:0904 NPort state transition x%06x, %s -> %s\n", - phba->brd_no, + lpfc_printf_log(vport->phba, KERN_INFO, LOG_NODE, + "%d (%d):0904 NPort state transition x%06x, %s -> %s\n", + vport->phba->brd_no, vport->vpi, ndlp->nlp_DID, lpfc_nlp_state_name(name1, sizeof(name1), old_state), lpfc_nlp_state_name(name2, sizeof(name2), state)); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE, + "node statechg did:x%x old:%d ste:%d", + ndlp->nlp_DID, old_state, state); + if (old_state == NLP_STE_NPR_NODE && (ndlp->nlp_flag & NLP_DELAY_TMO) != 0 && state != NLP_STE_NPR_NODE) - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); if (old_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_flag &= ~NLP_TGT_NO_SCSIID; ndlp->nlp_type &= ~NLP_FC_NODE; } if (list_empty(&ndlp->nlp_listp)) { - spin_lock_irq(phba->host->host_lock); - list_add_tail(&ndlp->nlp_listp, &phba->fc_nodes); - 
spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + list_add_tail(&ndlp->nlp_listp, &vport->fc_nodes); + spin_unlock_irq(shost->host_lock); } else if (old_state) - lpfc_nlp_counters(phba, old_state, -1); + lpfc_nlp_counters(vport, old_state, -1); ndlp->nlp_state = state; - lpfc_nlp_counters(phba, state, 1); - lpfc_nlp_state_cleanup(phba, ndlp, old_state, state); + lpfc_nlp_counters(vport, state, 1); + lpfc_nlp_state_cleanup(vport, ndlp, old_state, state); } void -lpfc_dequeue_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_dequeue_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0) - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp)) - lpfc_nlp_counters(phba, ndlp->nlp_state, -1); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_counters(vport, ndlp->nlp_state, -1); + spin_lock_irq(shost->host_lock); list_del_init(&ndlp->nlp_listp); - spin_unlock_irq(phba->host->host_lock); - lpfc_nlp_state_cleanup(phba, ndlp, ndlp->nlp_state, 0); + spin_unlock_irq(shost->host_lock); + lpfc_nlp_state_cleanup(vport, ndlp, ndlp->nlp_state, + NLP_STE_UNUSED_NODE); } void -lpfc_drop_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0) - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp)) - lpfc_nlp_counters(phba, ndlp->nlp_state, -1); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_counters(vport, ndlp->nlp_state, -1); + spin_lock_irq(shost->host_lock); list_del_init(&ndlp->nlp_listp); - spin_unlock_irq(phba->host->host_lock); + ndlp->nlp_flag &= ~NLP_TARGET_REMOVE; + 
spin_unlock_irq(shost->host_lock); lpfc_nlp_put(ndlp); } @@ -1342,11 +1681,13 @@ * Start / ReStart rescue timer for Discovery / RSCN handling */ void -lpfc_set_disctmo(struct lpfc_hba * phba) +lpfc_set_disctmo(struct lpfc_vport *vport) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; uint32_t tmo; - if (phba->hba_state == LPFC_LOCAL_CFG_LINK) { + if (vport->port_state == LPFC_LOCAL_CFG_LINK) { /* For FAN, timeout should be greater then edtov */ tmo = (((phba->fc_edtov + 999) / 1000) + 1); } else { @@ -1356,18 +1697,25 @@ tmo = ((phba->fc_ratov * 3) + 3); } - mod_timer(&phba->fc_disctmo, jiffies + HZ * tmo); - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_DISC_TMO; - spin_unlock_irq(phba->host->host_lock); + + if (!timer_pending(&vport->fc_disctmo)) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "set disc timer: tmo:x%x state:x%x flg:x%x", + tmo, vport->port_state, vport->fc_flag); + } + + mod_timer(&vport->fc_disctmo, jiffies + HZ * tmo); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_DISC_TMO; + spin_unlock_irq(shost->host_lock); /* Start Discovery Timer state */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0247 Start Discovery Timer state x%x " + "%d (%d):0247 Start Discovery Timer state x%x " "Data: x%x x%lx x%x x%x\n", - phba->brd_no, - phba->hba_state, tmo, (unsigned long)&phba->fc_disctmo, - phba->fc_plogi_cnt, phba->fc_adisc_cnt); + phba->brd_no, vport->vpi, vport->port_state, tmo, + (unsigned long)&vport->fc_disctmo, vport->fc_plogi_cnt, + vport->fc_adisc_cnt); return; } @@ -1376,23 +1724,34 @@ * Cancel rescue timer for Discovery / RSCN handling */ int -lpfc_can_disctmo(struct lpfc_hba * phba) +lpfc_can_disctmo(struct lpfc_vport *vport) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + unsigned long iflags; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "can disc timer: state:x%x rtry:x%x flg:x%x", + 
vport->port_state, vport->fc_ns_retry, vport->fc_flag); + /* Turn off discovery timer if its running */ - if (phba->fc_flag & FC_DISC_TMO) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_DISC_TMO; - spin_unlock_irq(phba->host->host_lock); - del_timer_sync(&phba->fc_disctmo); - phba->work_hba_events &= ~WORKER_DISC_TMO; + if (vport->fc_flag & FC_DISC_TMO) { + spin_lock_irqsave(shost->host_lock, iflags); + vport->fc_flag &= ~FC_DISC_TMO; + spin_unlock_irqrestore(shost->host_lock, iflags); + del_timer_sync(&vport->fc_disctmo); + spin_lock_irqsave(&vport->work_port_lock, iflags); + vport->work_port_events &= ~WORKER_DISC_TMO; + spin_unlock_irqrestore(&vport->work_port_lock, iflags); } /* Cancel Discovery Timer state */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0248 Cancel Discovery Timer state x%x " + "%d (%d):0248 Cancel Discovery Timer state x%x " "Data: x%x x%x x%x\n", - phba->brd_no, phba->hba_state, phba->fc_flag, - phba->fc_plogi_cnt, phba->fc_adisc_cnt); + phba->brd_no, vport->vpi, vport->port_state, + vport->fc_flag, vport->fc_plogi_cnt, + vport->fc_adisc_cnt); return 0; } @@ -1402,15 +1761,18 @@ * Return true if iocb matches the specified nport */ int -lpfc_check_sli_ndlp(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, - struct lpfc_iocbq * iocb, struct lpfc_nodelist * ndlp) +lpfc_check_sli_ndlp(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct lpfc_iocbq *iocb, + struct lpfc_nodelist *ndlp) { - struct lpfc_sli *psli; - IOCB_t *icmd; + struct lpfc_sli *psli = &phba->sli; + IOCB_t *icmd = &iocb->iocb; + struct lpfc_vport *vport = ndlp->vport; + + if (iocb->vport != vport) + return 0; - psli = &phba->sli; - icmd = &iocb->iocb; if (pring->ringno == LPFC_ELS_RING) { switch (icmd->ulpCommand) { case CMD_GEN_REQUEST64_CR: @@ -1445,7 +1807,7 @@ * associated with nlp_rpi in the LPFC_NODELIST entry. 
*/ static int -lpfc_no_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) +lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(completions); struct lpfc_sli *psli; @@ -1454,6 +1816,8 @@ IOCB_t *icmd; uint32_t rpi, i; + lpfc_fabric_abort_nport(ndlp); + /* * Everything that matches on txcmplq will be returned * by firmware with a no rpi error. @@ -1465,15 +1829,15 @@ for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { /* * Check to see if iocb matches the nport we are * looking for */ - if ((lpfc_check_sli_ndlp - (phba, pring, iocb, ndlp))) { + if ((lpfc_check_sli_ndlp(phba, pring, iocb, + ndlp))) { /* It matches, so deque and call compl with an error */ list_move_tail(&iocb->list, @@ -1481,22 +1845,22 @@ pring->txq_cnt--; } } - spin_unlock_irq(phba->host->host_lock); - + spin_unlock_irq(&phba->hbalock); } } while (!list_empty(&completions)) { iocb = list_get_first(&completions, struct lpfc_iocbq, list); - list_del(&iocb->list); + list_del_init(&iocb->list); - if (iocb->iocb_cmpl) { + if (!iocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, iocb); + else { icmd = &iocb->iocb; icmd->ulpStatus = IOSTAT_LOCAL_REJECT; icmd->un.ulpWord[4] = IOERR_SLI_ABORTED; - (iocb->iocb_cmpl) (phba, iocb, iocb); - } else - lpfc_sli_release_iocbq(phba, iocb); + (iocb->iocb_cmpl)(phba, iocb, iocb); + } } return 0; @@ -1512,19 +1876,22 @@ * we are waiting to PLOGI back to the remote NPort. 
*/ int -lpfc_unreg_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) +lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { + struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *mbox; int rc; if (ndlp->nlp_rpi) { - if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { - lpfc_unreg_login(phba, ndlp->nlp_rpi, mbox); - mbox->mbox_cmpl=lpfc_sli_def_mbox_cmpl; - rc = lpfc_sli_issue_mbox - (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mbox) { + lpfc_unreg_login(phba, vport->vpi, ndlp->nlp_rpi, mbox); + mbox->vport = vport; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + rc = lpfc_sli_issue_mbox(phba, mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)); if (rc == MBX_NOT_FINISHED) - mempool_free( mbox, phba->mbox_mem_pool); + mempool_free(mbox, phba->mbox_mem_pool); } lpfc_no_rpi(phba, ndlp); ndlp->nlp_rpi = 0; @@ -1533,25 +1900,70 @@ return 0; } +void +lpfc_unreg_all_rpis(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + int rc; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mbox) { + lpfc_unreg_login(phba, vport->vpi, 0xffff, mbox); + mbox->vport = vport; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + rc = lpfc_sli_issue_mbox(phba, mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) { + mempool_free(mbox, phba->mbox_mem_pool); + } + } +} + +void +lpfc_unreg_default_rpis(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + int rc; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mbox) { + lpfc_unreg_did(phba, vport->vpi, 0xffffffff, mbox); + mbox->vport = vport; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + rc = lpfc_sli_issue_mbox(phba, mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT, + "%d (%d):1815 Could not issue " + "unreg_did (default rpis)\n", + phba->brd_no, vport->vpi); + 
mempool_free(mbox, phba->mbox_mem_pool); + } + } +} + /* * Free resources associated with LPFC_NODELIST entry * so it can be freed. */ static int -lpfc_cleanup_node(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) +lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { - LPFC_MBOXQ_t *mb; - LPFC_MBOXQ_t *nextmb; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mb, *nextmb; struct lpfc_dmabuf *mp; /* Cleanup node for NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_NODE, - "%d:0900 Cleanup node for NPort x%x " + "%d (%d):0900 Cleanup node for NPort x%x " "Data: x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag, + phba->brd_no, vport->vpi, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); - lpfc_dequeue_node(phba, ndlp); + lpfc_dequeue_node(vport, ndlp); /* cleanup any ndlp on mbox q waiting for reglogin cmpl */ if ((mb = phba->sli.mbox_active)) { @@ -1562,13 +1974,13 @@ } } - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) && (ndlp == (struct lpfc_nodelist *) mb->context2)) { mp = (struct lpfc_dmabuf *) (mb->context1); if (mp) { - lpfc_mbuf_free(phba, mp->virt, mp->phys); + __lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } list_del(&mb->list); @@ -1576,20 +1988,27 @@ lpfc_nlp_put(ndlp); } } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); lpfc_els_abort(phba,ndlp); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = 0; del_timer_sync(&ndlp->nlp_delayfunc); if (!list_empty(&ndlp->els_retry_evt.evt_listp)) list_del_init(&ndlp->els_retry_evt.evt_listp); + if (!list_empty(&ndlp->dev_loss_evt.evt_listp)) + 
list_del_init(&ndlp->dev_loss_evt.evt_listp); + + if (!list_empty(&ndlp->dev_loss_evt.evt_listp)) { + list_del_init(&ndlp->dev_loss_evt.evt_listp); + complete((struct completion *)(ndlp->dev_loss_evt.evt_arg2)); + } - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); return 0; } @@ -1600,18 +2019,22 @@ * machine, defer the free till we reach the end of the state machine. */ static void -lpfc_nlp_remove(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { struct lpfc_rport_data *rdata; if (ndlp->nlp_flag & NLP_DELAY_TMO) { - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); } - lpfc_cleanup_node(phba, ndlp); + lpfc_cleanup_node(vport, ndlp); - if ((ndlp->rport) && !(phba->fc_flag & FC_UNLOADING)) { - put_device(&ndlp->rport->dev); + /* + * We can get here with a non-NULL ndlp->rport because when we + * unregister a rport we don't break the rport/node linkage. So if we + * do, make sure we don't leaving any dangling pointers behind. 
+ */ + if (ndlp->rport) { rdata = ndlp->rport->dd_data; rdata->pnode = NULL; ndlp->rport = NULL; @@ -1619,11 +2042,10 @@ } static int -lpfc_matchdid(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did) +lpfc_matchdid(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint32_t did) { - D_ID mydid; - D_ID ndlpdid; - D_ID matchdid; + D_ID mydid, ndlpdid, matchdid; if (did == Bcast_DID) return 0; @@ -1637,7 +2059,7 @@ return 1; /* Next check for area/domain identically equals 0 match */ - mydid.un.word = phba->fc_myDID; + mydid.un.word = vport->fc_myDID; if ((mydid.un.b.domain == 0) && (mydid.un.b.area == 0)) { return 0; } @@ -1669,101 +2091,116 @@ } /* Search for a nodelist entry */ -struct lpfc_nodelist * -lpfc_findnode_did(struct lpfc_hba *phba, uint32_t did) +static struct lpfc_nodelist * +__lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) { + struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; uint32_t data1; - spin_lock_irq(phba->host->host_lock); - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { - if (lpfc_matchdid(phba, ndlp, did)) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (lpfc_matchdid(vport, ndlp, did)) { data1 = (((uint32_t) ndlp->nlp_state << 24) | ((uint32_t) ndlp->nlp_xri << 16) | ((uint32_t) ndlp->nlp_type << 8) | ((uint32_t) ndlp->nlp_rpi & 0xff)); lpfc_printf_log(phba, KERN_INFO, LOG_NODE, - "%d:0929 FIND node DID " + "%d (%d):0929 FIND node DID " " Data: x%p x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, ndlp, ndlp->nlp_DID, ndlp->nlp_flag, data1); - spin_unlock_irq(phba->host->host_lock); return ndlp; } } - spin_unlock_irq(phba->host->host_lock); /* FIND node did NOT FOUND */ lpfc_printf_log(phba, KERN_INFO, LOG_NODE, - "%d:0932 FIND node did x%x NOT FOUND.\n", - phba->brd_no, did); + "%d (%d):0932 FIND node did x%x NOT FOUND.\n", + phba->brd_no, vport->vpi, did); return NULL; } struct lpfc_nodelist * -lpfc_setup_disc_node(struct lpfc_hba * phba, uint32_t did) 
+lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + + spin_lock_irq(shost->host_lock); + ndlp = __lpfc_findnode_did(vport, did); + spin_unlock_irq(shost->host_lock); + return ndlp; +} + +struct lpfc_nodelist * +lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; - ndlp = lpfc_findnode_did(phba, did); + ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { - if ((phba->fc_flag & FC_RSCN_MODE) && - ((lpfc_rscn_payload_check(phba, did) == 0))) + if ((vport->fc_flag & FC_RSCN_MODE) != 0 && + lpfc_rscn_payload_check(vport, did) == 0) return NULL; ndlp = (struct lpfc_nodelist *) - mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); if (!ndlp) return NULL; - lpfc_nlp_init(phba, ndlp, did); - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_init(vport, ndlp, did); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; + spin_unlock_irq(shost->host_lock); return ndlp; } - if (phba->fc_flag & FC_RSCN_MODE) { - if (lpfc_rscn_payload_check(phba, did)) { + if (vport->fc_flag & FC_RSCN_MODE) { + if (lpfc_rscn_payload_check(vport, did)) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; + spin_unlock_irq(shost->host_lock); /* Since this node is marked for discovery, * delay timeout is not needed. 
*/ if (ndlp->nlp_flag & NLP_DELAY_TMO) - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); } else ndlp = NULL; } else { if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE || ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) return NULL; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; + spin_unlock_irq(shost->host_lock); } return ndlp; } /* Build a list of nodes to discover based on the loopmap */ void -lpfc_disc_list_loopmap(struct lpfc_hba * phba) +lpfc_disc_list_loopmap(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; int j; uint32_t alpa, index; - if (phba->hba_state <= LPFC_LINK_DOWN) { + if (!lpfc_is_link_up(phba)) return; - } - if (phba->fc_topology != TOPOLOGY_LOOP) { + + if (phba->fc_topology != TOPOLOGY_LOOP) return; - } /* Check for loop map present or not */ if (phba->alpa_map[0]) { for (j = 1; j <= phba->alpa_map[0]; j++) { alpa = phba->alpa_map[j]; - - if (((phba->fc_myDID & 0xff) == alpa) || (alpa == 0)) { + if (((vport->fc_myDID & 0xff) == alpa) || (alpa == 0)) continue; - } - lpfc_setup_disc_node(phba, alpa); + lpfc_setup_disc_node(vport, alpa); } } else { /* No alpamap, so try all alpa's */ @@ -1776,113 +2213,167 @@ else index = FC_MAXLOOP - j - 1; alpa = lpfcAlpaArray[index]; - if ((phba->fc_myDID & 0xff) == alpa) { + if ((vport->fc_myDID & 0xff) == alpa) continue; - } - - lpfc_setup_disc_node(phba, alpa); + lpfc_setup_disc_node(vport, alpa); } } return; } -/* Start Link up / RSCN discovery on NPR list */ void -lpfc_disc_start(struct lpfc_hba * phba) +lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport) { - struct lpfc_sli *psli; LPFC_MBOXQ_t *mbox; - struct lpfc_nodelist *ndlp, *next_ndlp; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *extra_ring = &psli->ring[psli->extra_ring]; + struct lpfc_sli_ring *fcp_ring = &psli->ring[psli->fcp_ring]; + struct 
lpfc_sli_ring *next_ring = &psli->ring[psli->next_ring]; + int rc; + + /* + * if it's not a physical port or if we already send + * clear_la then don't send it. + */ + if ((phba->link_state >= LPFC_CLEAR_LA) || + (vport->port_type != LPFC_PHYSICAL_PORT)) + return; + + /* Link up discovery */ + if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) { + phba->link_state = LPFC_CLEAR_LA; + lpfc_clear_la(phba, mbox); + mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; + mbox->vport = vport; + rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | + MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) { + mempool_free(mbox, phba->mbox_mem_pool); + lpfc_disc_flush_list(vport); + extra_ring->flag &= ~LPFC_STOP_IOCB_EVENT; + fcp_ring->flag &= ~LPFC_STOP_IOCB_EVENT; + next_ring->flag &= ~LPFC_STOP_IOCB_EVENT; + phba->link_state = LPFC_HBA_ERROR; + } + } +} + +/* Reg_vpi to tell firmware to resume normal operations */ +void +lpfc_issue_reg_vpi(struct lpfc_hba *phba, struct lpfc_vport *vport) +{ + LPFC_MBOXQ_t *regvpimbox; + + regvpimbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (regvpimbox) { + lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, regvpimbox); + regvpimbox->mbox_cmpl = lpfc_mbx_cmpl_reg_vpi; + regvpimbox->vport = vport; + if (lpfc_sli_issue_mbox(phba, regvpimbox, + (MBX_NOWAIT | MBX_STOP_IOCB)) + == MBX_NOT_FINISHED) { + mempool_free(regvpimbox, phba->mbox_mem_pool); + } + } +} + +/* Start Link up / RSCN discovery on NPR nodes */ +void +lpfc_disc_start(struct lpfc_vport *vport) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; uint32_t num_sent; uint32_t clear_la_pending; int did_changed; - int rc; - - psli = &phba->sli; - if (phba->hba_state <= LPFC_LINK_DOWN) { + if (!lpfc_is_link_up(phba)) return; - } - if (phba->hba_state == LPFC_CLEAR_LA) + + if (phba->link_state == LPFC_CLEAR_LA) clear_la_pending = 1; else clear_la_pending = 0; - if (phba->hba_state < LPFC_HBA_READY) { - phba->hba_state = 
LPFC_DISC_AUTH; - } - lpfc_set_disctmo(phba); + if (vport->port_state < LPFC_VPORT_READY) + vport->port_state = LPFC_DISC_AUTH; + + lpfc_set_disctmo(vport); - if (phba->fc_prevDID == phba->fc_myDID) { + if (vport->fc_prevDID == vport->fc_myDID) did_changed = 0; - } else { + else did_changed = 1; - } - phba->fc_prevDID = phba->fc_myDID; - phba->num_disc_nodes = 0; + + vport->fc_prevDID = vport->fc_myDID; + vport->num_disc_nodes = 0; /* Start Discovery state */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0202 Start Discovery hba state x%x " + "%d (%d):0202 Start Discovery hba state x%x " "Data: x%x x%x x%x\n", - phba->brd_no, phba->hba_state, phba->fc_flag, - phba->fc_plogi_cnt, phba->fc_adisc_cnt); - - /* If our did changed, we MUST do PLOGI */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { - if (ndlp->nlp_state == NLP_STE_NPR_NODE && - (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 && - did_changed) { - spin_lock_irq(phba->host->host_lock); - ndlp->nlp_flag &= ~NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); - } - } + phba->brd_no, vport->vpi, vport->port_state, + vport->fc_flag, vport->fc_plogi_cnt, + vport->fc_adisc_cnt); /* First do ADISCs - if any */ - num_sent = lpfc_els_disc_adisc(phba); + num_sent = lpfc_els_disc_adisc(vport); if (num_sent) return; - if ((phba->hba_state < LPFC_HBA_READY) && (!clear_la_pending)) { + /* + * For SLI3, cmpl_reg_vpi will set port_state to READY, and + * continue discovery. + */ + if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && + !(vport->fc_flag & FC_RSCN_MODE)) { + lpfc_issue_reg_vpi(phba, vport); + return; + } + + /* + * For SLI2, we need to set port_state to READY and continue + * discovery. 
+ */ + if (vport->port_state < LPFC_VPORT_READY && !clear_la_pending) { /* If we get here, there is nothing to ADISC */ - if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { - phba->hba_state = LPFC_CLEAR_LA; - lpfc_clear_la(phba, mbox); - mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; - rc = lpfc_sli_issue_mbox(phba, mbox, - (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) { - mempool_free( mbox, phba->mbox_mem_pool); - lpfc_disc_flush_list(phba); - psli->ring[(psli->extra_ring)].flag &= - ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->fcp_ring)].flag &= - ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->next_ring)].flag &= - ~LPFC_STOP_IOCB_EVENT; - phba->hba_state = LPFC_HBA_READY; + if (vport->port_type == LPFC_PHYSICAL_PORT) + lpfc_issue_clear_la(phba, vport); + + if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) { + vport->num_disc_nodes = 0; + /* go thru NPR nodes and issue ELS PLOGIs */ + if (vport->fc_npr_cnt) + lpfc_els_disc_plogi(vport); + + if (!vport->num_disc_nodes) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_NDISC_ACTIVE; + spin_unlock_irq(shost->host_lock); + lpfc_can_disctmo(vport); } } + vport->port_state = LPFC_VPORT_READY; } else { /* Next do PLOGIs - if any */ - num_sent = lpfc_els_disc_plogi(phba); + num_sent = lpfc_els_disc_plogi(vport); if (num_sent) return; - if (phba->fc_flag & FC_RSCN_MODE) { + if (vport->fc_flag & FC_RSCN_MODE) { /* Check to see if more RSCNs came in while we * were processing this one. 
*/ - if ((phba->fc_rscn_id_cnt == 0) && - (!(phba->fc_flag & FC_RSCN_DISCOVERY))) { - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_RSCN_MODE; - spin_unlock_irq(phba->host->host_lock); + if ((vport->fc_rscn_id_cnt == 0) && + (!(vport->fc_flag & FC_RSCN_DISCOVERY))) { + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_RSCN_MODE; + spin_unlock_irq(shost->host_lock); + lpfc_can_disctmo(vport); } else - lpfc_els_handle_rscn(phba); + lpfc_els_handle_rscn(vport); } } return; @@ -1893,7 +2384,7 @@ * ring the match the sppecified nodelist. */ static void -lpfc_free_tx(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) +lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(completions); struct lpfc_sli *psli; @@ -1907,7 +2398,7 @@ /* Error matching iocb on txq or txcmplq * First check the txq. */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { if (iocb->context1 != ndlp) { continue; @@ -1927,36 +2418,36 @@ continue; } icmd = &iocb->iocb; - if ((icmd->ulpCommand == CMD_ELS_REQUEST64_CR) || - (icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX)) { + if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR || + icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX) { lpfc_sli_issue_abort_iotag(phba, pring, iocb); } } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); while (!list_empty(&completions)) { iocb = list_get_first(&completions, struct lpfc_iocbq, list); - list_del(&iocb->list); + list_del_init(&iocb->list); - if (iocb->iocb_cmpl) { + if (!iocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, iocb); + else { icmd = &iocb->iocb; icmd->ulpStatus = IOSTAT_LOCAL_REJECT; icmd->un.ulpWord[4] = IOERR_SLI_ABORTED; (iocb->iocb_cmpl) (phba, iocb, iocb); - } else - lpfc_sli_release_iocbq(phba, iocb); } - - return; + } } void -lpfc_disc_flush_list(struct lpfc_hba * phba) +lpfc_disc_flush_list(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp, *next_ndlp; 
+ struct lpfc_hba *phba = vport->phba; - if (phba->fc_plogi_cnt || phba->fc_adisc_cnt) { - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, + if (vport->fc_plogi_cnt || vport->fc_adisc_cnt) { + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE || ndlp->nlp_state == NLP_STE_ADISC_ISSUE) { @@ -1967,6 +2458,14 @@ } } +void +lpfc_cleanup_discovery_resources(struct lpfc_vport *vport) +{ + lpfc_els_flush_rscn(vport); + lpfc_els_flush_cmd(vport); + lpfc_disc_flush_list(vport); +} + /*****************************************************************************/ /* * NAME: lpfc_disc_timeout @@ -1985,158 +2484,154 @@ void lpfc_disc_timeout(unsigned long ptr) { - struct lpfc_hba *phba = (struct lpfc_hba *)ptr; + struct lpfc_vport *vport = (struct lpfc_vport *) ptr; + struct lpfc_hba *phba = vport->phba; unsigned long flags = 0; if (unlikely(!phba)) return; - spin_lock_irqsave(phba->host->host_lock, flags); - if (!(phba->work_hba_events & WORKER_DISC_TMO)) { - phba->work_hba_events |= WORKER_DISC_TMO; + if ((vport->work_port_events & WORKER_DISC_TMO) == 0) { + spin_lock_irqsave(&vport->work_port_lock, flags); + vport->work_port_events |= WORKER_DISC_TMO; + spin_unlock_irqrestore(&vport->work_port_lock, flags); + + spin_lock_irqsave(&phba->hbalock, flags); if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, flags); } - spin_unlock_irqrestore(phba->host->host_lock, flags); return; } static void -lpfc_disc_timeout_handler(struct lpfc_hba *phba) +lpfc_disc_timeout_handler(struct lpfc_vport *vport) { - struct lpfc_sli *psli; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; struct lpfc_nodelist *ndlp, *next_ndlp; - LPFC_MBOXQ_t *clearlambox, *initlinkmbox; + LPFC_MBOXQ_t *initlinkmbox; int rc, clrlaerr = 0; - if (unlikely(!phba)) - return; - - if 
(!(phba->fc_flag & FC_DISC_TMO)) + if (!(vport->fc_flag & FC_DISC_TMO)) return; - psli = &phba->sli; + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_DISC_TMO; + spin_unlock_irq(shost->host_lock); - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_DISC_TMO; - spin_unlock_irq(phba->host->host_lock); + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "disc timeout: state:x%x rtry:x%x flg:x%x", + vport->port_state, vport->fc_ns_retry, vport->fc_flag); - switch (phba->hba_state) { + switch (vport->port_state) { case LPFC_LOCAL_CFG_LINK: - /* hba_state is identically LPFC_LOCAL_CFG_LINK while waiting for FAN */ + /* port_state is identically LPFC_LOCAL_CFG_LINK while waiting for + * FAN + */ /* FAN timeout */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_DISCOVERY, - "%d:0221 FAN timeout\n", - phba->brd_no); + lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY, + "%d (%d):0221 FAN timeout\n", + phba->brd_no, vport->vpi); /* Start discovery by sending FLOGI, clean up old rpis */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state != NLP_STE_NPR_NODE) continue; if (ndlp->nlp_type & NLP_FABRIC) { /* Clean up the ndlp on Fabric connections */ - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) { /* Fail outstanding IO now since device * is marked for PLOGI. 
*/ - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); } } - phba->hba_state = LPFC_FLOGI; - lpfc_set_disctmo(phba); - lpfc_initial_flogi(phba); + if (vport->port_state != LPFC_FLOGI) { + vport->port_state = LPFC_FLOGI; + lpfc_set_disctmo(vport); + lpfc_initial_flogi(vport); + } break; + case LPFC_FDISC: case LPFC_FLOGI: - /* hba_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */ + /* port_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */ /* Initial FLOGI timeout */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_DISCOVERY, - "%d:0222 Initial FLOGI timeout\n", - phba->brd_no); + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0222 Initial %s timeout\n", + phba->brd_no, vport->vpi, + vport->vpi ? "FLOGI" : "FDISC"); /* Assume no Fabric and go on with discovery. * Check for outstanding ELS FLOGI to abort. */ /* FLOGI failed, so just use loop map to make discovery list */ - lpfc_disc_list_loopmap(phba); + lpfc_disc_list_loopmap(vport); /* Start discovery */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); break; case LPFC_FABRIC_CFG_LINK: /* hba_state is identically LPFC_FABRIC_CFG_LINK while waiting for NameServer login */ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0223 Timeout while waiting for NameServer " - "login\n", phba->brd_no); + "%d (%d):0223 Timeout while waiting for " + "NameServer login\n", + phba->brd_no, vport->vpi); /* Next look for NameServer ndlp */ - ndlp = lpfc_findnode_did(phba, NameServer_DID); + ndlp = lpfc_findnode_did(vport, NameServer_DID); if (ndlp) lpfc_nlp_put(ndlp); /* Start discovery */ - lpfc_disc_start(phba); + lpfc_disc_start(vport); break; case LPFC_NS_QRY: /* Check for wait for NameServer Rsp timeout */ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0224 NameServer Query timeout " + "%d (%d):0224 NameServer Query timeout " "Data: x%x x%x\n", - phba->brd_no, - phba->fc_ns_retry, LPFC_MAX_NS_RETRY); + phba->brd_no, vport->vpi, + vport->fc_ns_retry, LPFC_MAX_NS_RETRY); - ndlp = 
lpfc_findnode_did(phba, NameServer_DID); - if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { - if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) { + if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { /* Try it one more time */ - rc = lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT); + vport->fc_ns_retry++; + rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, + vport->fc_ns_retry, 0); if (rc == 0) break; } - phba->fc_ns_retry = 0; - } - - /* Nothing to authenticate, so CLEAR_LA right now */ - clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!clearlambox) { - clrlaerr = 1; - lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0226 Device Discovery " - "completion error\n", - phba->brd_no); - phba->hba_state = LPFC_HBA_ERROR; - break; - } + vport->fc_ns_retry = 0; - phba->hba_state = LPFC_CLEAR_LA; - lpfc_clear_la(phba, clearlambox); - clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; - rc = lpfc_sli_issue_mbox(phba, clearlambox, - (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) { - mempool_free(clearlambox, phba->mbox_mem_pool); - clrlaerr = 1; - break; + /* + * Discovery is over. + * set port_state to PORT_READY if SLI2. + * cmpl_reg_vpi will set port_state to READY for SLI3. 
+ */ + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) + lpfc_issue_reg_vpi(phba, vport); + else { /* NPIV Not enabled */ + lpfc_issue_clear_la(phba, vport); + vport->port_state = LPFC_VPORT_READY; } /* Setup and issue mailbox INITIALIZE LINK command */ initlinkmbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!initlinkmbox) { lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0206 Device Discovery " + "%d (%d):0206 Device Discovery " "completion error\n", - phba->brd_no); - phba->hba_state = LPFC_HBA_ERROR; + phba->brd_no, vport->vpi); + phba->link_state = LPFC_HBA_ERROR; break; } @@ -2144,6 +2639,8 @@ lpfc_init_link(phba, initlinkmbox, phba->cfg_topology, phba->cfg_link_speed); initlinkmbox->mb.un.varInitLnk.lipsr_AL_PA = 0; + initlinkmbox->vport = vport; + initlinkmbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; rc = lpfc_sli_issue_mbox(phba, initlinkmbox, (MBX_NOWAIT | MBX_STOP_IOCB)); lpfc_set_loopback_flag(phba); @@ -2154,67 +2651,81 @@ case LPFC_DISC_AUTH: /* Node Authentication timeout */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_DISCOVERY, - "%d:0227 Node Authentication timeout\n", - phba->brd_no); - lpfc_disc_flush_list(phba); - clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!clearlambox) { - clrlaerr = 1; lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0207 Device Discovery " - "completion error\n", - phba->brd_no); - phba->hba_state = LPFC_HBA_ERROR; - break; + "%d (%d):0227 Node Authentication timeout\n", + phba->brd_no, vport->vpi); + lpfc_disc_flush_list(vport); + + /* + * set port_state to PORT_READY if SLI2. + * cmpl_reg_vpi will set port_state to READY for SLI3. 
+ */ + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) + lpfc_issue_reg_vpi(phba, vport); + else { /* NPIV Not enabled */ + lpfc_issue_clear_la(phba, vport); + vport->port_state = LPFC_VPORT_READY; } - phba->hba_state = LPFC_CLEAR_LA; - lpfc_clear_la(phba, clearlambox); - clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; - rc = lpfc_sli_issue_mbox(phba, clearlambox, - (MBX_NOWAIT | MBX_STOP_IOCB)); - if (rc == MBX_NOT_FINISHED) { - mempool_free(clearlambox, phba->mbox_mem_pool); - clrlaerr = 1; + break; + + case LPFC_VPORT_READY: + if (vport->fc_flag & FC_RSCN_MODE) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0231 RSCN timeout Data: x%x " + "x%x\n", + phba->brd_no, vport->vpi, + vport->fc_ns_retry, LPFC_MAX_NS_RETRY); + + /* Cleanup any outstanding ELS commands */ + lpfc_els_flush_cmd(vport); + + lpfc_els_flush_rscn(vport); + lpfc_disc_flush_list(vport); } break; + default: + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0229 Unexpected discovery timeout, " + "vport State x%x\n", + phba->brd_no, vport->vpi, vport->port_state); + + break; + } + + switch (phba->link_state) { case LPFC_CLEAR_LA: /* CLEAR LA timeout */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_DISCOVERY, - "%d:0228 CLEAR LA timeout\n", - phba->brd_no); + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0228 CLEAR LA timeout\n", + phba->brd_no, vport->vpi); clrlaerr = 1; break; - case LPFC_HBA_READY: - if (phba->fc_flag & FC_RSCN_MODE) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_DISCOVERY, - "%d:0231 RSCN timeout Data: x%x x%x\n", - phba->brd_no, - phba->fc_ns_retry, LPFC_MAX_NS_RETRY); - - /* Cleanup any outstanding ELS commands */ - lpfc_els_flush_cmd(phba); + case LPFC_LINK_UNKNOWN: + case LPFC_WARM_START: + case LPFC_INIT_START: + case LPFC_INIT_MBX_CMDS: + case LPFC_LINK_DOWN: + case LPFC_LINK_UP: + case LPFC_HBA_ERROR: + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0230 Unexpected timeout, hba link " + "state x%x\n", + phba->brd_no, vport->vpi, 
phba->link_state); + clrlaerr = 1; + break; - lpfc_els_flush_rscn(phba); - lpfc_disc_flush_list(phba); - } + case LPFC_HBA_READY: break; } if (clrlaerr) { - lpfc_disc_flush_list(phba); + lpfc_disc_flush_list(vport); psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - phba->hba_state = LPFC_HBA_READY; + vport->port_state = LPFC_VPORT_READY; } return; @@ -2227,37 +2738,29 @@ * handed off to the SLI layer. */ void -lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_sli *psli; - MAILBOX_t *mb; - struct lpfc_dmabuf *mp; - struct lpfc_nodelist *ndlp; - - psli = &phba->sli; - mb = &pmb->mb; - - ndlp = (struct lpfc_nodelist *) pmb->context2; - mp = (struct lpfc_dmabuf *) (pmb->context1); + MAILBOX_t *mb = &pmb->mb; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_vport *vport = pmb->vport; pmb->context1 = NULL; ndlp->nlp_rpi = mb->un.varWords[0]; ndlp->nlp_type |= NLP_FABRIC; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - /* Start issuing Fabric-Device Management Interface (FDMI) - * command to 0xfffffa (FDMI well known port) - */ - if (phba->cfg_fdmi_on == 1) { - lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA); - } else { /* - * Delay issuing FDMI command if fdmi-on=2 - * (supporting RPA/hostnmae) + * Start issuing Fabric-Device Management Interface (FDMI) command to + * 0xfffffa (FDMI well known port) or Delay issuing FDMI command if + * fdmi-on=2 (supporting RPA/hostnmae) */ - mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60); - } + + if (phba->cfg_fdmi_on == 1) + lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA); + else + mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60); /* 
Mailbox took a reference to the node */ lpfc_nlp_put(ndlp); @@ -2283,16 +2786,12 @@ sizeof(ndlp->nlp_portname)) == 0; } -/* - * Search node lists for a remote port matching filter criteria - * Caller needs to hold host_lock before calling this routine. - */ struct lpfc_nodelist * -__lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param) +__lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param) { struct lpfc_nodelist *ndlp; - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state != NLP_STE_UNUSED_NODE && filter(ndlp, param)) return ndlp; @@ -2302,68 +2801,104 @@ /* * Search node lists for a remote port matching filter criteria - * This routine is used when the caller does NOT have host_lock. + * Caller needs to hold host_lock before calling this routine. */ struct lpfc_nodelist * -lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param) +lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; - spin_lock_irq(phba->host->host_lock); - ndlp = __lpfc_find_node(phba, filter, param); - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + ndlp = __lpfc_find_node(vport, filter, param); + spin_unlock_irq(shost->host_lock); return ndlp; } /* * This routine looks up the ndlp lists for the given RPI. If rpi found it - * returns the node list pointer else return NULL. + * returns the node list element pointer else return NULL. 
*/ struct lpfc_nodelist * -__lpfc_findnode_rpi(struct lpfc_hba *phba, uint16_t rpi) +__lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi) { - return __lpfc_find_node(phba, lpfc_filter_by_rpi, &rpi); + return __lpfc_find_node(vport, lpfc_filter_by_rpi, &rpi); } struct lpfc_nodelist * -lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi) +lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; - spin_lock_irq(phba->host->host_lock); - ndlp = __lpfc_findnode_rpi(phba, rpi); - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); + ndlp = __lpfc_findnode_rpi(vport, rpi); + spin_unlock_irq(shost->host_lock); return ndlp; } /* * This routine looks up the ndlp lists for the given WWPN. If WWPN found it - * returns the node list pointer else return NULL. + * returns the node element list pointer else return NULL. */ struct lpfc_nodelist * -lpfc_findnode_wwpn(struct lpfc_hba *phba, struct lpfc_name *wwpn) +lpfc_findnode_wwpn(struct lpfc_vport *vport, struct lpfc_name *wwpn) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; - spin_lock_irq(phba->host->host_lock); - ndlp = __lpfc_find_node(phba, lpfc_filter_by_wwpn, wwpn); - spin_unlock_irq(phba->host->host_lock); - return NULL; + spin_lock_irq(shost->host_lock); + ndlp = __lpfc_find_node(vport, lpfc_filter_by_wwpn, wwpn); + spin_unlock_irq(shost->host_lock); + return ndlp; } void -lpfc_nlp_init(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did) +lpfc_dev_loss_delay(unsigned long ptr) +{ + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr; + struct lpfc_vport *vport = ndlp->vport; + struct lpfc_hba *phba = vport->phba; + struct lpfc_work_evt *evtp = &ndlp->dev_loss_evt; + unsigned long flags; + + evtp = &ndlp->dev_loss_evt; + + spin_lock_irqsave(&phba->hbalock, flags); + if (!list_empty(&evtp->evt_listp)) { + spin_unlock_irqrestore(&phba->hbalock, 
flags); + return; + } + + evtp->evt_arg1 = ndlp; + evtp->evt = LPFC_EVT_DEV_LOSS_DELAY; + list_add_tail(&evtp->evt_listp, &phba->work_list); + if (phba->work_wait) + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, flags); + return; +} + +void +lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint32_t did) { memset(ndlp, 0, sizeof (struct lpfc_nodelist)); INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); + INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); init_timer(&ndlp->nlp_delayfunc); ndlp->nlp_delayfunc.function = lpfc_els_retry_delay; ndlp->nlp_delayfunc.data = (unsigned long)ndlp; ndlp->nlp_DID = did; - ndlp->nlp_phba = phba; + ndlp->vport = vport; ndlp->nlp_sid = NLP_NO_SID; INIT_LIST_HEAD(&ndlp->nlp_listp); kref_init(&ndlp->kref); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE, + "node init: did:x%x", + ndlp->nlp_DID, 0, 0); + return; } @@ -2372,8 +2907,13 @@ { struct lpfc_nodelist *ndlp = container_of(kref, struct lpfc_nodelist, kref); - lpfc_nlp_remove(ndlp->nlp_phba, ndlp); - mempool_free(ndlp, ndlp->nlp_phba->nlp_mem_pool); + + lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, + "node release: did:x%x flg:x%x type:x%x", + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); + + lpfc_nlp_remove(ndlp->vport, ndlp); + mempool_free(ndlp, ndlp->vport->phba->nlp_mem_pool); } struct lpfc_nodelist * diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hw.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hw.h 2007-12-21 15:36:12.000000000 -0500 @@ -59,6 +59,12 @@ #define SLI2_IOCB_CMD_R3XTRA_ENTRIES 24 #define SLI2_IOCB_RSP_R3XTRA_ENTRIES 32 +#define SLI2_IOCB_CMD_SIZE 32 +#define SLI2_IOCB_RSP_SIZE 32 +#define SLI3_IOCB_CMD_SIZE 128 +#define SLI3_IOCB_RSP_SIZE 64 + + /* Common Transport structures and definitions */ union CtRevisionId { @@ -79,6 +85,9 @@ uint32_t word; }; +#define FC4_FEATURE_INIT 
0x2 +#define FC4_FEATURE_TARGET 0x1 + struct lpfc_sli_ct_request { /* Structure is in Big Endian format */ union CtRevisionId RevisionId; @@ -121,20 +130,6 @@ uint32_t rsvd[7]; } rft; - struct rff { - uint32_t PortId; - uint8_t reserved[2]; -#ifdef __BIG_ENDIAN_BITFIELD - uint8_t feature_res:6; - uint8_t feature_init:1; - uint8_t feature_tgt:1; -#else /* __LITTLE_ENDIAN_BITFIELD */ - uint8_t feature_tgt:1; - uint8_t feature_init:1; - uint8_t feature_res:6; -#endif - uint8_t type_code; /* type=8 for FCP */ - } rff; struct rnn { uint32_t PortId; /* For RNN_ID requests */ uint8_t wwnn[8]; @@ -144,15 +139,42 @@ uint8_t len; uint8_t symbname[255]; } rsnn; + struct rspn { /* For RSPN_ID requests */ + uint32_t PortId; + uint8_t len; + uint8_t symbname[255]; + } rspn; + struct gff { + uint32_t PortId; + } gff; + struct gff_acc { + uint8_t fbits[128]; + } gff_acc; +#define FCP_TYPE_FEATURE_OFFSET 4 + struct rff { + uint32_t PortId; + uint8_t reserved[2]; + uint8_t fbits; + uint8_t type_code; /* type=8 for FCP */ + } rff; } un; }; #define SLI_CT_REVISION 1 -#define GID_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 260) -#define RFT_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 228) -#define RFF_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 235) -#define RNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 252) -#define RSNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request)) +#define GID_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct gid)) +#define GFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct gff)) +#define RFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct rft)) +#define RFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct rff)) +#define RNN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct rnn)) +#define RSNN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct rsnn)) +#define RSPN_REQUEST_SZ (offsetof(struct 
lpfc_sli_ct_request, un) + \ + sizeof(struct rspn)) /* * FsType Definitions @@ -227,6 +249,7 @@ #define SLI_CTNS_GFT_ID 0x0117 #define SLI_CTNS_GSPN_ID 0x0118 #define SLI_CTNS_GPT_ID 0x011A +#define SLI_CTNS_GFF_ID 0x011F #define SLI_CTNS_GID_PN 0x0121 #define SLI_CTNS_GID_NN 0x0131 #define SLI_CTNS_GIP_NN 0x0135 @@ -240,9 +263,9 @@ #define SLI_CTNS_RNN_ID 0x0213 #define SLI_CTNS_RCS_ID 0x0214 #define SLI_CTNS_RFT_ID 0x0217 -#define SLI_CTNS_RFF_ID 0x021F #define SLI_CTNS_RSPN_ID 0x0218 #define SLI_CTNS_RPT_ID 0x021A +#define SLI_CTNS_RFF_ID 0x021F #define SLI_CTNS_RIP_NN 0x0235 #define SLI_CTNS_RIPA_NN 0x0236 #define SLI_CTNS_RSNN_NN 0x0239 @@ -311,9 +334,9 @@ uint8_t bbCreditlsb; /* FC Word 0, byte 3 */ #ifdef __BIG_ENDIAN_BITFIELD - uint16_t increasingOffset:1; /* FC Word 1, bit 31 */ + uint16_t request_multiple_Nport:1; /* FC Word 1, bit 31 */ uint16_t randomOffset:1; /* FC Word 1, bit 30 */ - uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */ + uint16_t response_multiple_NPort:1; /* FC Word 1, bit 29 */ uint16_t fPort:1; /* FC Word 1, bit 28 */ uint16_t altBbCredit:1; /* FC Word 1, bit 27 */ uint16_t edtovResolution:1; /* FC Word 1, bit 26 */ @@ -332,9 +355,9 @@ uint16_t edtovResolution:1; /* FC Word 1, bit 26 */ uint16_t altBbCredit:1; /* FC Word 1, bit 27 */ uint16_t fPort:1; /* FC Word 1, bit 28 */ - uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */ + uint16_t response_multiple_NPort:1; /* FC Word 1, bit 29 */ uint16_t randomOffset:1; /* FC Word 1, bit 30 */ - uint16_t increasingOffset:1; /* FC Word 1, bit 31 */ + uint16_t request_multiple_Nport:1; /* FC Word 1, bit 31 */ uint16_t payloadlength:1; /* FC Word 1, bit 16 */ uint16_t contIncSeqCnt:1; /* FC Word 1, bit 17 */ @@ -1255,7 +1278,9 @@ #define MBX_KILL_BOARD 0x24 #define MBX_CONFIG_FARP 0x25 #define MBX_BEACON 0x2A +#define MBX_HEARTBEAT 0x31 +#define MBX_CONFIG_HBQ 0x7C #define MBX_LOAD_AREA 0x81 #define MBX_RUN_BIU_DIAG64 0x84 #define MBX_CONFIG_PORT 0x88 @@ -1263,6 +1288,10 @@ #define 
MBX_READ_RPI64 0x8F #define MBX_REG_LOGIN64 0x93 #define MBX_READ_LA64 0x95 +#define MBX_REG_VPI 0x96 +#define MBX_UNREG_VPI 0x97 +#define MBX_REG_VNPID 0x96 +#define MBX_UNREG_VNPID 0x97 #define MBX_FLASH_WR_ULA 0x98 #define MBX_SET_DEBUG 0x99 @@ -1335,6 +1364,10 @@ #define CMD_FCP_TRECEIVE64_CX 0xA1 #define CMD_FCP_TRSP64_CX 0xA3 +#define CMD_IOCB_RCV_SEQ64_CX 0xB5 +#define CMD_IOCB_RCV_ELS64_CX 0xB7 +#define CMD_IOCB_RCV_CONT64_CX 0xBB + #define CMD_GEN_REQUEST64_CR 0xC2 #define CMD_GEN_REQUEST64_CX 0xC3 @@ -1561,6 +1594,7 @@ #define FLAGS_TOPOLOGY_MODE_PT_PT 0x02 /* Attempt pt-pt only */ #define FLAGS_TOPOLOGY_MODE_LOOP 0x04 /* Attempt loop only */ #define FLAGS_TOPOLOGY_MODE_PT_LOOP 0x06 /* Attempt pt-pt then loop */ +#define FLAGS_UNREG_LOGIN_ALL 0x08 /* UNREG_LOGIN all on link down */ #define FLAGS_LIRP_LILP 0x80 /* LIRP / LILP is disabled */ #define FLAGS_TOPOLOGY_FAILOVER 0x0400 /* Bit 10 */ @@ -1744,8 +1778,6 @@ #define LMT_4Gb 0x040 #define LMT_8Gb 0x080 #define LMT_10Gb 0x100 - - uint32_t rsvd2; uint32_t rsvd3; uint32_t max_xri; @@ -1754,7 +1786,10 @@ uint32_t avail_xri; uint32_t avail_iocb; uint32_t avail_rpi; - uint32_t default_rpi; + uint32_t max_vpi; + uint32_t rsvd4; + uint32_t rsvd5; + uint32_t avail_vpi; } READ_CONFIG_VAR; /* Structure for MB Command READ_RCONFIG (12) */ @@ -1818,6 +1853,13 @@ structure */ struct ulp_bde64 sp64; } un; +#ifdef __BIG_ENDIAN_BITFIELD + uint16_t rsvd3; + uint16_t vpi; +#else /* __LITTLE_ENDIAN_BITFIELD */ + uint16_t vpi; + uint16_t rsvd3; +#endif } READ_SPARM_VAR; /* Structure for MB Command READ_STATUS (14) */ @@ -1918,11 +1960,17 @@ #ifdef __BIG_ENDIAN_BITFIELD uint32_t cv:1; uint32_t rr:1; - uint32_t rsvd1:29; + uint32_t rsvd2:2; + uint32_t v3req:1; + uint32_t v3rsp:1; + uint32_t rsvd1:25; uint32_t rv:1; #else /* __LITTLE_ENDIAN_BITFIELD */ uint32_t rv:1; - uint32_t rsvd1:29; + uint32_t rsvd1:25; + uint32_t v3rsp:1; + uint32_t v3req:1; + uint32_t rsvd2:2; uint32_t rr:1; uint32_t cv:1; #endif @@ -1972,8 +2020,8 @@ 
uint8_t sli1FwName[16]; uint32_t sli2FwRev; uint8_t sli2FwName[16]; - uint32_t rsvd2; - uint32_t RandomData[7]; + uint32_t sli3Feat; + uint32_t RandomData[6]; } READ_REV_VAR; /* Structure for MB Command READ_LINK_STAT (18) */ @@ -2013,6 +2061,14 @@ struct ulp_bde64 sp64; } un; +#ifdef __BIG_ENDIAN_BITFIELD + uint16_t rsvd6; + uint16_t vpi; +#else /* __LITTLE_ENDIAN_BITFIELD */ + uint16_t vpi; + uint16_t rsvd6; +#endif + } REG_LOGIN_VAR; /* Word 30 contents for REG_LOGIN */ @@ -2037,16 +2093,78 @@ #ifdef __BIG_ENDIAN_BITFIELD uint16_t rsvd1; uint16_t rpi; + uint32_t rsvd2; + uint32_t rsvd3; + uint32_t rsvd4; + uint32_t rsvd5; + uint16_t rsvd6; + uint16_t vpi; #else /* __LITTLE_ENDIAN_BITFIELD */ uint16_t rpi; uint16_t rsvd1; + uint32_t rsvd2; + uint32_t rsvd3; + uint32_t rsvd4; + uint32_t rsvd5; + uint16_t vpi; + uint16_t rsvd6; #endif } UNREG_LOGIN_VAR; +/* Structure for MB Command REG_VPI (0x96) */ +typedef struct { +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd1; + uint32_t rsvd2:8; + uint32_t sid:24; + uint32_t rsvd3; + uint32_t rsvd4; + uint32_t rsvd5; + uint16_t rsvd6; + uint16_t vpi; +#else /* __LITTLE_ENDIAN */ + uint32_t rsvd1; + uint32_t sid:24; + uint32_t rsvd2:8; + uint32_t rsvd3; + uint32_t rsvd4; + uint32_t rsvd5; + uint16_t vpi; + uint16_t rsvd6; +#endif +} REG_VPI_VAR; + +/* Structure for MB Command UNREG_VPI (0x97) */ +typedef struct { + uint32_t rsvd1; + uint32_t rsvd2; + uint32_t rsvd3; + uint32_t rsvd4; + uint32_t rsvd5; +#ifdef __BIG_ENDIAN_BITFIELD + uint16_t rsvd6; + uint16_t vpi; +#else /* __LITTLE_ENDIAN */ + uint16_t vpi; + uint16_t rsvd6; +#endif +} UNREG_VPI_VAR; + /* Structure for MB Command UNREG_D_ID (0x23) */ typedef struct { uint32_t did; + uint32_t rsvd2; + uint32_t rsvd3; + uint32_t rsvd4; + uint32_t rsvd5; +#ifdef __BIG_ENDIAN_BITFIELD + uint16_t rsvd6; + uint16_t vpi; +#else + uint16_t vpi; + uint16_t rsvd6; +#endif } UNREG_D_ID_VAR; /* Structure for MB Command READ_LA (21) */ @@ -2178,13 +2296,240 @@ #define DMP_RSP_OFFSET 0x14 
/* word 5 contains first word of rsp */ #define DMP_RSP_SIZE 0x6C /* maximum of 27 words of rsp data */ -/* Structure for MB Command CONFIG_PORT (0x88) */ +struct hbq_mask { +#ifdef __BIG_ENDIAN_BITFIELD + uint8_t tmatch; + uint8_t tmask; + uint8_t rctlmatch; + uint8_t rctlmask; +#else /* __LITTLE_ENDIAN */ + uint8_t rctlmask; + uint8_t rctlmatch; + uint8_t tmask; + uint8_t tmatch; +#endif +}; + +/* Structure for MB Command CONFIG_HBQ (7c) */ + +struct config_hbq_var { +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd1 :7; + uint32_t recvNotify :1; /* Receive Notification */ + uint32_t numMask :8; /* # Mask Entries */ + uint32_t profile :8; /* Selection Profile */ + uint32_t rsvd2 :8; +#else /* __LITTLE_ENDIAN */ + uint32_t rsvd2 :8; + uint32_t profile :8; /* Selection Profile */ + uint32_t numMask :8; /* # Mask Entries */ + uint32_t recvNotify :1; /* Receive Notification */ + uint32_t rsvd1 :7; +#endif + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t hbqId :16; + uint32_t rsvd3 :12; + uint32_t ringMask :4; +#else /* __LITTLE_ENDIAN */ + uint32_t ringMask :4; + uint32_t rsvd3 :12; + uint32_t hbqId :16; +#endif + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t entry_count :16; + uint32_t rsvd4 :8; + uint32_t headerLen :8; +#else /* __LITTLE_ENDIAN */ + uint32_t headerLen :8; + uint32_t rsvd4 :8; + uint32_t entry_count :16; +#endif + + uint32_t hbqaddrLow; + uint32_t hbqaddrHigh; + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd5 :31; + uint32_t logEntry :1; +#else /* __LITTLE_ENDIAN */ + uint32_t logEntry :1; + uint32_t rsvd5 :31; +#endif + + uint32_t rsvd6; /* w7 */ + uint32_t rsvd7; /* w8 */ + uint32_t rsvd8; /* w9 */ + + struct hbq_mask hbqMasks[6]; + + + union { + uint32_t allprofiles[12]; + + struct { + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t seqlenoff :16; + uint32_t maxlen :16; + #else /* __LITTLE_ENDIAN */ + uint32_t maxlen :16; + uint32_t seqlenoff :16; + #endif + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd1 :28; + uint32_t seqlenbcnt :4; + #else /* __LITTLE_ENDIAN */ + 
uint32_t seqlenbcnt :4; + uint32_t rsvd1 :28; + #endif + uint32_t rsvd[10]; + } profile2; + + struct { + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t seqlenoff :16; + uint32_t maxlen :16; + #else /* __LITTLE_ENDIAN */ + uint32_t maxlen :16; + uint32_t seqlenoff :16; + #endif + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t cmdcodeoff :28; + uint32_t rsvd1 :12; + uint32_t seqlenbcnt :4; + #else /* __LITTLE_ENDIAN */ + uint32_t seqlenbcnt :4; + uint32_t rsvd1 :12; + uint32_t cmdcodeoff :28; + #endif + uint32_t cmdmatch[8]; + + uint32_t rsvd[2]; + } profile3; + + struct { + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t seqlenoff :16; + uint32_t maxlen :16; + #else /* __LITTLE_ENDIAN */ + uint32_t maxlen :16; + uint32_t seqlenoff :16; + #endif + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t cmdcodeoff :28; + uint32_t rsvd1 :12; + uint32_t seqlenbcnt :4; + #else /* __LITTLE_ENDIAN */ + uint32_t seqlenbcnt :4; + uint32_t rsvd1 :12; + uint32_t cmdcodeoff :28; + #endif + uint32_t cmdmatch[8]; + + uint32_t rsvd[2]; + } profile5; + + } profiles; + +}; + + + +/* Structure for MB Command CONFIG_PORT (0x88) */ typedef struct { - uint32_t pcbLen; +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t cBE : 1; + uint32_t cET : 1; + uint32_t cHpcb : 1; + uint32_t cMA : 1; + uint32_t sli_mode : 4; + uint32_t pcbLen : 24; /* bit 23:0 of memory based port + * config block */ +#else /* __LITTLE_ENDIAN */ + uint32_t pcbLen : 24; /* bit 23:0 of memory based port + * config block */ + uint32_t sli_mode : 4; + uint32_t cMA : 1; + uint32_t cHpcb : 1; + uint32_t cET : 1; + uint32_t cBE : 1; +#endif + uint32_t pcbLow; /* bit 31:0 of memory based port config block */ uint32_t pcbHigh; /* bit 63:32 of memory based port config block */ - uint32_t hbainit[5]; + uint32_t hbainit[6]; + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd : 24; /* Reserved */ + uint32_t cmv : 1; /* Configure Max VPIs */ + uint32_t ccrp : 1; /* Config Command Ring Polling */ + uint32_t csah : 1; /* Configure Synchronous Abort Handling */ + uint32_t chbs : 1; 
/* Cofigure Host Backing store */ + uint32_t cinb : 1; /* Enable Interrupt Notification Block */ + uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */ + uint32_t cmx : 1; /* Configure Max XRIs */ + uint32_t cmr : 1; /* Configure Max RPIs */ +#else /* __LITTLE_ENDIAN */ + uint32_t cmr : 1; /* Configure Max RPIs */ + uint32_t cmx : 1; /* Configure Max XRIs */ + uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */ + uint32_t cinb : 1; /* Enable Interrupt Notification Block */ + uint32_t chbs : 1; /* Cofigure Host Backing store */ + uint32_t csah : 1; /* Configure Synchronous Abort Handling */ + uint32_t ccrp : 1; /* Config Command Ring Polling */ + uint32_t cmv : 1; /* Configure Max VPIs */ + uint32_t rsvd : 24; /* Reserved */ +#endif +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd2 : 24; /* Reserved */ + uint32_t gmv : 1; /* Grant Max VPIs */ + uint32_t gcrp : 1; /* Grant Command Ring Polling */ + uint32_t gsah : 1; /* Grant Synchronous Abort Handling */ + uint32_t ghbs : 1; /* Grant Host Backing Store */ + uint32_t ginb : 1; /* Grant Interrupt Notification Block */ + uint32_t gerbm : 1; /* Grant ERBM Request */ + uint32_t gmx : 1; /* Grant Max XRIs */ + uint32_t gmr : 1; /* Grant Max RPIs */ +#else /* __LITTLE_ENDIAN */ + uint32_t gmr : 1; /* Grant Max RPIs */ + uint32_t gmx : 1; /* Grant Max XRIs */ + uint32_t gerbm : 1; /* Grant ERBM Request */ + uint32_t ginb : 1; /* Grant Interrupt Notification Block */ + uint32_t ghbs : 1; /* Grant Host Backing Store */ + uint32_t gsah : 1; /* Grant Synchronous Abort Handling */ + uint32_t gcrp : 1; /* Grant Command Ring Polling */ + uint32_t gmv : 1; /* Grant Max VPIs */ + uint32_t rsvd2 : 24; /* Reserved */ +#endif + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t max_rpi : 16; /* Max RPIs Port should configure */ + uint32_t max_xri : 16; /* Max XRIs Port should configure */ +#else /* __LITTLE_ENDIAN */ + uint32_t max_xri : 16; /* Max XRIs Port should configure */ + uint32_t max_rpi : 16; /* Max RPIs Port should 
configure */ +#endif + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t max_hbq : 16; /* Max HBQs Host expect to configure */ + uint32_t rsvd3 : 16; /* Max HBQs Host expect to configure */ +#else /* __LITTLE_ENDIAN */ + uint32_t rsvd3 : 16; /* Max HBQs Host expect to configure */ + uint32_t max_hbq : 16; /* Max HBQs Host expect to configure */ +#endif + + uint32_t rsvd4; /* Reserved */ + +#ifdef __BIG_ENDIAN_BITFIELD + uint32_t rsvd5 : 16; /* Reserved */ + uint32_t max_vpi : 16; /* Max number of virt N-Ports */ +#else /* __LITTLE_ENDIAN */ + uint32_t max_vpi : 16; /* Max number of virt N-Ports */ + uint32_t rsvd5 : 16; /* Reserved */ +#endif + } CONFIG_PORT_VAR; /* SLI-2 Port Control Block */ @@ -2262,7 +2607,9 @@ #define MAILBOX_CMD_SIZE (MAILBOX_CMD_WSIZE * sizeof(uint32_t)) typedef union { - uint32_t varWords[MAILBOX_CMD_WSIZE - 1]; + uint32_t varWords[MAILBOX_CMD_WSIZE - 1]; /* first word is type/ + * feature/max ring number + */ LOAD_SM_VAR varLdSM; /* cmd = 1 (LOAD_SM) */ READ_NV_VAR varRDnvp; /* cmd = 2 (READ_NVPARMS) */ WRITE_NV_VAR varWTnvp; /* cmd = 3 (WRITE_NVPARMS) */ @@ -2287,8 +2634,13 @@ CLEAR_LA_VAR varClearLA; /* cmd = 22 (CLEAR_LA) */ DUMP_VAR varDmp; /* Warm Start DUMP mbx cmd */ UNREG_D_ID_VAR varUnregDID; /* cmd = 0x23 (UNREG_D_ID) */ - CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP) NEW_FEATURE */ + CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP) + * NEW_FEATURE + */ + struct config_hbq_var varCfgHbq;/* cmd = 0x7c (CONFIG_HBQ) */ CONFIG_PORT_VAR varCfgPort; /* cmd = 0x88 (CONFIG_PORT) */ + REG_VPI_VAR varRegVpi; /* cmd = 0x96 (REG_VPI) */ + UNREG_VPI_VAR varUnregVpi; /* cmd = 0x97 (UNREG_VPI) */ } MAILVARIANTS; /* @@ -2305,14 +2657,27 @@ __le32 rspPutInx; }; -typedef struct _SLI2_DESC { - struct lpfc_hgp host[MAX_RINGS]; +struct sli2_desc { uint32_t unused1[16]; + struct lpfc_hgp host[MAX_RINGS]; struct lpfc_pgp port[MAX_RINGS]; -} SLI2_DESC; +}; + +struct sli3_desc { + struct lpfc_hgp host[MAX_RINGS]; + uint32_t reserved[8]; + 
uint32_t hbq_put[16]; +}; + +struct sli3_pgp { + struct lpfc_pgp port[MAX_RINGS]; + uint32_t hbq_get[16]; +}; typedef union { - SLI2_DESC s2; + struct sli2_desc s2; + struct sli3_desc s3; + struct sli3_pgp s3_pgp; } SLI_VAR; typedef struct { @@ -2618,6 +2983,25 @@ uint32_t fcpt_Length; /* transfer ready for IWRITE */ } FCPT_FIELDS64; +/* IOCB Command template for CMD_IOCB_RCV_ELS64_CX (0xB7) + or CMD_IOCB_RCV_SEQ64_CX (0xB5) */ + +struct rcv_sli3 { + uint32_t word8Rsvd; +#ifdef __BIG_ENDIAN_BITFIELD + uint16_t vpi; + uint16_t word9Rsvd; +#else /* __LITTLE_ENDIAN */ + uint16_t word9Rsvd; + uint16_t vpi; +#endif + uint32_t word10Rsvd; + uint32_t acc_len; /* accumulated length */ + struct ulp_bde64 bde2; +}; + + + typedef struct _IOCB { /* IOCB structure */ union { GENERIC_RSP grsp; /* Generic response */ @@ -2633,7 +3017,7 @@ /* SLI-2 structures */ struct ulp_bde64 cont64[2]; /* up to 2 64 bit continuation - bde_64s */ + * bde_64s */ ELS_REQUEST64 elsreq64; /* ELS_REQUEST template */ GEN_REQUEST64 genreq64; /* GEN_REQUEST template */ RCV_ELS_REQ64 rcvels64; /* RCV_ELS_REQ template */ @@ -2695,9 +3079,20 @@ uint32_t ulpTimeout:8; #endif + union { + struct rcv_sli3 rcvsli3; /* words 8 - 15 */ + uint32_t sli3Words[24]; /* 96 extra bytes for SLI-3 */ + } unsli3; + +#define ulpCt_h ulpXS +#define ulpCt_l ulpFCP2Rcvy + +#define IOCB_FCP 1 /* IOCB is used for FCP ELS cmds-ulpRsvByte */ +#define IOCB_IP 2 /* IOCB is used for IP ELS cmds */ #define PARM_UNUSED 0 /* PU field (Word 4) not used */ #define PARM_REL_OFF 1 /* PU field (Word 4) = R. O. 
*/ #define PARM_READ_CHECK 2 /* PU field (Word 4) = Data Transfer Length */ +#define PARM_NPIV_DID 3 #define CLASS1 0 /* Class 1 */ #define CLASS2 1 /* Class 2 */ #define CLASS3 2 /* Class 3 */ @@ -2718,39 +3113,51 @@ #define IOSTAT_RSVD2 0xC #define IOSTAT_RSVD3 0xD #define IOSTAT_RSVD4 0xE -#define IOSTAT_RSVD5 0xF +#define IOSTAT_NEED_BUFFER 0xF #define IOSTAT_DRIVER_REJECT 0x10 /* ulpStatus - Driver defined */ #define IOSTAT_DEFAULT 0xF /* Same as rsvd5 for now */ #define IOSTAT_CNT 0x11 } IOCB_t; +/* Structure used for a single HBQ entry */ +struct lpfc_hbq_entry { + struct ulp_bde64 bde; + uint32_t buffer_tag; +}; + #define SLI1_SLIM_SIZE (4 * 1024) /* Up to 498 IOCBs will fit into 16k * 256 (MAILBOX_t) + 140 (PCB_t) + ( 32 (IOCB_t) * 498 ) = < 16384 */ -#define SLI2_SLIM_SIZE (16 * 1024) +#define SLI2_SLIM_SIZE (64 * 1024) /* Maximum IOCBs that will fit in SLI2 slim */ #define MAX_SLI2_IOCB 498 +#define MAX_SLIM_IOCB_SIZE (SLI2_SLIM_SIZE - \ + (sizeof(MAILBOX_t) + sizeof(PCB_t))) + +/* HBQ entries are 4 words each = 4k */ +#define LPFC_TOTAL_HBQ_SIZE (sizeof(struct lpfc_hbq_entry) * \ + lpfc_sli_hbq_count()) struct lpfc_sli2_slim { MAILBOX_t mbx; PCB_t pcb; - IOCB_t IOCBs[MAX_SLI2_IOCB]; + IOCB_t IOCBs[MAX_SLIM_IOCB_SIZE]; }; -/******************************************************************* -This macro check PCI device to allow special handling for LC HBAs. - -Parameters: -device : struct pci_dev 's device field - -return 1 => TRUE - 0 => FALSE - *******************************************************************/ +/* + * This function checks PCI device to allow special handling for LC HBAs. + * + * Parameters: + * device : struct pci_dev 's device field + * + * return 1 => TRUE + * 0 => FALSE + */ static inline int lpfc_is_LC_HBA(unsigned short device) { @@ -2766,3 +3173,16 @@ else return 0; } + +/* + * Determine if an IOCB failed because of a link event or firmware reset. 
+ */ + +static inline int +lpfc_error_lost_link(IOCB_t *iocbp) +{ + return (iocbp->ulpStatus == IOSTAT_LOCAL_REJECT && + (iocbp->un.ulpWord[4] == IOERR_SLI_ABORTED || + iocbp->un.ulpWord[4] == IOERR_LINK_DOWN || + iocbp->un.ulpWord[4] == IOERR_SLI_DOWN)); +} diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_init.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_init.c 2007-12-21 15:36:14.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -40,15 +41,20 @@ #include "lpfc.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" +#include "lpfc_vport.h" #include "lpfc_version.h" +#include "lpfc_vport.h" static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int); static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); static int lpfc_post_rcv_buf(struct lpfc_hba *); static struct scsi_transport_template *lpfc_transport_template = NULL; +static struct scsi_transport_template *lpfc_vport_transport_template = NULL; static DEFINE_IDR(lpfc_hba_index); + + /************************************************************************/ /* */ /* lpfc_config_port_prep */ @@ -61,7 +67,7 @@ /* */ /************************************************************************/ int -lpfc_config_port_prep(struct lpfc_hba * phba) +lpfc_config_port_prep(struct lpfc_hba *phba) { lpfc_vpd_t *vp = &phba->vpd; int i = 0, rc; @@ -75,12 +81,12 @@ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!pmb) { - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; return -ENOMEM; } mb = &pmb->mb; - phba->hba_state = LPFC_INIT_MBX_CMDS; + phba->link_state = LPFC_INIT_MBX_CMDS; if (lpfc_is_LC_HBA(phba->pcidev->device)) { if (init_key) { @@ -100,9 +106,7 @@ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); if (rc != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_MBOX, + lpfc_printf_log(phba, 
KERN_ERR, LOG_MBOX, "%d:0324 Config Port initialization " "error, mbxCmd x%x READ_NVPARM, " "mbxStatus x%x\n", @@ -112,16 +116,18 @@ return -ERESTART; } memcpy(phba->wwnn, (char *)mb->un.varRDnvp.nodename, - sizeof (mb->un.varRDnvp.nodename)); + sizeof(phba->wwnn)); + memcpy(phba->wwpn, (char *)mb->un.varRDnvp.portname, + sizeof(phba->wwpn)); } + phba->sli3_options = 0x0; + /* Setup and issue mailbox READ REV command */ lpfc_read_rev(phba, pmb); rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); if (rc != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0439 Adapter failed to init, mbxCmd x%x " "READ_REV, mbxStatus x%x\n", phba->brd_no, @@ -130,6 +136,7 @@ return -ERESTART; } + /* * The value of rr must be 1 since the driver set the cv field to 1. * This setting requires the FW to set all revision fields. @@ -144,8 +151,12 @@ return -ERESTART; } + if (phba->sli_rev == 3 && !mb->un.varRdRev.v3rsp) + return -EINVAL; /* NOTE(review): returns without mempool_free(pmb) - looks like a leak, confirm */ + /* Save information as VPD data */ vp->rev.rBit = 1; + memcpy(&vp->sli3Feat, &mb->un.varRdRev.sli3Feat, sizeof(uint32_t)); vp->rev.sli1FwRev = mb->un.varRdRev.sli1FwRev; memcpy(vp->rev.sli1FwName, (char*) mb->un.varRdRev.sli1FwName, 16); vp->rev.sli2FwRev = mb->un.varRdRev.sli2FwRev; @@ -161,6 +172,13 @@ vp->rev.postKernRev = mb->un.varRdRev.postKernRev; vp->rev.opFwRev = mb->un.varRdRev.opFwRev; + /* If the sli feature level is less than 9, we must + * tear down all RPIs and VPIs on link down if NPIV + * is enabled. 
+ */ + if (vp->rev.feaLevelHigh < 9) + phba->sli3_options |= LPFC_SLI3_VPORT_TEARDOWN; + if (lpfc_is_LC_HBA(phba->pcidev->device)) memcpy(phba->RandomData, (char *)&mb->un.varWords[24], sizeof (phba->RandomData)); @@ -212,48 +230,34 @@ /* */ /************************************************************************/ int -lpfc_config_port_post(struct lpfc_hba * phba) +lpfc_config_port_post(struct lpfc_hba *phba) { + struct lpfc_vport *vport = phba->pport; LPFC_MBOXQ_t *pmb; MAILBOX_t *mb; struct lpfc_dmabuf *mp; struct lpfc_sli *psli = &phba->sli; uint32_t status, timeout; - int i, j, rc; + int i, j; + int rc; pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!pmb) { - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; return -ENOMEM; } mb = &pmb->mb; - lpfc_config_link(phba, pmb); - rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); - if (rc != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, - "%d:0447 Adapter failed init, mbxCmd x%x " - "CONFIG_LINK mbxStatus x%x\n", - phba->brd_no, - mb->mbxCommand, mb->mbxStatus); - phba->hba_state = LPFC_HBA_ERROR; - mempool_free( pmb, phba->mbox_mem_pool); - return -EIO; - } - /* Get login parameters for NID. 
*/ - lpfc_read_sparam(phba, pmb); + lpfc_read_sparam(phba, pmb, 0); + pmb->vport = vport; if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0448 Adapter failed init, mbxCmd x%x " "READ_SPARM mbxStatus x%x\n", phba->brd_no, mb->mbxCommand, mb->mbxStatus); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; mp = (struct lpfc_dmabuf *) pmb->context1; mempool_free( pmb, phba->mbox_mem_pool); lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -263,25 +267,27 @@ mp = (struct lpfc_dmabuf *) pmb->context1; - memcpy(&phba->fc_sparam, mp->virt, sizeof (struct serv_parm)); + memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); pmb->context1 = NULL; if (phba->cfg_soft_wwnn) - u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn); + u64_to_wwn(phba->cfg_soft_wwnn, + vport->fc_sparam.nodeName.u.wwn); if (phba->cfg_soft_wwpn) - u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn); - memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName, + u64_to_wwn(phba->cfg_soft_wwpn, + vport->fc_sparam.portName.u.wwn); + memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, sizeof (struct lpfc_name)); - memcpy(&phba->fc_portname, &phba->fc_sparam.portName, + memcpy(&vport->fc_portname, &vport->fc_sparam.portName, sizeof (struct lpfc_name)); /* If no serial number in VPD data, use low 6 bytes of WWNN */ /* This should be consolidated into parse_vpd ? 
- mr */ if (phba->SerialNumber[0] == 0) { uint8_t *outptr; - outptr = &phba->fc_nodename.u.s.IEEE[0]; + outptr = &vport->fc_nodename.u.s.IEEE[0]; for (i = 0; i < 12; i++) { status = *outptr++; j = ((status & 0xf0) >> 4); @@ -303,15 +309,14 @@ } lpfc_read_config(phba, pmb); + pmb->vport = vport; if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0453 Adapter failed to init, mbxCmd x%x " "READ_CONFIG, mbxStatus x%x\n", phba->brd_no, mb->mbxCommand, mb->mbxStatus); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; mempool_free( pmb, phba->mbox_mem_pool); return -EIO; } @@ -338,9 +343,7 @@ || ((phba->cfg_link_speed == LINK_SPEED_10G) && !(phba->lmt & LMT_10Gb))) { /* Reset link speed to auto */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_LINK_EVENT, + lpfc_printf_log(phba, KERN_WARNING, LOG_LINK_EVENT, "%d:1302 Invalid speed for this board: " "Reset link speed to auto: x%x\n", phba->brd_no, @@ -348,7 +351,7 @@ phba->cfg_link_speed = LINK_SPEED_AUTO; } - phba->hba_state = LPFC_LINK_DOWN; + phba->link_state = LPFC_LINK_DOWN; /* Only process IOCBs on ring 0 till hba_state is READY */ if (psli->ring[psli->extra_ring].cmdringaddr) @@ -359,10 +362,11 @@ psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT; /* Post receive buffers for desired rings */ + if (phba->sli_rev != 3) lpfc_post_rcv_buf(phba); /* Enable appropriate host interrupts */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); status = readl(phba->HCregaddr); status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA; if (psli->num_rings > 0) @@ -380,22 +384,24 @@ writel(status, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); /* * Setup the ring 0 (els) timeout handler */ timeout = phba->fc_ratov << 1; - mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout); + 
mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout); + mod_timer(&phba->hb_tmofunc, jiffies + HZ * LPFC_HB_MBOX_INTERVAL); + phba->hb_outstanding = 0; + phba->last_completion_time = jiffies; lpfc_init_link(phba, pmb, phba->cfg_topology, phba->cfg_link_speed); pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + pmb->vport = vport; rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); lpfc_set_loopback_flag(phba); if (rc != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0454 Adapter failed to init, mbxCmd x%x " "INIT_LINK, mbxStatus x%x\n", phba->brd_no, @@ -408,7 +414,7 @@ writel(0xffffffff, phba->HAregaddr); readl(phba->HAregaddr); /* flush */ - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; if (rc != MBX_BUSY) mempool_free(pmb, phba->mbox_mem_pool); return -EIO; @@ -429,18 +435,19 @@ /* */ /************************************************************************/ int -lpfc_hba_down_prep(struct lpfc_hba * phba) +lpfc_hba_down_prep(struct lpfc_hba *phba) { + struct lpfc_vport *vport = phba->pport; + /* Disable interrupts */ writel(0, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - /* Cleanup potential discovery resources */ - lpfc_els_flush_rscn(phba); - lpfc_els_flush_cmd(phba); - lpfc_disc_flush_list(phba); + list_for_each_entry(vport, &phba->port_list, listentry) { + lpfc_cleanup_discovery_resources(vport); + } - return (0); + return 0; } /************************************************************************/ @@ -453,13 +460,16 @@ /* */ /************************************************************************/ int -lpfc_hba_down_post(struct lpfc_hba * phba) +lpfc_hba_down_post(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; struct lpfc_dmabuf *mp, *next_mp; int i; + if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) + lpfc_sli_hbqbuf_free_all(phba); + else { /* Cleanup preposted buffers on the ELS ring */ pring = 
&psli->ring[LPFC_ELS_RING]; list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) { @@ -468,6 +478,7 @@ lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } + } for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; @@ -477,6 +488,119 @@ return 0; } +/* HBA heart beat timeout handler */ +void +lpfc_hb_timeout(unsigned long ptr) +{ + struct lpfc_hba *phba; + unsigned long iflag; + + phba = (struct lpfc_hba *)ptr; + spin_lock_irqsave(&phba->pport->work_port_lock, iflag); + if (!(phba->pport->work_port_events & WORKER_HB_TMO)) + phba->pport->work_port_events |= WORKER_HB_TMO; + spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag); + + if (phba->work_wait) + wake_up(phba->work_wait); + return; +} + +static void +lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq) +{ + unsigned long drvr_flag; + + spin_lock_irqsave(&phba->hbalock, drvr_flag); + phba->hb_outstanding = 0; + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + + mempool_free(pmboxq, phba->mbox_mem_pool); + if (!(phba->pport->fc_flag & FC_OFFLINE_MODE) && + !(phba->link_state == LPFC_HBA_ERROR) && + !(phba->pport->fc_flag & FC_UNLOADING)) + mod_timer(&phba->hb_tmofunc, + jiffies + HZ * LPFC_HB_MBOX_INTERVAL); + return; +} + +void +lpfc_hb_timeout_handler(struct lpfc_hba *phba) +{ + LPFC_MBOXQ_t *pmboxq; + int retval; + struct lpfc_sli *psli = &phba->sli; + + if ((phba->link_state == LPFC_HBA_ERROR) || + (phba->pport->fc_flag & FC_UNLOADING) || + (phba->pport->fc_flag & FC_OFFLINE_MODE)) + return; + + spin_lock_irq(&phba->pport->work_port_lock); + /* If the timer is already canceled do nothing */ + if (!(phba->pport->work_port_events & WORKER_HB_TMO)) { + spin_unlock_irq(&phba->pport->work_port_lock); + return; + } + + if (time_after(phba->last_completion_time + LPFC_HB_MBOX_INTERVAL * HZ, + jiffies)) { + spin_unlock_irq(&phba->pport->work_port_lock); + if (!phba->hb_outstanding) + mod_timer(&phba->hb_tmofunc, + jiffies + HZ * LPFC_HB_MBOX_INTERVAL); + else + 
mod_timer(&phba->hb_tmofunc, + jiffies + HZ * LPFC_HB_MBOX_TIMEOUT); + return; + } + spin_unlock_irq(&phba->pport->work_port_lock); + + /* If there is no heart beat outstanding, issue a heartbeat command */ + if (!phba->hb_outstanding) { + pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL); + if (!pmboxq) { + mod_timer(&phba->hb_tmofunc, + jiffies + HZ * LPFC_HB_MBOX_INTERVAL); + return; + } + + lpfc_heart_beat(phba, pmboxq); + pmboxq->mbox_cmpl = lpfc_hb_mbox_cmpl; + pmboxq->vport = phba->pport; + retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT); + + if (retval != MBX_BUSY && retval != MBX_SUCCESS) { + mempool_free(pmboxq, phba->mbox_mem_pool); + mod_timer(&phba->hb_tmofunc, + jiffies + HZ * LPFC_HB_MBOX_INTERVAL); + return; + } + mod_timer(&phba->hb_tmofunc, + jiffies + HZ * LPFC_HB_MBOX_TIMEOUT); + phba->hb_outstanding = 1; + return; + } else { + /* + * If heart beat timeout called with hb_outstanding set we + * need to take the HBA offline. + */ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0459 Adapter heartbeat failure, taking " + "this port offline.\n", phba->brd_no); + + spin_lock_irq(&phba->hbalock); + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; + spin_unlock_irq(&phba->hbalock); + + lpfc_offline_prep(phba); + lpfc_offline(phba); + lpfc_unblock_mgmt_io(phba); + phba->link_state = LPFC_HBA_ERROR; + lpfc_hba_down_post(phba); + } +} + /************************************************************************/ /* */ /* lpfc_handle_eratt */ @@ -486,11 +610,15 @@ /* */ /************************************************************************/ void -lpfc_handle_eratt(struct lpfc_hba * phba) +lpfc_handle_eratt(struct lpfc_hba *phba) { + struct lpfc_vport *vport = phba->pport; struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; + struct lpfc_vport *port_iterator; uint32_t event_data; + struct Scsi_Host *shost; + /* If the pci channel is offline, ignore possible errors, * since we cannot communicate with the pci card anyway. 
*/ if (pci_channel_offline(phba->pcidev)) @@ -504,10 +632,17 @@ "Data: x%x x%x x%x\n", phba->brd_no, phba->work_hs, phba->work_status[0], phba->work_status[1]); - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_ESTABLISH_LINK; + list_for_each_entry(port_iterator, &phba->port_list, + listentry) { + shost = lpfc_shost_from_vport(port_iterator); + + spin_lock_irq(shost->host_lock); + port_iterator->fc_flag |= FC_ESTABLISH_LINK; + spin_unlock_irq(shost->host_lock); + } + spin_lock_irq(&phba->hbalock); psli->sli_flag &= ~LPFC_SLI2_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); /* * Firmware stops when it triggled erratt with HS_FFER6. @@ -544,15 +679,18 @@ phba->work_status[0], phba->work_status[1]); event_data = FC_REG_DUMP_EVENT; - fc_host_post_vendor_event(phba->host, fc_get_event_number(), + shost = lpfc_shost_from_vport(vport); + fc_host_post_vendor_event(shost, fc_get_event_number(), sizeof(event_data), (char *) &event_data, SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX); + spin_lock_irq(&phba->hbalock); psli->sli_flag &= ~LPFC_SLI2_ACTIVE; + spin_unlock_irq(&phba->hbalock); lpfc_offline_prep(phba); lpfc_offline(phba); lpfc_unblock_mgmt_io(phba); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; lpfc_hba_down_post(phba); } } @@ -566,9 +704,11 @@ /* */ /************************************************************************/ void -lpfc_handle_latt(struct lpfc_hba * phba) +lpfc_handle_latt(struct lpfc_hba *phba) { + struct lpfc_vport *vport = phba->pport; struct lpfc_sli *psli = &phba->sli; + struct lpfc_vport *port_iterator; LPFC_MBOXQ_t *pmb; volatile uint32_t control; struct lpfc_dmabuf *mp; @@ -589,20 +729,22 @@ rc = -EIO; /* Cleanup any outstanding ELS commands */ - lpfc_els_flush_cmd(phba); + list_for_each_entry(port_iterator, &phba->port_list, listentry) + lpfc_els_flush_cmd(port_iterator); psli->slistat.link_event++; lpfc_read_la(phba, pmb, mp); pmb->mbox_cmpl = lpfc_mbx_cmpl_read_la; + 
pmb->vport = vport; rc = lpfc_sli_issue_mbox (phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB)); if (rc == MBX_NOT_FINISHED) goto lpfc_handle_latt_free_mbuf; /* Clear Link Attention in HA REG */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); writel(HA_LATT, phba->HAregaddr); readl(phba->HAregaddr); /* flush */ - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return; @@ -614,7 +756,7 @@ mempool_free(pmb, phba->mbox_mem_pool); lpfc_handle_latt_err_exit: /* Enable Link attention interrupts */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); psli->sli_flag |= LPFC_PROCESS_LA; control = readl(phba->HCregaddr); control |= HC_LAINT_ENA; @@ -624,15 +766,13 @@ /* Clear Link Attention in HA REG */ writel(HA_LATT, phba->HAregaddr); readl(phba->HAregaddr); /* flush */ - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); lpfc_linkdown(phba); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; /* The other case is an error from issue_mbox */ if (rc == -ENOMEM) - lpfc_printf_log(phba, - KERN_WARNING, - LOG_MBOX, + lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX, "%d:0300 READ_LA: no buffers\n", phba->brd_no); @@ -646,7 +786,7 @@ /* */ /************************************************************************/ static int -lpfc_parse_vpd(struct lpfc_hba * phba, uint8_t * vpd, int len) +lpfc_parse_vpd(struct lpfc_hba *phba, uint8_t *vpd, int len) { uint8_t lenlo, lenhi; int Length; @@ -658,9 +798,7 @@ return 0; /* Vital Product */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_INIT, + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "%d:0455 Vital Product Data: x%x x%x x%x x%x\n", phba->brd_no, (uint32_t) vpd[0], (uint32_t) vpd[1], (uint32_t) vpd[2], @@ -785,7 +923,7 @@ } static void -lpfc_get_hba_model_desc(struct lpfc_hba * phba, uint8_t * mdp, uint8_t * descp) +lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) { lpfc_vpd_t *vp; uint16_t dev_id = 
phba->pcidev->device; @@ -943,7 +1081,7 @@ /* Returns the number of buffers NOT posted. */ /**************************************************/ int -lpfc_post_buffer(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, int cnt, +lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt, int type) { IOCB_t *icmd; @@ -955,9 +1093,7 @@ /* While there are buffers to post */ while (cnt > 0) { /* Allocate buffer for command iocb */ - spin_lock_irq(phba->host->host_lock); iocb = lpfc_sli_get_iocbq(phba); - spin_unlock_irq(phba->host->host_lock); if (iocb == NULL) { pring->missbufcnt = cnt; return cnt; @@ -972,9 +1108,7 @@ &mp1->phys); if (mp1 == 0 || mp1->virt == 0) { kfree(mp1); - spin_lock_irq(phba->host->host_lock); lpfc_sli_release_iocbq(phba, iocb); - spin_unlock_irq(phba->host->host_lock); pring->missbufcnt = cnt; return cnt; } @@ -990,9 +1124,7 @@ kfree(mp2); lpfc_mbuf_free(phba, mp1->virt, mp1->phys); kfree(mp1); - spin_lock_irq(phba->host->host_lock); lpfc_sli_release_iocbq(phba, iocb); - spin_unlock_irq(phba->host->host_lock); pring->missbufcnt = cnt; return cnt; } @@ -1018,7 +1150,6 @@ icmd->ulpCommand = CMD_QUE_RING_BUF64_CN; icmd->ulpLe = 1; - spin_lock_irq(phba->host->host_lock); if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) { lpfc_mbuf_free(phba, mp1->virt, mp1->phys); kfree(mp1); @@ -1030,15 +1161,12 @@ } lpfc_sli_release_iocbq(phba, iocb); pring->missbufcnt = cnt; - spin_unlock_irq(phba->host->host_lock); return cnt; } - spin_unlock_irq(phba->host->host_lock); lpfc_sli_ringpostbuf_put(phba, pring, mp1); - if (mp2) { + if (mp2) lpfc_sli_ringpostbuf_put(phba, pring, mp2); } - } pring->missbufcnt = 0; return 0; } @@ -1050,7 +1178,7 @@ /* */ /************************************************************************/ static int -lpfc_post_rcv_buf(struct lpfc_hba * phba) +lpfc_post_rcv_buf(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; @@ -1151,7 +1279,7 @@ { int t; uint32_t *HashWorking; - uint32_t *pwwnn = 
phba->wwnn; + uint32_t *pwwnn = (uint32_t *) phba->wwnn; HashWorking = kmalloc(80 * sizeof(uint32_t), GFP_KERNEL); if (!HashWorking) @@ -1170,64 +1298,76 @@ } static void -lpfc_cleanup(struct lpfc_hba * phba) +lpfc_cleanup(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp, *next_ndlp; /* clean up phba - lpfc specific */ - lpfc_can_disctmo(phba); - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) + lpfc_can_disctmo(vport); + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) lpfc_nlp_put(ndlp); - - INIT_LIST_HEAD(&phba->fc_nodes); - return; } static void lpfc_establish_link_tmo(unsigned long ptr) { - struct lpfc_hba *phba = (struct lpfc_hba *)ptr; + struct lpfc_hba *phba = (struct lpfc_hba *) ptr; + struct lpfc_vport *vport = phba->pport; unsigned long iflag; - /* Re-establishing Link, timer expired */ lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, "%d:1300 Re-establishing Link, timer expired " "Data: x%x x%x\n", - phba->brd_no, phba->fc_flag, phba->hba_state); - spin_lock_irqsave(phba->host->host_lock, iflag); - phba->fc_flag &= ~FC_ESTABLISH_LINK; - spin_unlock_irqrestore(phba->host->host_lock, iflag); + phba->brd_no, vport->fc_flag, + vport->port_state); + list_for_each_entry(vport, &phba->port_list, listentry) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + spin_lock_irqsave(shost->host_lock, iflag); + vport->fc_flag &= ~FC_ESTABLISH_LINK; + spin_unlock_irqrestore(shost->host_lock, iflag); + } } -static int -lpfc_stop_timer(struct lpfc_hba * phba) +void +lpfc_stop_vport_timers(struct lpfc_vport *vport) { - struct lpfc_sli *psli = &phba->sli; + del_timer_sync(&vport->els_tmofunc); + del_timer_sync(&vport->fc_fdmitmo); + lpfc_can_disctmo(vport); + return; +} + +static void +lpfc_stop_phba_timers(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport; del_timer_sync(&phba->fcp_poll_timer); del_timer_sync(&phba->fc_estabtmo); - del_timer_sync(&phba->fc_disctmo); - del_timer_sync(&phba->fc_fdmitmo); - 
del_timer_sync(&phba->els_tmofunc); - psli = &phba->sli; - del_timer_sync(&psli->mbox_tmo); - return(1); + list_for_each_entry(vport, &phba->port_list, listentry) + lpfc_stop_vport_timers(vport); + del_timer_sync(&phba->sli.mbox_tmo); + del_timer_sync(&phba->fabric_block_timer); + phba->hb_outstanding = 0; + del_timer_sync(&phba->hb_tmofunc); + return; } int -lpfc_online(struct lpfc_hba * phba) +lpfc_online(struct lpfc_hba *phba) { + struct lpfc_vport *vport = phba->pport; + if (!phba) return 0; - if (!(phba->fc_flag & FC_OFFLINE_MODE)) + if (!(vport->fc_flag & FC_OFFLINE_MODE)) return 0; - lpfc_printf_log(phba, - KERN_WARNING, - LOG_INIT, + lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "%d:0458 Bring Adapter online\n", phba->brd_no); @@ -1243,9 +1383,14 @@ return 1; } - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_OFFLINE_MODE; - spin_unlock_irq(phba->host->host_lock); + list_for_each_entry(vport, &phba->port_list, listentry) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + spin_lock_irq(shost->host_lock); + vport->fc_flag &= ~FC_OFFLINE_MODE; + if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + spin_unlock_irq(shost->host_lock); + } lpfc_unblock_mgmt_io(phba); return 0; @@ -1256,9 +1401,9 @@ { unsigned long iflag; - spin_lock_irqsave(phba->host->host_lock, iflag); - phba->fc_flag |= FC_BLOCK_MGMT_IO; - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); + phba->sli.sli_flag |= LPFC_BLOCK_MGMT_IO; + spin_unlock_irqrestore(&phba->hbalock, iflag); } void @@ -1266,17 +1411,18 @@ { unsigned long iflag; - spin_lock_irqsave(phba->host->host_lock, iflag); - phba->fc_flag &= ~FC_BLOCK_MGMT_IO; - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); + phba->sli.sli_flag &= ~LPFC_BLOCK_MGMT_IO; + spin_unlock_irqrestore(&phba->hbalock, iflag); } void lpfc_offline_prep(struct lpfc_hba * phba) { + struct lpfc_vport 
*vport = phba->pport; struct lpfc_nodelist *ndlp, *next_ndlp; - if (phba->fc_flag & FC_OFFLINE_MODE) + if (vport->fc_flag & FC_OFFLINE_MODE) return; lpfc_block_mgmt_io(phba); @@ -1284,39 +1430,49 @@ lpfc_linkdown(phba); /* Issue an unreg_login to all nodes */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); lpfc_sli_flush_mbox_queue(phba); } void -lpfc_offline(struct lpfc_hba * phba) +lpfc_offline(struct lpfc_hba *phba) { - unsigned long iflag; + struct lpfc_vport *vport = phba->pport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_vport *port_iterator; - if (phba->fc_flag & FC_OFFLINE_MODE) + if (vport->fc_flag & FC_OFFLINE_MODE) return; /* stop all timers associated with this hba */ - lpfc_stop_timer(phba); + lpfc_stop_phba_timers(phba); + list_for_each_entry(port_iterator, &phba->port_list, listentry) { + port_iterator->work_port_events = 0; + } - lpfc_printf_log(phba, - KERN_WARNING, - LOG_INIT, + lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "%d:0460 Bring Adapter offline\n", phba->brd_no); /* Bring down the SLI Layer and cleanup. The HBA is offline now. 
*/ lpfc_sli_hba_down(phba); - lpfc_cleanup(phba); - spin_lock_irqsave(phba->host->host_lock, iflag); - phba->work_hba_events = 0; + spin_lock_irq(&phba->hbalock); phba->work_ha = 0; - phba->fc_flag |= FC_OFFLINE_MODE; - spin_unlock_irqrestore(phba->host->host_lock, iflag); + vport->fc_flag |= FC_OFFLINE_MODE; + spin_unlock_irq(&phba->hbalock); + list_for_each_entry(port_iterator, &phba->port_list, listentry) { + shost = lpfc_shost_from_vport(port_iterator); + /* Clean up and mark every port offline, not only the physical port */ + lpfc_cleanup(port_iterator); + spin_lock_irq(shost->host_lock); + port_iterator->work_port_events = 0; + port_iterator->fc_flag |= FC_OFFLINE_MODE; + spin_unlock_irq(shost->host_lock); + } } /****************************************************************************** @@ -1326,12 +1482,12 @@ * ******************************************************************************/ static int -lpfc_scsi_free(struct lpfc_hba * phba) +lpfc_scsi_free(struct lpfc_hba *phba) { struct lpfc_scsi_buf *sb, *sb_next; struct lpfc_iocbq *io, *io_next; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); /* Release all the lpfc_scsi_bufs maintained by this host. 
*/ list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list, list) { list_del(&sb->list); @@ -1348,126 +1504,174 @@ phba->total_iocbq_bufs--; } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return 0; } -void lpfc_remove_device(struct lpfc_hba *phba) -{ - unsigned long iflag; - - lpfc_free_sysfs_attr(phba); - spin_lock_irqsave(phba->host->host_lock, iflag); - phba->fc_flag |= FC_UNLOADING; +struct lpfc_vport * +lpfc_create_port(struct lpfc_hba *phba, int instance, struct fc_vport *fc_vport) +{ + struct lpfc_vport *vport; + struct Scsi_Host *shost; + int error = 0; - spin_unlock_irqrestore(phba->host->host_lock, iflag); + shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport)); + if (!shost) + goto out; - fc_remove_host(phba->host); - scsi_remove_host(phba->host); + vport = (struct lpfc_vport *) shost->hostdata; + vport->phba = phba; - kthread_stop(phba->worker_thread); + vport->load_flag |= FC_LOADING; + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + shost->unique_id = instance; + shost->max_id = LPFC_MAX_TARGET; + shost->max_lun = phba->cfg_max_luns; + shost->this_id = -1; + shost->max_cmd_len = 16; /* - * Bring down the SLI Layer. This step disable all interrupts, - * clears the rings, discards all mailbox commands, and resets - * the HBA. + * Set initial can_queue value since 0 is no longer supported and + * scsi_add_host will fail. This will be adjusted later based on the + * max xri value determined in hba setup. */ - lpfc_sli_hba_down(phba); - lpfc_sli_brdrestart(phba); + shost->can_queue = phba->cfg_hba_queue_depth - 10; + if (fc_vport != NULL) { + shost->transportt = lpfc_vport_transport_template; + vport->port_type = LPFC_NPIV_PORT; + } else { + shost->transportt = lpfc_transport_template; + vport->port_type = LPFC_PHYSICAL_PORT; + } - /* Release the irq reservation */ - free_irq(phba->pcidev->irq, phba); - pci_disable_msi(phba->pcidev); + /* Initialize all internally managed lists. 
*/ + INIT_LIST_HEAD(&vport->fc_nodes); + spin_lock_init(&vport->work_port_lock); - lpfc_cleanup(phba); - lpfc_stop_timer(phba); - phba->work_hba_events = 0; + init_timer(&vport->fc_disctmo); + vport->fc_disctmo.function = lpfc_disc_timeout; + vport->fc_disctmo.data = (unsigned long)vport; - /* - * Call scsi_free before mem_free since scsi bufs are released to their - * corresponding pools here. - */ - lpfc_scsi_free(phba); - lpfc_mem_free(phba); + init_timer(&vport->fc_fdmitmo); + vport->fc_fdmitmo.function = lpfc_fdmi_tmo; + vport->fc_fdmitmo.data = (unsigned long)vport; - /* Free resources associated with SLI2 interface */ - dma_free_coherent(&phba->pcidev->dev, SLI2_SLIM_SIZE, - phba->slim2p, phba->slim2p_mapping); + init_timer(&vport->els_tmofunc); + vport->els_tmofunc.function = lpfc_els_timeout; + vport->els_tmofunc.data = (unsigned long)vport; - /* unmap adapter SLIM and Control Registers */ - iounmap(phba->ctrl_regs_memmap_p); - iounmap(phba->slim_memmap_p); + if (fc_vport != NULL) { + error = scsi_add_host(shost, &fc_vport->dev); + } else { + error = scsi_add_host(shost, &phba->pcidev->dev); + } + if (error) + goto out_put_shost; - pci_release_regions(phba->pcidev); - pci_disable_device(phba->pcidev); + list_add_tail(&vport->listentry, &phba->port_list); + return vport; - idr_remove(&lpfc_hba_index, phba->brd_no); - scsi_host_put(phba->host); +out_put_shost: + scsi_host_put(shost); +out: + return NULL; } -void lpfc_scan_start(struct Scsi_Host *host) +void +destroy_port(struct lpfc_vport *vport) { - struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; - if (lpfc_alloc_sysfs_attr(phba)) - goto error; + kfree(vport->vname); - phba->MBslimaddr = phba->slim_memmap_p; - phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET; - phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET; - phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET; - 
phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET; + lpfc_debugfs_terminate(vport); + fc_remove_host(shost); + scsi_remove_host(shost); - if (lpfc_sli_hba_setup(phba)) - goto error; + spin_lock_irq(&phba->hbalock); + list_del_init(&vport->listentry); + spin_unlock_irq(&phba->hbalock); - /* - * hba setup may have changed the hba_queue_depth so we need to adjust - * the value of can_queue. - */ - host->can_queue = phba->cfg_hba_queue_depth - 10; + lpfc_cleanup(vport); return; +} -error: - lpfc_remove_device(phba); +int +lpfc_get_instance(void) +{ + int instance = 0; + + /* Assign an unused number */ + if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL)) + return -1; + if (idr_get_new(&lpfc_hba_index, NULL, &instance)) + return -1; + return instance; } +/* + * Note: there is no scan_start function as adapter initialization + * will have asynchronously kicked off the link initialization. + */ + int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time) { - struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + int stat = 0; - if (!phba->host) - return 1; - if (time >= 30 * HZ) + spin_lock_irq(shost->host_lock); + + if (vport->fc_flag & FC_UNLOADING) { + stat = 1; goto finished; + } + if (time >= 30 * HZ) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "%d:0461 Scanning longer than 30 " + "seconds. Continuing initialization\n", + phba->brd_no); + stat = 1; + goto finished; + } + if (time >= 15 * HZ && phba->link_state <= LPFC_LINK_DOWN) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "%d:0465 Link down longer than 15 " + "seconds. 
Continuing initialization\n", + phba->brd_no); + stat = 1; + goto finished; + } - if (phba->hba_state != LPFC_HBA_READY) - return 0; - if (phba->num_disc_nodes || phba->fc_prli_sent) - return 0; - if ((phba->fc_map_cnt == 0) && (time < 2 * HZ)) - return 0; - if (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) - return 0; - if ((phba->hba_state > LPFC_LINK_DOWN) || (time < 15 * HZ)) - return 0; + if (vport->port_state != LPFC_VPORT_READY) + goto finished; + if (vport->num_disc_nodes || vport->fc_prli_sent) + goto finished; + if (vport->fc_map_cnt == 0 && time < 2 * HZ) + goto finished; + if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0) + goto finished; + + stat = 1; finished: - if (phba->cfg_poll & DISABLE_FCP_RING_INT) { - spin_lock_irq(shost->host_lock); - lpfc_poll_start_timer(phba); spin_unlock_irq(shost->host_lock); - } + return stat; +} +void lpfc_host_attrib_init(struct Scsi_Host *shost) +{ + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; /* - * set fixed host attributes - * Must done after lpfc_sli_hba_setup() + * Set fixed host attributes. Must done after lpfc_sli_hba_setup(). 
*/ - fc_host_node_name(shost) = wwn_to_u64(phba->fc_nodename.u.wwn); - fc_host_port_name(shost) = wwn_to_u64(phba->fc_portname.u.wwn); + fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn); + fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); fc_host_supported_classes(shost) = FC_COS_CLASS3; memset(fc_host_supported_fc4s(shost), 0, @@ -1475,7 +1679,8 @@ fc_host_supported_fc4s(shost)[2] = 1; fc_host_supported_fc4s(shost)[7] = 1; - lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost)); + lpfc_vport_symbolic_node_name(vport, fc_host_symbolic_name(shost), + sizeof fc_host_symbolic_name(shost)); fc_host_supported_speeds(shost) = 0; if (phba->lmt & LMT_10Gb) @@ -1488,8 +1693,8 @@ fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT; fc_host_maxframe_size(shost) = - ((((uint32_t) phba->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) | - (uint32_t) phba->fc_sparam.cmn.bbRcvSizeLsb); + (((uint32_t) vport->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) | + (uint32_t) vport->fc_sparam.cmn.bbRcvSizeLsb; /* This value is also unchanging */ memset(fc_host_active_fc4s(shost), 0, @@ -1497,20 +1702,20 @@ fc_host_active_fc4s(shost)[2] = 1; fc_host_active_fc4s(shost)[7] = 1; + fc_host_max_npiv_vports(shost) = phba->max_vpi; spin_lock_irq(shost->host_lock); - phba->fc_flag &= ~FC_LOADING; + vport->fc_flag &= ~FC_LOADING; spin_unlock_irq(shost->host_lock); - - return 1; } static int __devinit lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid) { - struct Scsi_Host *host; + struct lpfc_vport *vport = NULL; struct lpfc_hba *phba; struct lpfc_sli *psli; struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL; + struct Scsi_Host *shost = NULL; unsigned long bar0map_len, bar2map_len; int error = -ENODEV, retval; int i; @@ -1521,61 +1726,46 @@ if (pci_request_regions(pdev, LPFC_DRIVER_NAME)) goto out_disable_device; - host = scsi_host_alloc(&lpfc_template, sizeof (struct lpfc_hba)); - if (!host) + phba = kzalloc(sizeof (struct lpfc_hba), 
GFP_KERNEL); + if (!phba) goto out_release_regions; - phba = (struct lpfc_hba*)host->hostdata; - memset(phba, 0, sizeof (struct lpfc_hba)); - phba->host = host; + spin_lock_init(&phba->hbalock); - phba->fc_flag |= FC_LOADING; phba->pcidev = pdev; /* Assign an unused board number */ - if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL)) - goto out_put_host; + if ((phba->brd_no = lpfc_get_instance()) < 0) + goto out_free_phba; - error = idr_get_new(&lpfc_hba_index, NULL, &phba->brd_no); - if (error) - goto out_put_host; - - host->unique_id = phba->brd_no; + INIT_LIST_HEAD(&phba->port_list); + INIT_LIST_HEAD(&phba->hbq_buffer_list); + /* + * Get all the module params for configuring this host and then + * establish the host. + */ + lpfc_get_cfgparam(phba); + phba->max_vpi = LPFC_MAX_VPI; /* Initialize timers used by driver */ init_timer(&phba->fc_estabtmo); phba->fc_estabtmo.function = lpfc_establish_link_tmo; phba->fc_estabtmo.data = (unsigned long)phba; - init_timer(&phba->fc_disctmo); - phba->fc_disctmo.function = lpfc_disc_timeout; - phba->fc_disctmo.data = (unsigned long)phba; - - init_timer(&phba->fc_fdmitmo); - phba->fc_fdmitmo.function = lpfc_fdmi_tmo; - phba->fc_fdmitmo.data = (unsigned long)phba; - init_timer(&phba->els_tmofunc); - phba->els_tmofunc.function = lpfc_els_timeout; - phba->els_tmofunc.data = (unsigned long)phba; + + init_timer(&phba->hb_tmofunc); + phba->hb_tmofunc.function = lpfc_hb_timeout; + phba->hb_tmofunc.data = (unsigned long)phba; + psli = &phba->sli; init_timer(&psli->mbox_tmo); psli->mbox_tmo.function = lpfc_mbox_timeout; - psli->mbox_tmo.data = (unsigned long)phba; - + psli->mbox_tmo.data = (unsigned long) phba; init_timer(&phba->fcp_poll_timer); phba->fcp_poll_timer.function = lpfc_poll_timeout; - phba->fcp_poll_timer.data = (unsigned long)phba; - - /* - * Get all the module params for configuring this host and then - * establish the host parameters. 
- */ - lpfc_get_cfgparam(phba); - - host->max_id = LPFC_MAX_TARGET; - host->max_lun = phba->cfg_max_luns; - host->this_id = -1; - - INIT_LIST_HEAD(&phba->fc_nodes); + phba->fcp_poll_timer.data = (unsigned long) phba; + init_timer(&phba->fabric_block_timer); + phba->fabric_block_timer.function = lpfc_fabric_block_timeout; + phba->fabric_block_timer.data = (unsigned long) phba; pci_set_master(pdev); retval = pci_set_mwi(pdev); @@ -1623,13 +1813,22 @@ memset(phba->slim2p, 0, SLI2_SLIM_SIZE); + phba->hbqslimp.virt = dma_alloc_coherent(&phba->pcidev->dev, + lpfc_sli_hbq_size(), + &phba->hbqslimp.phys, + GFP_KERNEL); + if (!phba->hbqslimp.virt) + goto out_free_slim; + + memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size()); + /* Initialize the SLI Layer to run with lpfc HBAs. */ lpfc_sli_setup(phba); lpfc_sli_queue_setup(phba); error = lpfc_mem_alloc(phba); if (error) - goto out_free_slim; + goto out_free_hbqslimp; /* Initialize and populate the iocb list per host. */ INIT_LIST_HEAD(&phba->lpfc_iocb_list); @@ -1653,10 +1852,11 @@ error = -ENOMEM; goto out_free_iocbq; } - spin_lock_irq(phba->host->host_lock); + + spin_lock_irq(&phba->hbalock); list_add(&iocbq_entry->list, &phba->lpfc_iocb_list); phba->total_iocbq_bufs++; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); } /* Initialize HBA structure */ @@ -1677,22 +1877,22 @@ goto out_free_iocbq; } - /* - * Set initial can_queue value since 0 is no longer supported and - * scsi_add_host will fail. This will be adjusted later based on the - * max xri value determined in hba setup. - */ - host->can_queue = phba->cfg_hba_queue_depth - 10; - - /* Tell the midlayer we support 16 byte commands */ - host->max_cmd_len = 16; - /* Initialize the list of scsi buffers used by driver for scsi IO. 
*/ spin_lock_init(&phba->scsi_buf_list_lock); INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list); - host->transportt = lpfc_transport_template; - pci_set_drvdata(pdev, host); + /* Initialize list of fabric iocbs */ + INIT_LIST_HEAD(&phba->fabric_iocb_list); + + vport = lpfc_create_port(phba, phba->brd_no, NULL); + if (!vport) + goto out_kthread_stop; + + shost = lpfc_shost_from_vport(vport); + phba->pport = vport; + lpfc_debugfs_initialize(vport); + + pci_set_drvdata(pdev, shost); if (phba->cfg_use_msi) { error = pci_enable_msi(phba->pcidev); @@ -1708,33 +1908,63 @@ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0451 Enable interrupt handler failed\n", phba->brd_no); - goto out_kthread_stop; + goto out_disable_msi; } - error = scsi_add_host(host, &pdev->dev); - if (error) + phba->MBslimaddr = phba->slim_memmap_p; + phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET; + phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET; + phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET; + phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET; + + if (lpfc_alloc_sysfs_attr(vport)) goto out_free_irq; - scsi_scan_host(host); + if (lpfc_sli_hba_setup(phba)) + goto out_remove_device; + + /* + * hba setup may have changed the hba_queue_depth so we need to adjust + * the value of can_queue. 
+ */ + shost->can_queue = phba->cfg_hba_queue_depth - 10; + + lpfc_host_attrib_init(shost); + + if (phba->cfg_poll & DISABLE_FCP_RING_INT) { + spin_lock_irq(shost->host_lock); + lpfc_poll_start_timer(phba); + spin_unlock_irq(shost->host_lock); + } + + scsi_scan_host(shost); return 0; +out_remove_device: + lpfc_free_sysfs_attr(vport); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_UNLOADING; + spin_unlock_irq(shost->host_lock); out_free_irq: - lpfc_stop_timer(phba); - phba->work_hba_events = 0; + lpfc_stop_phba_timers(phba); + phba->pport->work_port_events = 0; free_irq(phba->pcidev->irq, phba); +out_disable_msi: pci_disable_msi(phba->pcidev); + destroy_port(vport); out_kthread_stop: kthread_stop(phba->worker_thread); out_free_iocbq: list_for_each_entry_safe(iocbq_entry, iocbq_next, &phba->lpfc_iocb_list, list) { - spin_lock_irq(phba->host->host_lock); kfree(iocbq_entry); phba->total_iocbq_bufs--; - spin_unlock_irq(phba->host->host_lock); } lpfc_mem_free(phba); +out_free_hbqslimp: + dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt, + phba->hbqslimp.phys); out_free_slim: dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE, phba->slim2p, phba->slim2p_mapping); @@ -1744,27 +1974,85 @@ iounmap(phba->slim_memmap_p); out_idr_remove: idr_remove(&lpfc_hba_index, phba->brd_no); -out_put_host: - phba->host = NULL; - scsi_host_put(host); +out_free_phba: + kfree(phba); out_release_regions: pci_release_regions(pdev); out_disable_device: pci_disable_device(pdev); out: pci_set_drvdata(pdev, NULL); + if (shost) + scsi_host_put(shost); return error; } static void __devexit lpfc_pci_remove_one(struct pci_dev *pdev) { - struct Scsi_Host *host = pci_get_drvdata(pdev); - struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata; + struct Scsi_Host *shost = pci_get_drvdata(pdev); + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; + struct lpfc_vport *port_iterator; + list_for_each_entry(port_iterator, 
&phba->port_list, listentry) + port_iterator->load_flag |= FC_UNLOADING; + + kfree(vport->vname); + lpfc_free_sysfs_attr(vport); - lpfc_remove_device(phba); + fc_remove_host(shost); + scsi_remove_host(shost); + + /* + * Bring down the SLI Layer. This step disable all interrupts, + * clears the rings, discards all mailbox commands, and resets + * the HBA. + */ + lpfc_sli_hba_down(phba); + lpfc_sli_brdrestart(phba); + + lpfc_stop_phba_timers(phba); + spin_lock_irq(&phba->hbalock); + list_del_init(&vport->listentry); + spin_unlock_irq(&phba->hbalock); + + + lpfc_debugfs_terminate(vport); + lpfc_cleanup(vport); + + kthread_stop(phba->worker_thread); + + /* Release the irq reservation */ + free_irq(phba->pcidev->irq, phba); + pci_disable_msi(phba->pcidev); pci_set_drvdata(pdev, NULL); + scsi_host_put(shost); + + /* + * Call scsi_free before mem_free since scsi bufs are released to their + * corresponding pools here. + */ + lpfc_scsi_free(phba); + lpfc_mem_free(phba); + + dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt, + phba->hbqslimp.phys); + + /* Free resources associated with SLI2 interface */ + dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE, + phba->slim2p, phba->slim2p_mapping); + + /* unmap adapter SLIM and Control Registers */ + iounmap(phba->ctrl_regs_memmap_p); + iounmap(phba->slim_memmap_p); + + idr_remove(&lpfc_hba_index, phba->brd_no); + + kfree(phba); + + pci_release_regions(pdev); + pci_disable_device(pdev); } /** @@ -1822,10 +2110,13 @@ pci_set_master(pdev); /* Re-establishing Link */ - spin_lock_irq(phba->host->host_lock); - phba->fc_flag |= FC_ESTABLISH_LINK; + spin_lock_irq(host->host_lock); + phba->pport->fc_flag |= FC_ESTABLISH_LINK; + spin_unlock_irq(host->host_lock); + + spin_lock_irq(&phba->hbalock); psli->sli_flag &= ~LPFC_SLI2_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); /* Take device offline; this will perform cleanup */ @@ -1948,11 +2239,15 @@ lpfc_transport_template = 
fc_attach_transport(&lpfc_transport_functions); - if (!lpfc_transport_template) + lpfc_vport_transport_template = + fc_attach_transport(&lpfc_vport_transport_functions); + if (!lpfc_transport_template || !lpfc_vport_transport_template) return -ENOMEM; error = pci_register_driver(&lpfc_driver); - if (error) + if (error) { fc_release_transport(lpfc_transport_template); + fc_release_transport(lpfc_vport_transport_template); + } return error; } @@ -1962,6 +2257,7 @@ { pci_unregister_driver(&lpfc_driver); fc_release_transport(lpfc_transport_template); + fc_release_transport(lpfc_vport_transport_template); } module_init(lpfc_init); diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_logmsg.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_logmsg.h 2007-12-21 15:36:12.000000000 -0500 @@ -30,6 +30,7 @@ #define LOG_SLI 0x800 /* SLI events */ #define LOG_FCP_ERROR 0x1000 /* log errors, not underruns */ #define LOG_LIBDFC 0x2000 /* Libdfc events */ +#define LOG_VPORT 0x4000 /* NPIV events */ #define LOG_ALL_MSG 0xffff /* LOG all messages */ #define lpfc_printf_log(phba, level, mask, fmt, arg...) 
\ diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mbox.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mbox.c 2007-12-21 15:36:12.000000000 -0500 @@ -82,6 +82,22 @@ } /**********************************************/ +/* lpfc_heart_beat Issue a HEART_BEAT */ +/* mailbox command */ +/**********************************************/ +void +lpfc_heart_beat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +{ + MAILBOX_t *mb; + + mb = &pmb->mb; + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + mb->mbxCommand = MBX_HEARTBEAT; + mb->mbxOwner = OWN_HOST; + return; +} + +/**********************************************/ /* lpfc_read_la Issue a READ LA */ /* mailbox command */ /**********************************************/ @@ -134,6 +150,7 @@ void lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) { + struct lpfc_vport *vport = phba->pport; MAILBOX_t *mb = &pmb->mb; memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); @@ -147,7 +164,7 @@ mb->un.varCfgLnk.cr_count = phba->cfg_cr_count; } - mb->un.varCfgLnk.myId = phba->fc_myDID; + mb->un.varCfgLnk.myId = vport->fc_myDID; mb->un.varCfgLnk.edtov = phba->fc_edtov; mb->un.varCfgLnk.arbtov = phba->fc_arbtov; mb->un.varCfgLnk.ratov = phba->fc_ratov; @@ -239,7 +256,7 @@ /* mailbox command */ /**********************************************/ int -lpfc_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi) { struct lpfc_dmabuf *mp; MAILBOX_t *mb; @@ -270,6 +287,7 @@ mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm); mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys); mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys); + mb->un.varRdSparm.vpi = vpi; /* save address for completion */ pmb->context1 = mp; @@ -282,7 +300,8 @@ /* mailbox command */ /********************************************/ void -lpfc_unreg_did(struct 
lpfc_hba * phba, uint32_t did, LPFC_MBOXQ_t * pmb) +lpfc_unreg_did(struct lpfc_hba * phba, uint16_t vpi, uint32_t did, + LPFC_MBOXQ_t * pmb) { MAILBOX_t *mb; @@ -290,6 +309,7 @@ memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); mb->un.varUnregDID.did = did; + mb->un.varUnregDID.vpi = vpi; mb->mbxCommand = MBX_UNREG_D_ID; mb->mbxOwner = OWN_HOST; @@ -335,19 +355,17 @@ /* mailbox command */ /********************************************/ int -lpfc_reg_login(struct lpfc_hba * phba, - uint32_t did, uint8_t * param, LPFC_MBOXQ_t * pmb, uint32_t flag) +lpfc_reg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t did, + uint8_t *param, LPFC_MBOXQ_t *pmb, uint32_t flag) { + MAILBOX_t *mb = &pmb->mb; uint8_t *sparam; struct lpfc_dmabuf *mp; - MAILBOX_t *mb; - struct lpfc_sli *psli; - psli = &phba->sli; - mb = &pmb->mb; memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); mb->un.varRegLogin.rpi = 0; + mb->un.varRegLogin.vpi = vpi; mb->un.varRegLogin.did = did; mb->un.varWords[30] = flag; /* Set flag to issue action on cmpl */ @@ -359,12 +377,10 @@ kfree(mp); mb->mbxCommand = MBX_REG_LOGIN64; /* REG_LOGIN: no buffers */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_MBOX, - "%d:0302 REG_LOGIN: no buffers Data x%x x%x\n", - phba->brd_no, - (uint32_t) did, (uint32_t) flag); + lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX, + "%d (%d):0302 REG_LOGIN: no buffers, DID x%x, " + "flag x%x\n", + phba->brd_no, vpi, did, flag); return (1); } INIT_LIST_HEAD(&mp->list); @@ -389,7 +405,8 @@ /* mailbox command */ /**********************************************/ void -lpfc_unreg_login(struct lpfc_hba * phba, uint32_t rpi, LPFC_MBOXQ_t * pmb) +lpfc_unreg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t rpi, + LPFC_MBOXQ_t * pmb) { MAILBOX_t *mb; @@ -398,12 +415,52 @@ mb->un.varUnregLogin.rpi = (uint16_t) rpi; mb->un.varUnregLogin.rsvd1 = 0; + mb->un.varUnregLogin.vpi = vpi; mb->mbxCommand = MBX_UNREG_LOGIN; mb->mbxOwner = OWN_HOST; return; } +/**************************************************/ +/* lpfc_reg_vpi Issue 
a REG_VPI */ +/* mailbox command */ +/**************************************************/ +void +lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid, + LPFC_MBOXQ_t *pmb) +{ + MAILBOX_t *mb = &pmb->mb; + + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + + mb->un.varRegVpi.vpi = vpi; + mb->un.varRegVpi.sid = sid; + + mb->mbxCommand = MBX_REG_VPI; + mb->mbxOwner = OWN_HOST; + return; + +} + +/**************************************************/ +/* lpfc_unreg_vpi Issue a UNREG_VNPI */ +/* mailbox command */ +/**************************************************/ +void +lpfc_unreg_vpi(struct lpfc_hba *phba, uint16_t vpi, LPFC_MBOXQ_t *pmb) +{ + MAILBOX_t *mb = &pmb->mb; + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + + mb->un.varUnregVpi.vpi = vpi; + + mb->mbxCommand = MBX_UNREG_VPI; + mb->mbxOwner = OWN_HOST; + return; + +} + static void lpfc_config_pcb_setup(struct lpfc_hba * phba) { @@ -412,14 +469,18 @@ PCB_t *pcbp = &phba->slim2p->pcb; dma_addr_t pdma_addr; uint32_t offset; - uint32_t iocbCnt; + uint32_t iocbCnt = 0; int i; pcbp->maxRing = (psli->num_rings - 1); - iocbCnt = 0; for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; + + pring->sizeCiocb = phba->sli_rev == 3 ? SLI3_IOCB_CMD_SIZE: + SLI2_IOCB_CMD_SIZE; + pring->sizeRiocb = phba->sli_rev == 3 ? 
SLI3_IOCB_RSP_SIZE: + SLI2_IOCB_RSP_SIZE; /* A ring MUST have both cmd and rsp entries defined to be valid */ if ((pring->numCiocb == 0) || (pring->numRiocb == 0)) { @@ -434,20 +495,18 @@ continue; } /* Command ring setup for ring */ - pring->cmdringaddr = - (void *)&phba->slim2p->IOCBs[iocbCnt]; + pring->cmdringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt]; pcbp->rdsc[i].cmdEntries = pring->numCiocb; - offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] - - (uint8_t *)phba->slim2p; + offset = (uint8_t *) &phba->slim2p->IOCBs[iocbCnt] - + (uint8_t *) phba->slim2p; pdma_addr = phba->slim2p_mapping + offset; pcbp->rdsc[i].cmdAddrHigh = putPaddrHigh(pdma_addr); pcbp->rdsc[i].cmdAddrLow = putPaddrLow(pdma_addr); iocbCnt += pring->numCiocb; /* Response ring setup for ring */ - pring->rspringaddr = - (void *)&phba->slim2p->IOCBs[iocbCnt]; + pring->rspringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt]; pcbp->rdsc[i].rspEntries = pring->numRiocb; offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] - @@ -462,16 +521,108 @@ void lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) { - MAILBOX_t *mb; - - mb = &pmb->mb; + MAILBOX_t *mb = &pmb->mb; memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); mb->un.varRdRev.cv = 1; + mb->un.varRdRev.v3req = 1; /* Request SLI3 info */ mb->mbxCommand = MBX_READ_REV; mb->mbxOwner = OWN_HOST; return; } +static void +lpfc_build_hbq_profile2(struct config_hbq_var *hbqmb, + struct lpfc_hbq_init *hbq_desc) +{ + hbqmb->profiles.profile2.seqlenbcnt = hbq_desc->seqlenbcnt; + hbqmb->profiles.profile2.maxlen = hbq_desc->maxlen; + hbqmb->profiles.profile2.seqlenoff = hbq_desc->seqlenoff; +} + +static void +lpfc_build_hbq_profile3(struct config_hbq_var *hbqmb, + struct lpfc_hbq_init *hbq_desc) +{ + hbqmb->profiles.profile3.seqlenbcnt = hbq_desc->seqlenbcnt; + hbqmb->profiles.profile3.maxlen = hbq_desc->maxlen; + hbqmb->profiles.profile3.cmdcodeoff = hbq_desc->cmdcodeoff; + hbqmb->profiles.profile3.seqlenoff = hbq_desc->seqlenoff; + 
memcpy(&hbqmb->profiles.profile3.cmdmatch, hbq_desc->cmdmatch, + sizeof(hbqmb->profiles.profile3.cmdmatch)); +} + +static void +lpfc_build_hbq_profile5(struct config_hbq_var *hbqmb, + struct lpfc_hbq_init *hbq_desc) +{ + hbqmb->profiles.profile5.seqlenbcnt = hbq_desc->seqlenbcnt; + hbqmb->profiles.profile5.maxlen = hbq_desc->maxlen; + hbqmb->profiles.profile5.cmdcodeoff = hbq_desc->cmdcodeoff; + hbqmb->profiles.profile5.seqlenoff = hbq_desc->seqlenoff; + memcpy(&hbqmb->profiles.profile5.cmdmatch, hbq_desc->cmdmatch, + sizeof(hbqmb->profiles.profile5.cmdmatch)); +} + +void +lpfc_config_hbq(struct lpfc_hba *phba, struct lpfc_hbq_init *hbq_desc, + uint32_t hbq_entry_index, LPFC_MBOXQ_t *pmb) +{ + int i; + MAILBOX_t *mb = &pmb->mb; + struct config_hbq_var *hbqmb = &mb->un.varCfgHbq; + + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + hbqmb->entry_count = hbq_desc->entry_count; /* # entries in HBQ */ + hbqmb->recvNotify = hbq_desc->rn; /* Receive + * Notification */ + hbqmb->numMask = hbq_desc->mask_count; /* # R_CTL/TYPE masks + * # in words 0-19 */ + hbqmb->profile = hbq_desc->profile; /* Selection profile: + * 0 = all, + * 7 = logentry */ + hbqmb->ringMask = hbq_desc->ring_mask; /* Binds HBQ to a ring + * e.g. Ring0=b0001, + * ring2=b0100 */ + hbqmb->headerLen = hbq_desc->headerLen; /* 0 if not profile 4 + * or 5 */ + hbqmb->logEntry = hbq_desc->logEntry; /* Set to 1 if this + * HBQ will be used + * for LogEntry + * buffers */ + hbqmb->hbqaddrLow = putPaddrLow(phba->hbqslimp.phys) + + hbq_entry_index * sizeof(struct lpfc_hbq_entry); + hbqmb->hbqaddrHigh = putPaddrHigh(phba->hbqslimp.phys); + + mb->mbxCommand = MBX_CONFIG_HBQ; + mb->mbxOwner = OWN_HOST; + + /* Copy info for profiles 2,3,5. 
Other + * profiles this area is reserved + */ + if (hbq_desc->profile == 2) + lpfc_build_hbq_profile2(hbqmb, hbq_desc); + else if (hbq_desc->profile == 3) + lpfc_build_hbq_profile3(hbqmb, hbq_desc); + else if (hbq_desc->profile == 5) + lpfc_build_hbq_profile5(hbqmb, hbq_desc); + + /* Return if no rctl / type masks for this HBQ */ + if (!hbq_desc->mask_count) + return; + + /* Otherwise we setup specific rctl / type masks for this HBQ */ + for (i = 0; i < hbq_desc->mask_count; i++) { + hbqmb->hbqMasks[i].tmatch = hbq_desc->hbqMasks[i].tmatch; + hbqmb->hbqMasks[i].tmask = hbq_desc->hbqMasks[i].tmask; + hbqmb->hbqMasks[i].rctlmatch = hbq_desc->hbqMasks[i].rctlmatch; + hbqmb->hbqMasks[i].rctlmask = hbq_desc->hbqMasks[i].rctlmask; + } + + return; +} + + + void lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb) { @@ -514,15 +665,16 @@ } void -lpfc_config_port(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { + MAILBOX_t __iomem *mb_slim = (MAILBOX_t __iomem *) phba->MBslimaddr; MAILBOX_t *mb = &pmb->mb; dma_addr_t pdma_addr; uint32_t bar_low, bar_high; size_t offset; struct lpfc_hgp hgp; - void __iomem *to_slim; int i; + uint32_t pgp_offset; memset(pmb, 0, sizeof(LPFC_MBOXQ_t)); mb->mbxCommand = MBX_CONFIG_PORT; @@ -535,12 +687,29 @@ mb->un.varCfgPort.pcbLow = putPaddrLow(pdma_addr); mb->un.varCfgPort.pcbHigh = putPaddrHigh(pdma_addr); + /* If HBA supports SLI=3 ask for it */ + + if (phba->sli_rev == 3 && phba->vpd.sli3Feat.cerbm) { + mb->un.varCfgPort.cerbm = 1; /* Request HBQs */ + mb->un.varCfgPort.max_hbq = 1; /* Requesting 2 HBQs */ + if (phba->max_vpi && phba->cfg_npiv_enable && + phba->vpd.sli3Feat.cmv) { + mb->un.varCfgPort.max_vpi = phba->max_vpi; + mb->un.varCfgPort.cmv = 1; + phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED; + } else + mb->un.varCfgPort.max_vpi = phba->max_vpi = 0; + } else + phba->sli_rev = 2; + mb->un.varCfgPort.sli_mode = phba->sli_rev; + /* Now setup pcb */ 
phba->slim2p->pcb.type = TYPE_NATIVE_SLI2; phba->slim2p->pcb.feature = FEATURE_INITIAL_SLI2; /* Setup Mailbox pointers */ - phba->slim2p->pcb.mailBoxSize = sizeof(MAILBOX_t); + phba->slim2p->pcb.mailBoxSize = offsetof(MAILBOX_t, us) + + sizeof(struct sli2_desc); offset = (uint8_t *)&phba->slim2p->mbx - (uint8_t *)phba->slim2p; pdma_addr = phba->slim2p_mapping + offset; phba->slim2p->pcb.mbAddrHigh = putPaddrHigh(pdma_addr); @@ -568,29 +737,70 @@ pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_0, &bar_low); pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_1, &bar_high); + /* + * Set up HGP - Port Memory + * + * The port expects the host get/put pointers to reside in memory + * following the "non-diagnostic" mode mailbox (32 words, 0x80 bytes) + * area of SLIM. In SLI-2 mode, there's an additional 16 reserved + * words (0x40 bytes). This area is not reserved if HBQs are + * configured in SLI-3. + * + * CR0Put - SLI2(no HBQs) = 0xc0, With HBQs = 0x80 + * RR0Get 0xc4 0x84 + * CR1Put 0xc8 0x88 + * RR1Get 0xcc 0x8c + * CR2Put 0xd0 0x90 + * RR2Get 0xd4 0x94 + * CR3Put 0xd8 0x98 + * RR3Get 0xdc 0x9c + * + * Reserved 0xa0-0xbf + * If HBQs configured: + * HBQ 0 Put ptr 0xc0 + * HBQ 1 Put ptr 0xc4 + * HBQ 2 Put ptr 0xc8 + * ...... 
+ * HBQ(M-1)Put Pointer 0xc0+(M-1)*4 + * + */ + + if (phba->sli_rev == 3) { + phba->host_gp = &mb_slim->us.s3.host[0]; + phba->hbq_put = &mb_slim->us.s3.hbq_put[0]; + } else { + phba->host_gp = &mb_slim->us.s2.host[0]; + phba->hbq_put = NULL; + } /* mask off BAR0's flag bits 0 - 3 */ phba->slim2p->pcb.hgpAddrLow = (bar_low & PCI_BASE_ADDRESS_MEM_MASK) + - (SLIMOFF*sizeof(uint32_t)); + (void __iomem *) phba->host_gp - + (void __iomem *)phba->MBslimaddr; if (bar_low & PCI_BASE_ADDRESS_MEM_TYPE_64) phba->slim2p->pcb.hgpAddrHigh = bar_high; else phba->slim2p->pcb.hgpAddrHigh = 0; /* write HGP data to SLIM at the required longword offset */ memset(&hgp, 0, sizeof(struct lpfc_hgp)); - to_slim = phba->MBslimaddr + (SLIMOFF*sizeof (uint32_t)); for (i=0; i < phba->sli.num_rings; i++) { - lpfc_memcpy_to_slim(to_slim, &hgp, sizeof(struct lpfc_hgp)); - to_slim += sizeof (struct lpfc_hgp); + lpfc_memcpy_to_slim(phba->host_gp + i, &hgp, + sizeof(*phba->host_gp)); } /* Setup Port Group ring pointer */ - offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port - + if (phba->sli_rev == 3) + pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s3_pgp.port - (uint8_t *)phba->slim2p; - pdma_addr = phba->slim2p_mapping + offset; + else + pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port - + (uint8_t *)phba->slim2p; + + pdma_addr = phba->slim2p_mapping + pgp_offset; phba->slim2p->pcb.pgpAddrHigh = putPaddrHigh(pdma_addr); phba->slim2p->pcb.pgpAddrLow = putPaddrLow(pdma_addr); + phba->hbq_get = &phba->slim2p->mbx.us.s3_pgp.hbq_get[0]; /* Use callback routine to setp rings in the pcb */ lpfc_config_pcb_setup(phba); @@ -606,11 +816,7 @@ /* Swap PCB if needed */ lpfc_sli_pcimem_bcopy(&phba->slim2p->pcb, &phba->slim2p->pcb, - sizeof (PCB_t)); - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "%d:0405 Service Level Interface (SLI) 2 selected\n", - phba->brd_no); + sizeof(PCB_t)); } void @@ -644,15 +850,23 @@ LPFC_MBOXQ_t *mbq = NULL; struct lpfc_sli *psli = &phba->sli; - list_remove_head((&psli->mboxq), 
mbq, LPFC_MBOXQ_t, - list); - if (mbq) { + list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t, list); + if (mbq) psli->mboxq_cnt--; - } return mbq; } +void +lpfc_mbox_cmpl_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq) +{ + /* This function expects to be called from interupt context */ + spin_lock(&phba->hbalock); + list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl); + spin_unlock(&phba->hbalock); + return; +} + int lpfc_mbox_tmo_val(struct lpfc_hba *phba, int cmd) { diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mem.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mem.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2005 Emulex. All rights reserved. * + * Copyright (C) 2004-2006 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -38,10 +38,13 @@ #define LPFC_MBUF_POOL_SIZE 64 /* max elements in MBUF safety pool */ #define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */ + + int lpfc_mem_alloc(struct lpfc_hba * phba) { struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; + int longs; int i; phba->lpfc_scsi_dma_buf_pool = pci_pool_create("lpfc_scsi_dma_buf_pool", @@ -80,10 +83,27 @@ if (!phba->nlp_mem_pool) goto fail_free_mbox_pool; + phba->lpfc_hbq_pool = pci_pool_create("lpfc_hbq_pool",phba->pcidev, + LPFC_BPL_SIZE, 8, 0); + if (!phba->lpfc_hbq_pool) + goto fail_free_nlp_mem_pool; + + /* vpi zero is reserved for the physical port so add 1 to max */ + longs = ((phba->max_vpi + 1) + BITS_PER_LONG - 1) / BITS_PER_LONG; + phba->vpi_bmask = kzalloc(longs * sizeof(unsigned long), GFP_KERNEL); + if (!phba->vpi_bmask) + goto fail_free_hbq_pool; + return 0; + fail_free_hbq_pool: + lpfc_sli_hbqbuf_free_all(phba); + fail_free_nlp_mem_pool: + mempool_destroy(phba->nlp_mem_pool); + phba->nlp_mem_pool = NULL; fail_free_mbox_pool: mempool_destroy(phba->mbox_mem_pool); + phba->mbox_mem_pool = NULL; fail_free_mbuf_pool: while (i--) pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt, @@ -91,8 +111,10 @@ kfree(pool->elements); fail_free_lpfc_mbuf_pool: pci_pool_destroy(phba->lpfc_mbuf_pool); + phba->lpfc_mbuf_pool = NULL; fail_free_dma_buf_pool: pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); + phba->lpfc_scsi_dma_buf_pool = NULL; fail: return -ENOMEM; } @@ -106,6 +128,9 @@ struct lpfc_dmabuf *mp; int i; + kfree(phba->vpi_bmask); + lpfc_sli_hbqbuf_free_all(phba); + list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) { mp = (struct lpfc_dmabuf *) (mbox->context1); if (mp) { @@ -115,6 +140,15 @@ list_del(&mbox->list); mempool_free(mbox, phba->mbox_mem_pool); } + list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) { + mp = (struct lpfc_dmabuf *) (mbox->context1); + if (mp) { + 
lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + } + list_del(&mbox->list); + mempool_free(mbox, phba->mbox_mem_pool); + } psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; if (psli->mbox_active) { @@ -132,12 +166,20 @@ pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt, pool->elements[i].phys); kfree(pool->elements); + + pci_pool_destroy(phba->lpfc_hbq_pool); mempool_destroy(phba->nlp_mem_pool); mempool_destroy(phba->mbox_mem_pool); pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); pci_pool_destroy(phba->lpfc_mbuf_pool); + phba->lpfc_hbq_pool = NULL; + phba->nlp_mem_pool = NULL; + phba->mbox_mem_pool = NULL; + phba->lpfc_scsi_dma_buf_pool = NULL; + phba->lpfc_mbuf_pool = NULL; + /* Free the iocb lookup array */ kfree(psli->iocbq_lookup); psli->iocbq_lookup = NULL; @@ -148,20 +190,23 @@ lpfc_mbuf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle) { struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; + unsigned long iflags; void *ret; ret = pci_pool_alloc(phba->lpfc_mbuf_pool, GFP_KERNEL, handle); - if (!ret && ( mem_flags & MEM_PRI) && pool->current_count) { + spin_lock_irqsave(&phba->hbalock, iflags); + if (!ret && (mem_flags & MEM_PRI) && pool->current_count) { pool->current_count--; ret = pool->elements[pool->current_count].virt; *handle = pool->elements[pool->current_count].phys; } + spin_unlock_irqrestore(&phba->hbalock, iflags); return ret; } void -lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) +__lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) { struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; @@ -174,3 +219,51 @@ } return; } + +void +lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) +{ + unsigned long iflags; + + spin_lock_irqsave(&phba->hbalock, iflags); + __lpfc_mbuf_free(phba, virt, dma); + spin_unlock_irqrestore(&phba->hbalock, iflags); + return; +} + +void * +lpfc_hbq_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle) +{ + void *ret; + ret = 
pci_pool_alloc(phba->lpfc_hbq_pool, GFP_ATOMIC, handle); + return ret; +} + +void +lpfc_hbq_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma) +{ + pci_pool_free(phba->lpfc_hbq_pool, virt, dma); + return; +} + +void +lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) +{ + struct hbq_dmabuf *hbq_entry; + + if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf); + if (hbq_entry->tag == -1) { + lpfc_hbq_free(phba, hbq_entry->dbuf.virt, + hbq_entry->dbuf.phys); + kfree(hbq_entry); + } else { + lpfc_sli_free_hbq(phba, hbq_entry); + } + } else { + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + } + return; +} + diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_nportdisc.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_nportdisc.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,4 +1,4 @@ -/******************************************************************* + /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * * Copyright (C) 2004-2007 Emulex. All rights reserved. * @@ -35,20 +35,22 @@ #include "lpfc.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" +#include "lpfc_vport.h" +#include "lpfc_debugfs.h" /* Called to verify a rcv'ed ADISC was intended for us. */ static int -lpfc_check_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, - struct lpfc_name * nn, struct lpfc_name * pn) +lpfc_check_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct lpfc_name *nn, struct lpfc_name *pn) { /* Compare the ADISC rsp WWNN / WWPN matches our internal node * table entry for that node. 
*/ - if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)) != 0) + if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name))) return 0; - if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)) != 0) + if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name))) return 0; /* we match, return success */ @@ -56,11 +58,10 @@ } int -lpfc_check_sparm(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, struct serv_parm * sp, - uint32_t class) +lpfc_check_sparm(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct serv_parm * sp, uint32_t class) { - volatile struct serv_parm *hsp = &phba->fc_sparam; + volatile struct serv_parm *hsp = &vport->fc_sparam; uint16_t hsp_value, ssp_value = 0; /* @@ -75,12 +76,14 @@ hsp->cls1.rcvDataSizeLsb; ssp_value = (sp->cls1.rcvDataSizeMsb << 8) | sp->cls1.rcvDataSizeLsb; + if (!ssp_value) + goto bad_service_param; if (ssp_value > hsp_value) { sp->cls1.rcvDataSizeLsb = hsp->cls1.rcvDataSizeLsb; sp->cls1.rcvDataSizeMsb = hsp->cls1.rcvDataSizeMsb; } } else if (class == CLASS1) { - return 0; + goto bad_service_param; } if (sp->cls2.classValid) { @@ -88,12 +91,14 @@ hsp->cls2.rcvDataSizeLsb; ssp_value = (sp->cls2.rcvDataSizeMsb << 8) | sp->cls2.rcvDataSizeLsb; + if (!ssp_value) + goto bad_service_param; if (ssp_value > hsp_value) { sp->cls2.rcvDataSizeLsb = hsp->cls2.rcvDataSizeLsb; sp->cls2.rcvDataSizeMsb = hsp->cls2.rcvDataSizeMsb; } } else if (class == CLASS2) { - return 0; + goto bad_service_param; } if (sp->cls3.classValid) { @@ -101,12 +106,14 @@ hsp->cls3.rcvDataSizeLsb; ssp_value = (sp->cls3.rcvDataSizeMsb << 8) | sp->cls3.rcvDataSizeLsb; + if (!ssp_value) + goto bad_service_param; if (ssp_value > hsp_value) { sp->cls3.rcvDataSizeLsb = hsp->cls3.rcvDataSizeLsb; sp->cls3.rcvDataSizeMsb = hsp->cls3.rcvDataSizeMsb; } } else if (class == CLASS3) { - return 0; + goto bad_service_param; } /* @@ -125,11 +132,21 @@ memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name)); 
memcpy(&ndlp->nlp_portname, &sp->portName, sizeof (struct lpfc_name)); return 1; +bad_service_param: + lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0207 Device %x " + "(%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x) sent " + "invalid service parameters. Ignoring device.\n", + vport->phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID, + sp->nodeName.u.wwn[0], sp->nodeName.u.wwn[1], + sp->nodeName.u.wwn[2], sp->nodeName.u.wwn[3], + sp->nodeName.u.wwn[4], sp->nodeName.u.wwn[5], + sp->nodeName.u.wwn[6], sp->nodeName.u.wwn[7]); + return 0; } static void * -lpfc_check_elscmpl_iocb(struct lpfc_hba * phba, - struct lpfc_iocbq *cmdiocb, +lpfc_check_elscmpl_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb) { struct lpfc_dmabuf *pcmd, *prsp; @@ -168,32 +185,29 @@ * routine effectively results in a "software abort". */ int -lpfc_els_abort(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) +lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(completions); - struct lpfc_sli *psli; - struct lpfc_sli_ring *pring; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; struct lpfc_iocbq *iocb, *next_iocb; IOCB_t *cmd; /* Abort outstanding I/O on NPort */ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0205 Abort outstanding I/O on NPort x%x " + "%d (%d):0205 Abort outstanding I/O on NPort x%x " "Data: x%x x%x x%x\n", - phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_state, ndlp->nlp_rpi); + phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); - psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; + lpfc_fabric_abort_nport(ndlp); /* First check the txq */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { - /* Check to see if iocb matches the nport we are looking - for */ + /* Check to see if iocb matches the nport we are 
looking for */ if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) { - /* It matches, so deque and call compl with an - error */ + /* It matches, so deque and call compl with an error */ list_move_tail(&iocb->list, &completions); pring->txq_cnt--; } @@ -201,37 +215,39 @@ /* Next check the txcmplq */ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) { - /* Check to see if iocb matches the nport we are looking - for */ - if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) + /* Check to see if iocb matches the nport we are looking for */ + if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) { lpfc_sli_issue_abort_iotag(phba, pring, iocb); } - spin_unlock_irq(phba->host->host_lock); + } + spin_unlock_irq(&phba->hbalock); while (!list_empty(&completions)) { iocb = list_get_first(&completions, struct lpfc_iocbq, list); cmd = &iocb->iocb; - list_del(&iocb->list); + list_del_init(&iocb->list); - if (iocb->iocb_cmpl) { + if (!iocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, iocb); + else { cmd->ulpStatus = IOSTAT_LOCAL_REJECT; cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; (iocb->iocb_cmpl) (phba, iocb, iocb); - } else - lpfc_sli_release_iocbq(phba, iocb); + } } /* If we are delaying issuing an ELS command, cancel it */ if (ndlp->nlp_flag & NLP_DELAY_TMO) - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(phba->pport, ndlp); return 0; } static int -lpfc_rcv_plogi(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, +lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd; uint32_t *lp; IOCB_t *icmd; @@ -241,14 +257,14 @@ int rc; memset(&stat, 0, sizeof (struct ls_rjt)); - if (phba->hba_state <= LPFC_FLOGI) { + if (vport->port_state <= LPFC_FLOGI) { /* Before responding to PLOGI, check for pt2pt mode. * If we are pt2pt, with an outstanding FLOGI, abort * the FLOGI and resend it first. 
*/ - if (phba->fc_flag & FC_PT2PT) { + if (vport->fc_flag & FC_PT2PT) { lpfc_els_abort_flogi(phba); - if (!(phba->fc_flag & FC_PT2PT_PLOGI)) { + if (!(vport->fc_flag & FC_PT2PT_PLOGI)) { /* If the other side is supposed to initiate * the PLOGI anyway, just ACC it now and * move on with discovery. @@ -257,45 +273,42 @@ phba->fc_ratov = FF_DEF_RATOV; /* Start discovery - this should just do CLEAR_LA */ - lpfc_disc_start(phba); - } else { - lpfc_initial_flogi(phba); - } + lpfc_disc_start(vport); + } else + lpfc_initial_flogi(vport); } else { stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY; stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, - ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, + ndlp, NULL); return 0; } } pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; lp = (uint32_t *) pcmd->virt; sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); - if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3) == 0)) { + if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3) == 0)) { /* Reject this request because invalid parameters */ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, + NULL); return 0; } icmd = &cmdiocb->iocb; /* PLOGI chkparm OK */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_ELS, - "%d:0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n", - phba->brd_no, + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag, ndlp->nlp_rpi); - if ((phba->cfg_fcp_class == 2) && - (sp->cls2.classValid)) { + if (phba->cfg_fcp_class == 2 && sp->cls2.classValid) ndlp->nlp_fcp_info |= CLASS2; - } else { + else ndlp->nlp_fcp_info |= CLASS3; - } + ndlp->nlp_class_sup = 0; if (sp->cls1.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS1; @@ 
-317,35 +330,37 @@ case NLP_STE_PRLI_ISSUE: case NLP_STE_UNMAPPED_NODE: case NLP_STE_MAPPED_NODE: - lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0); return 1; } - if ((phba->fc_flag & FC_PT2PT) - && !(phba->fc_flag & FC_PT2PT_PLOGI)) { + if ((vport->fc_flag & FC_PT2PT) && + !(vport->fc_flag & FC_PT2PT_PLOGI)) { /* rcv'ed PLOGI decides what our NPortId will be */ - phba->fc_myDID = icmd->un.rcvels.parmRo; + vport->fc_myDID = icmd->un.rcvels.parmRo; mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox == NULL) goto out; lpfc_config_link(phba, mbox); mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mbox->vport = vport; rc = lpfc_sli_issue_mbox (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); if (rc == MBX_NOT_FINISHED) { - mempool_free( mbox, phba->mbox_mem_pool); + mempool_free(mbox, phba->mbox_mem_pool); goto out; } - lpfc_can_disctmo(phba); + lpfc_can_disctmo(vport); } mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (mbox == NULL) + if (!mbox) goto out; - if (lpfc_reg_login(phba, icmd->un.rcvels.remoteID, - (uint8_t *) sp, mbox, 0)) { - mempool_free( mbox, phba->mbox_mem_pool); + rc = lpfc_reg_login(phba, vport->vpi, icmd->un.rcvels.remoteID, + (uint8_t *) sp, mbox, 0); + if (rc) { + mempool_free(mbox, phba->mbox_mem_pool); goto out; } @@ -357,7 +372,10 @@ * mbox->context2 = lpfc_nlp_get(ndlp) deferred until mailbox * command issued in lpfc_cmpl_els_acc(). 
*/ + mbox->vport = vport; + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); + spin_unlock_irq(shost->host_lock); /* * If there is an outstanding PLOGI issued, abort it before @@ -373,21 +391,38 @@ lpfc_els_abort(phba, ndlp); } - lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0); + if ((vport->port_type == LPFC_NPIV_PORT && + phba->cfg_vport_restrict_login)) { + + /* In order to preserve RPIs, we want to cleanup + * the default RPI the firmware created to rcv + * this ELS request. The only way to do this is + * to register, then unregister the RPI. + */ + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_RM_DFLT_RPI; + spin_unlock_irq(shost->host_lock); + stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD; + stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, + ndlp, mbox); + return 1; + } + lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0); return 1; out: stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); return 0; } static int -lpfc_rcv_padisc(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, +lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_dmabuf *pcmd; struct serv_parm *sp; struct lpfc_name *pnn, *ppn; @@ -412,12 +447,11 @@ } icmd = &cmdiocb->iocb; - if ((icmd->ulpStatus == 0) && - (lpfc_check_adisc(phba, ndlp, pnn, ppn))) { + if (icmd->ulpStatus == 0 && lpfc_check_adisc(vport, ndlp, pnn, ppn)) { if (cmd == ELS_CMD_ADISC) { - lpfc_els_rsp_adisc_acc(phba, cmdiocb, ndlp); + lpfc_els_rsp_adisc_acc(vport, cmdiocb, ndlp); } else { - lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, + lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0); } 
return 1; @@ -427,55 +461,57 @@ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS; stat.un.b.vendorUnique = 0; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); /* 1 sec timeout */ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); return 0; } static int -lpfc_rcv_logo(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, - struct lpfc_iocbq *cmdiocb, - uint32_t els_cmd) +lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct lpfc_iocbq *cmdiocb, uint32_t els_cmd) { - /* Put ndlp on NPR list with 1 sec timeout for plogi, ACC logo */ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + /* Put ndlp in NPR state with 1 sec timeout for plogi, ACC logo */ /* Only call LOGO ACC for first LOGO, this avoids sending unnecessary * PLOGIs during LOGO storms from a device. 
*/ + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_LOGO_ACC; + spin_unlock_irq(shost->host_lock); if (els_cmd == ELS_CMD_PRLO) - lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); else - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); if (!(ndlp->nlp_type & NLP_FABRIC) || (ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) { /* Only try to re-login if this is NOT a Fabric Node */ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); } else { ndlp->nlp_prev_state = ndlp->nlp_state; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); } - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); /* The driver has to wait until the ACC completes before it continues * processing the LOGO. The action will resume in * lpfc_cmpl_els_logo_acc routine. 
Since part of processing includes an @@ -485,8 +521,7 @@ } static void -lpfc_rcv_prli(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, +lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) { struct lpfc_dmabuf *pcmd; @@ -501,8 +536,7 @@ ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; - if ((npr->acceptRspCode == PRLI_REQ_EXECUTED) && - (npr->prliType == PRLI_FCP_TYPE)) { + if (npr->prliType == PRLI_FCP_TYPE) { if (npr->initiatorFunc) ndlp->nlp_type |= NLP_FCP_INITIATOR; if (npr->targetFunc) @@ -517,36 +551,42 @@ roles |= FC_RPORT_ROLE_FCP_INITIATOR; if (ndlp->nlp_type & NLP_FCP_TARGET) roles |= FC_RPORT_ROLE_FCP_TARGET; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, + "rport rolechg: role:x%x did:x%x flg:x%x", + roles, ndlp->nlp_DID, ndlp->nlp_flag); + fc_remote_port_rolechg(rport, roles); } } static uint32_t -lpfc_disc_set_adisc(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp) +lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + /* Check config parameter use-adisc or FCP-2 */ - if ((phba->cfg_use_adisc == 0) && - !(phba->fc_flag & FC_RSCN_MODE)) { - if (!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE)) - return 0; - } - spin_lock_irq(phba->host->host_lock); + if ((phba->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || + ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); return 1; + } + ndlp->nlp_flag &= ~NLP_NPR_ADISC; + lpfc_unreg_rpi(vport, ndlp); + return 0; } static uint32_t -lpfc_disc_illegal(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - lpfc_printf_log(phba, - 
KERN_ERR, - LOG_DISCOVERY, - "%d:0253 Illegal State Transition: node x%x event x%x, " - "state x%x Data: x%x x%x\n", - phba->brd_no, + lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0253 Illegal State Transition: node x%x " + "event x%x, state x%x Data: x%x x%x\n", + vport->phba->brd_no, vport->vpi, ndlp->nlp_DID, evt, ndlp->nlp_state, ndlp->nlp_rpi, ndlp->nlp_flag); return ndlp->nlp_state; @@ -555,150 +595,161 @@ /* Start of Discovery State Machine routines */ static uint32_t -lpfc_rcv_plogi_unused_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_plogi_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb; cmdiocb = (struct lpfc_iocbq *) arg; - if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) { + if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) { ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); return ndlp->nlp_state; } - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } static uint32_t -lpfc_rcv_els_unused_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_els_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - lpfc_issue_els_logo(phba, ndlp, 0); - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); + lpfc_issue_els_logo(vport, ndlp, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_unused_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_iocbq *cmdiocb = (struct 
lpfc_iocbq *) arg; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_LOGO_ACC; - spin_unlock_irq(phba->host->host_lock); - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); + spin_unlock_irq(shost->host_lock); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_logo_unused_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_cmpl_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } static uint32_t -lpfc_device_rm_unused_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_device_rm_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } static uint32_t -lpfc_rcv_plogi_plogi_issue(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, +lpfc_rcv_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, void *arg, uint32_t evt) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb = arg; - struct lpfc_dmabuf *pcmd; - struct serv_parm *sp; - uint32_t *lp; + struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; + uint32_t *lp = (uint32_t *) pcmd->virt; + struct serv_parm *sp = (struct serv_parm *) (lp + 1); struct ls_rjt stat; int port_cmp; - pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; - lp = (uint32_t *) pcmd->virt; - sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); - memset(&stat, 0, sizeof (struct ls_rjt)); /* For a PLOGI, we only accept if our portname is less * than the remote portname. 
*/ phba->fc_stat.elsLogiCol++; - port_cmp = memcmp(&phba->fc_portname, &sp->portName, - sizeof (struct lpfc_name)); + port_cmp = memcmp(&vport->fc_portname, &sp->portName, + sizeof(struct lpfc_name)); if (port_cmp >= 0) { /* Reject this request because the remote node will accept ours */ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, + NULL); } else { - lpfc_rcv_plogi(phba, ndlp, cmdiocb); - } /* if our portname was less */ + lpfc_rcv_plogi(vport, ndlp, cmdiocb); + } /* If our portname was less */ return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_plogi_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prli_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + struct ls_rjt stat; - cmdiocb = (struct lpfc_iocbq *) arg; + memset(&stat, 0, sizeof (struct ls_rjt)); + stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY; + stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); + return ndlp->nlp_state; +} + +static uint32_t +lpfc_rcv_logo_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) +{ + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* software abort outstanding PLOGI */ - lpfc_els_abort(phba, ndlp); + lpfc_els_abort(vport->phba, ndlp); - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_els_plogi_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_els_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct 
lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* software abort outstanding PLOGI */ lpfc_els_abort(phba, ndlp); if (evt == NLP_EVT_RCV_LOGO) { - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); } else { - lpfc_issue_els_logo(phba, ndlp, 0); + lpfc_issue_els_logo(vport, ndlp, 0); } - /* Put ndlp in npr list set plogi timer for 1 sec */ + /* Put ndlp in npr state set plogi timer for 1 sec */ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_plogi_plogi_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb, *rspiocb; struct lpfc_dmabuf *pcmd, *prsp, *mp; uint32_t *lp; @@ -721,31 +772,26 @@ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; - prsp = list_get_first(&pcmd->list, - struct lpfc_dmabuf, - list); - lp = (uint32_t *) prsp->virt; + prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list); + lp = (uint32_t *) prsp->virt; sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); - if (!lpfc_check_sparm(phba, ndlp, sp, CLASS3)) + if (!lpfc_check_sparm(vport, ndlp, sp, CLASS3)) goto out; /* PLOGI chkparm OK */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_ELS, - "%d:0121 PLOGI chkparm OK " + 
lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (%d):0121 PLOGI chkparm OK " "Data: x%x x%x x%x x%x\n", - phba->brd_no, + phba->brd_no, vport->vpi, ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag, ndlp->nlp_rpi); - if ((phba->cfg_fcp_class == 2) && - (sp->cls2.classValid)) { + if (phba->cfg_fcp_class == 2 && (sp->cls2.classValid)) ndlp->nlp_fcp_info |= CLASS2; - } else { + else ndlp->nlp_fcp_info |= CLASS3; - } + ndlp->nlp_class_sup = 0; if (sp->cls1.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS1; @@ -756,16 +802,23 @@ if (sp->cls4.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS4; ndlp->nlp_maxframe = - ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | - sp->cmn.bbRcvSizeLsb; + ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb; - if (!(mbox = mempool_alloc(phba->mbox_mem_pool, - GFP_KERNEL))) + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) { + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0133 PLOGI: no memory for reg_login " + "Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, + ndlp->nlp_DID, ndlp->nlp_state, + ndlp->nlp_flag, ndlp->nlp_rpi); goto out; + } + + lpfc_unreg_rpi(vport, ndlp); - lpfc_unreg_rpi(phba, ndlp); - if (lpfc_reg_login(phba, irsp->un.elsreq64.remoteID, (uint8_t *) sp, - mbox, 0) == 0) { + if (lpfc_reg_login(phba, vport->vpi, irsp->un.elsreq64.remoteID, + (uint8_t *) sp, mbox, 0) == 0) { switch (ndlp->nlp_DID) { case NameServer_DID: mbox->mbox_cmpl = lpfc_mbx_cmpl_ns_reg_login; @@ -777,68 +830,104 @@ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; } mbox->context2 = lpfc_nlp_get(ndlp); + mbox->vport = vport; if (lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) { - lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE); + lpfc_nlp_set_state(vport, ndlp, + NLP_STE_REG_LOGIN_ISSUE); return ndlp->nlp_state; } lpfc_nlp_put(ndlp); - mp = (struct lpfc_dmabuf *)mbox->context1; + mp = (struct lpfc_dmabuf *) mbox->context1; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); 
mempool_free(mbox, phba->mbox_mem_pool); + + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0134 PLOGI: cannot issue reg_login " + "Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, + ndlp->nlp_DID, ndlp->nlp_state, + ndlp->nlp_flag, ndlp->nlp_rpi); } else { mempool_free(mbox, phba->mbox_mem_pool); + + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0135 PLOGI: cannot format reg_login " + "Data: x%x x%x x%x x%x\n", + phba->brd_no, vport->vpi, + ndlp->nlp_DID, ndlp->nlp_state, + ndlp->nlp_flag, ndlp->nlp_rpi); } - out: +out: + if (ndlp->nlp_DID == NameServer_DID) { + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0261 Cannot Register NameServer login\n", + phba->brd_no, vport->vpi); + } + /* Free this node since the driver cannot login or has the wrong sparm */ - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } static uint32_t -lpfc_device_rm_plogi_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_device_rm_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NODEV_REMOVE; + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; - } - else { + } else { /* software abort outstanding PLOGI */ - lpfc_els_abort(phba, ndlp); + lpfc_els_abort(vport->phba, ndlp); - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } } static uint32_t -lpfc_device_recov_plogi_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_device_recov_plogi_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + + /* Don't 
do anything that will mess up processing of the + * previous RSCN. + */ + if (vport->fc_flag & FC_RSCN_DEFERRED) + return ndlp->nlp_state; + /* software abort outstanding PLOGI */ lpfc_els_abort(phba, ndlp); ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_plogi_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_plogi_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb; /* software abort outstanding ADISC */ @@ -846,34 +935,31 @@ cmdiocb = (struct lpfc_iocbq *) arg; - if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) { + if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) return ndlp->nlp_state; - } + ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prli_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_prli_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t 
evt) +lpfc_rcv_logo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb; cmdiocb = (struct lpfc_iocbq *) arg; @@ -881,42 +967,43 @@ /* software abort outstanding ADISC */ lpfc_els_abort(phba, ndlp); - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_padisc_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_padisc_adisc_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb; cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_padisc(phba, ndlp, cmdiocb); + lpfc_rcv_padisc(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prlo_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_prlo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb; cmdiocb = (struct lpfc_iocbq *) arg; /* Treat like rcv logo */ - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO); return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_adisc_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; ADISC *ap; @@ -928,101 +1015,112 @@ irsp = &rspiocb->iocb; if ((irsp->ulpStatus) || - (!lpfc_check_adisc(phba, ndlp, &ap->nodeName, &ap->portName))) { + (!lpfc_check_adisc(vport, ndlp, &ap->nodeName, &ap->portName))) { /* 1 sec timeout */ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ); - 
spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; - memset(&ndlp->nlp_nodename, 0, sizeof (struct lpfc_name)); - memset(&ndlp->nlp_portname, 0, sizeof (struct lpfc_name)); + memset(&ndlp->nlp_nodename, 0, sizeof(struct lpfc_name)); + memset(&ndlp->nlp_portname, 0, sizeof(struct lpfc_name)); ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - lpfc_unreg_rpi(phba, ndlp); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + lpfc_unreg_rpi(vport, ndlp); return ndlp->nlp_state; } if (ndlp->nlp_type & NLP_FCP_TARGET) { ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); } else { ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); } return ndlp->nlp_state; } static uint32_t -lpfc_device_rm_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_device_rm_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NODEV_REMOVE; + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; - } - else { + } else { /* software abort outstanding ADISC */ - lpfc_els_abort(phba, ndlp); + lpfc_els_abort(vport->phba, ndlp); - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } } static uint32_t -lpfc_device_recov_adisc_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_device_recov_adisc_issue(struct lpfc_vport *vport, + 
struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + + /* Don't do anything that will mess up processing of the + * previous RSCN. + */ + if (vport->fc_flag & FC_RSCN_DEFERRED) + return ndlp->nlp_state; + /* software abort outstanding ADISC */ lpfc_els_abort(phba, ndlp); ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - ndlp->nlp_flag |= NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); - + spin_unlock_irq(shost->host_lock); + lpfc_disc_set_adisc(vport, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_plogi_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_rcv_plogi_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_plogi(phba, ndlp, cmdiocb); + lpfc_rcv_plogi(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prli_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { 
- struct lpfc_iocbq *cmdiocb; + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; LPFC_MBOXQ_t *mb; LPFC_MBOXQ_t *nextmb; struct lpfc_dmabuf *mp; @@ -1033,12 +1131,13 @@ if ((mb = phba->sli.mbox_active)) { if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) && (ndlp == (struct lpfc_nodelist *) mb->context2)) { + lpfc_nlp_put(ndlp); mb->context2 = NULL; mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; } } - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) && (ndlp == (struct lpfc_nodelist *) mb->context2)) { @@ -1047,61 +1146,61 @@ lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } + lpfc_nlp_put(ndlp); list_del(&mb->list); mempool_free(mb, phba->mbox_mem_pool); } } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_padisc_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_rcv_padisc_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_padisc(phba, ndlp, cmdiocb); + lpfc_rcv_padisc(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prlo_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_rcv_prlo_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb; cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); return ndlp->nlp_state; } static uint32_t 
-lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, - void *arg, uint32_t evt) +lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, + uint32_t evt) { - LPFC_MBOXQ_t *pmb; - MAILBOX_t *mb; - uint32_t did; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg; + MAILBOX_t *mb = &pmb->mb; + uint32_t did = mb->un.varWords[1]; - pmb = (LPFC_MBOXQ_t *) arg; - mb = &pmb->mb; - did = mb->un.varWords[1]; if (mb->mbxStatus) { /* RegLogin failed */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_DISCOVERY, - "%d:0246 RegLogin failed Data: x%x x%x x%x\n", - phba->brd_no, - did, mb->mbxStatus, phba->hba_state); + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "%d (%d):0246 RegLogin failed Data: x%x x%x " + "x%x\n", + phba->brd_no, vport->vpi, + did, mb->mbxStatus, vport->port_state); /* * If RegLogin failed due to lack of HBA resources do not @@ -1109,20 +1208,20 @@ */ if (mb->mbxStatus == MBXERR_RPI_FULL) { ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); return ndlp->nlp_state; } - /* Put ndlp in npr list set plogi timer for 1 sec */ + /* Put ndlp in npr state set plogi timer for 1 sec */ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; - lpfc_issue_els_logo(phba, ndlp, 0); + lpfc_issue_els_logo(vport, ndlp, 0); ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); return ndlp->nlp_state; } @@ -1131,91 +1230,99 @@ /* Only if we are not a fabric nport do we issue PRLI */ if 
(!(ndlp->nlp_type & NLP_FABRIC)) { ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE); - lpfc_issue_els_prli(phba, ndlp, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); + lpfc_issue_els_prli(vport, ndlp, 0); } else { ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); } return ndlp->nlp_state; } static uint32_t -lpfc_device_rm_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_device_rm_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NODEV_REMOVE; + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; - } - else { - lpfc_drop_node(phba, ndlp); + } else { + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } } static uint32_t -lpfc_device_recov_reglogin_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + /* Don't do anything that will mess up processing of the + * previous RSCN. 
+ */ + if (vport->fc_flag & FC_RSCN_DEFERRED) + return ndlp->nlp_state; + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); + lpfc_disc_set_adisc(vport, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_plogi_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_plogi_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb; cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_plogi(phba, ndlp, cmdiocb); + lpfc_rcv_plogi(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prli_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_logo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* Software abort outstanding PRLI before sending acc */ - lpfc_els_abort(phba, ndlp); + lpfc_els_abort(vport->phba, ndlp); - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } 
static uint32_t -lpfc_rcv_padisc_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_padisc_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_rcv_padisc(phba, ndlp, cmdiocb); + lpfc_rcv_padisc(vport, ndlp, cmdiocb); return ndlp->nlp_state; } @@ -1225,21 +1332,22 @@ * NEXT STATE = PRLI_ISSUE */ static uint32_t -lpfc_rcv_prlo_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prlo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_prli_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_iocbq *cmdiocb, *rspiocb; + struct lpfc_hba *phba = vport->phba; IOCB_t *irsp; PRLI *npr; @@ -1249,8 +1357,12 @@ irsp = &rspiocb->iocb; if (irsp->ulpStatus) { + if ((vport->port_type == LPFC_NPIV_PORT) && + phba->cfg_vport_restrict_login) { + goto out; + } ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); return ndlp->nlp_state; } @@ -1266,9 +1378,25 @@ if (npr->Retry) ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; } + if (!(ndlp->nlp_type & NLP_FCP_TARGET) && + (vport->port_type == LPFC_NPIV_PORT) && + phba->cfg_vport_restrict_login) { +out: + 
spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_TARGET_REMOVE; + spin_unlock_irq(shost->host_lock); + lpfc_issue_els_logo(vport, ndlp, 0); + + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); + return ndlp->nlp_state; + } ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE); + if (ndlp->nlp_type & NLP_FCP_TARGET) + lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); + else + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); return ndlp->nlp_state; } @@ -1289,19 +1417,23 @@ * on plogi list so it can be freed when LOGO completes. * */ + static uint32_t -lpfc_device_rm_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_device_rm_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NODEV_REMOVE; + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; - } - else { + } else { /* software abort outstanding PLOGI */ - lpfc_els_abort(phba, ndlp); + lpfc_els_abort(vport->phba, ndlp); - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } } @@ -1324,261 +1456,251 @@ * outstanding PRLI command, then free the node entry. */ static uint32_t -lpfc_device_recov_prli_issue(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_device_recov_prli_issue(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, + uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; + + /* Don't do anything that will mess up processing of the + * previous RSCN. 
+ */ + if (vport->fc_flag & FC_RSCN_DEFERRED) + return ndlp->nlp_state; + /* software abort outstanding PRLI */ lpfc_els_abort(phba, ndlp); ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); + lpfc_disc_set_adisc(vport, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_plogi_unmap_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_plogi_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_plogi(phba, ndlp, cmdiocb); + lpfc_rcv_plogi(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prli_unmap_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prli_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_rcv_prli(phba, ndlp, cmdiocb); - lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); + lpfc_rcv_prli(vport, ndlp, cmdiocb); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_unmap_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_logo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + 
lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_padisc_unmap_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_padisc_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_padisc(phba, ndlp, cmdiocb); + lpfc_rcv_padisc(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prlo_unmap_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prlo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); return ndlp->nlp_state; } static uint32_t -lpfc_device_recov_unmap_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_device_recov_unmap_node(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, + uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + ndlp->nlp_prev_state = NLP_STE_UNMAPPED_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - lpfc_disc_set_adisc(phba, ndlp); + spin_unlock_irq(shost->host_lock); + lpfc_disc_set_adisc(vport, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_plogi_mapped_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_plogi_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq 
*cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_rcv_plogi(phba, ndlp, cmdiocb); + lpfc_rcv_plogi(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prli_mapped_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prli_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_logo_mapped_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_logo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_padisc_mapped_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_padisc_mapped_node(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - lpfc_rcv_padisc(phba, ndlp, cmdiocb); + lpfc_rcv_padisc(vport, ndlp, cmdiocb); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prlo_mapped_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_rcv_prlo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_hba *phba = 
vport->phba; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* flush the target */ - spin_lock_irq(phba->host->host_lock); lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); - spin_unlock_irq(phba->host->host_lock); /* Treat like rcv logo */ - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO); return ndlp->nlp_state; } static uint32_t -lpfc_device_recov_mapped_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, +lpfc_device_recov_mapped_node(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + ndlp->nlp_prev_state = NLP_STE_MAPPED_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); - spin_lock_irq(phba->host->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - spin_unlock_irq(phba->host->host_lock); - lpfc_disc_set_adisc(phba, ndlp); + spin_unlock_irq(shost->host_lock); + lpfc_disc_set_adisc(vport, ndlp); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_plogi_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* Ignore PLOGI if we have an outstanding LOGO */ - if (ndlp->nlp_flag & NLP_LOGO_SND) { + if (ndlp->nlp_flag & (NLP_LOGO_SND | NLP_LOGO_ACC)) { return ndlp->nlp_state; } - if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) { - spin_lock_irq(phba->host->host_lock); + if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; - 
spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; } /* send PLOGI immediately, move to PLOGI issue state */ if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); } return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prli_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; struct ls_rjt stat; - cmdiocb = (struct lpfc_iocbq *) arg; - memset(&stat, 0, sizeof (struct ls_rjt)); stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; - lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { if (ndlp->nlp_flag & NLP_NPR_ADISC) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); - lpfc_issue_els_adisc(phba, ndlp, 0); + spin_unlock_irq(shost->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); + lpfc_issue_els_adisc(vport, ndlp, 0); } else { ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); } } return ndlp->nlp_state; } static uint32_t 
-lpfc_rcv_logo_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); + lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); return ndlp->nlp_state; } static uint32_t -lpfc_rcv_padisc_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_padisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; - - cmdiocb = (struct lpfc_iocbq *) arg; + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - lpfc_rcv_padisc(phba, ndlp, cmdiocb); + lpfc_rcv_padisc(vport, ndlp, cmdiocb); /* * Do not start discovery if discovery is about to start @@ -1586,53 +1708,52 @@ * here will affect the counting of discovery threads. 
*/ if (!(ndlp->nlp_flag & NLP_DELAY_TMO) && - !(ndlp->nlp_flag & NLP_NPR_2B_DISC)){ + !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) { if (ndlp->nlp_flag & NLP_NPR_ADISC) { + ndlp->nlp_flag &= ~NLP_NPR_ADISC; ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); - lpfc_issue_els_adisc(phba, ndlp, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); + lpfc_issue_els_adisc(vport, ndlp, 0); } else { ndlp->nlp_prev_state = NLP_STE_NPR_NODE; - lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); - lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); } } return ndlp->nlp_state; } static uint32_t -lpfc_rcv_prlo_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_rcv_prlo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - struct lpfc_iocbq *cmdiocb; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; - cmdiocb = (struct lpfc_iocbq *) arg; - - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_LOGO_ACC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); - lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); - if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { + if ((ndlp->nlp_flag & NLP_DELAY_TMO) == 0) { mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DELAY_TMO; ndlp->nlp_flag &= ~NLP_NPR_ADISC; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; } else { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; - 
spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); } return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_plogi_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; @@ -1642,15 +1763,15 @@ irsp = &rspiocb->iocb; if (irsp->ulpStatus) { - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_prli_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_cmpl_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; @@ -1660,25 +1781,24 @@ irsp = &rspiocb->iocb; if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) { - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_logo_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_cmpl_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - lpfc_unreg_rpi(phba, ndlp); + lpfc_unreg_rpi(vport, ndlp); /* This routine does nothing, just return the current state */ return ndlp->nlp_state; } static uint32_t -lpfc_cmpl_adisc_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_cmpl_adisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; @@ -1688,28 +1808,25 @@ irsp = &rspiocb->iocb; if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) { - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; } static uint32_t 
-lpfc_cmpl_reglogin_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_cmpl_reglogin_npr_node(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - LPFC_MBOXQ_t *pmb; - MAILBOX_t *mb; - - pmb = (LPFC_MBOXQ_t *) arg; - mb = &pmb->mb; + LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg; + MAILBOX_t *mb = &pmb->mb; if (!mb->mbxStatus) ndlp->nlp_rpi = mb->un.varWords[0]; else { if (ndlp->nlp_flag & NLP_NODEV_REMOVE) { - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } } @@ -1717,28 +1834,38 @@ } static uint32_t -lpfc_device_rm_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_device_rm_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NODEV_REMOVE; + spin_unlock_irq(shost->host_lock); return ndlp->nlp_state; } - lpfc_drop_node(phba, ndlp); + lpfc_drop_node(vport, ndlp); return NLP_STE_FREED_NODE; } static uint32_t -lpfc_device_recov_npr_node(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, - uint32_t evt) +lpfc_device_recov_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { - spin_lock_irq(phba->host->host_lock); + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + /* Don't do anything that will mess up processing of the + * previous RSCN. 
+ */ + if (vport->fc_flag & FC_RSCN_DEFERRED) + return ndlp->nlp_state; + + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(shost->host_lock); if (ndlp->nlp_flag & NLP_DELAY_TMO) { - lpfc_cancel_retry_delay_tmo(phba, ndlp); + lpfc_cancel_retry_delay_tmo(vport, ndlp); } return ndlp->nlp_state; } @@ -1801,7 +1928,7 @@ */ static uint32_t (*lpfc_disc_action[NLP_STE_MAX_STATE * NLP_EVT_MAX_EVENT]) - (struct lpfc_hba *, struct lpfc_nodelist *, void *, uint32_t) = { + (struct lpfc_vport *, struct lpfc_nodelist *, void *, uint32_t) = { /* Action routine Event Current State */ lpfc_rcv_plogi_unused_node, /* RCV_PLOGI UNUSED_NODE */ lpfc_rcv_els_unused_node, /* RCV_PRLI */ @@ -1818,7 +1945,7 @@ lpfc_disc_illegal, /* DEVICE_RECOVERY */ lpfc_rcv_plogi_plogi_issue, /* RCV_PLOGI PLOGI_ISSUE */ - lpfc_rcv_els_plogi_issue, /* RCV_PRLI */ + lpfc_rcv_prli_plogi_issue, /* RCV_PRLI */ lpfc_rcv_logo_plogi_issue, /* RCV_LOGO */ lpfc_rcv_els_plogi_issue, /* RCV_ADISC */ lpfc_rcv_els_plogi_issue, /* RCV_PDISC */ @@ -1917,34 +2044,40 @@ }; int -lpfc_disc_state_machine(struct lpfc_hba * phba, - struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) +lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) { + struct lpfc_hba *phba = vport->phba; uint32_t cur_state, rc; - uint32_t(*func) (struct lpfc_hba *, struct lpfc_nodelist *, void *, + uint32_t(*func) (struct lpfc_vport *, struct lpfc_nodelist *, void *, uint32_t); lpfc_nlp_get(ndlp); cur_state = ndlp->nlp_state; /* DSM in event on NPort in state */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0211 DSM in event x%x on NPort x%x in state %d " - "Data: x%x\n", - phba->brd_no, + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0211 DSM in event x%x on NPort x%x in " + "state %d Data: x%x\n", + phba->brd_no, vport->vpi, evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag); + 
lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, + "DSM in: evt:%d ste:%d did:x%x", + evt, cur_state, ndlp->nlp_DID); + func = lpfc_disc_action[(cur_state * NLP_EVT_MAX_EVENT) + evt]; - rc = (func) (phba, ndlp, arg, evt); + rc = (func) (vport, ndlp, arg, evt); /* DSM out state on NPort */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_DISCOVERY, - "%d:0212 DSM out state %d on NPort x%x Data: x%x\n", - phba->brd_no, + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, + "%d (%d):0212 DSM out state %d on NPort x%x " + "Data: x%x\n", + phba->brd_no, vport->vpi, + rc, ndlp->nlp_DID, ndlp->nlp_flag); + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, + "DSM out: ste:%d did:x%x flg:x%x", rc, ndlp->nlp_DID, ndlp->nlp_flag); lpfc_nlp_put(ndlp); diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -37,10 +37,158 @@ #include "lpfc.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" +#include "lpfc_vport.h" #define LPFC_RESET_WAIT 2 #define LPFC_ABORT_WAIT 2 +/* + * This function is called with no lock held when there is a resource + * error in driver or in firmware. 
+ */ +void +lpfc_adjust_queue_depth(struct lpfc_hba *phba) +{ + unsigned long flags; + + spin_lock_irqsave(&phba->hbalock, flags); + atomic_inc(&phba->num_rsrc_err); + phba->last_rsrc_error_time = jiffies; + + if ((phba->last_ramp_down_time + QUEUE_RAMP_DOWN_INTERVAL) > jiffies) { + spin_unlock_irqrestore(&phba->hbalock, flags); + return; + } + + phba->last_ramp_down_time = jiffies; + + spin_unlock_irqrestore(&phba->hbalock, flags); + + spin_lock_irqsave(&phba->pport->work_port_lock, flags); + if ((phba->pport->work_port_events & + WORKER_RAMP_DOWN_QUEUE) == 0) { + phba->pport->work_port_events |= WORKER_RAMP_DOWN_QUEUE; + } + spin_unlock_irqrestore(&phba->pport->work_port_lock, flags); + + spin_lock_irqsave(&phba->hbalock, flags); + if (phba->work_wait) + wake_up(phba->work_wait); + spin_unlock_irqrestore(&phba->hbalock, flags); + + return; +} + +/* + * This function is called with no lock held when there is a successful + * SCSI command completion. + */ +static inline void +lpfc_rampup_queue_depth(struct lpfc_hba *phba, + struct scsi_device *sdev) +{ + unsigned long flags; + atomic_inc(&phba->num_cmd_success); + + if (phba->cfg_lun_queue_depth <= sdev->queue_depth) + return; + + spin_lock_irqsave(&phba->hbalock, flags); + if (((phba->last_ramp_up_time + QUEUE_RAMP_UP_INTERVAL) > jiffies) || + ((phba->last_rsrc_error_time + QUEUE_RAMP_UP_INTERVAL ) > jiffies)) { + spin_unlock_irqrestore(&phba->hbalock, flags); + return; + } + + phba->last_ramp_up_time = jiffies; + spin_unlock_irqrestore(&phba->hbalock, flags); + + spin_lock_irqsave(&phba->pport->work_port_lock, flags); + if ((phba->pport->work_port_events & + WORKER_RAMP_UP_QUEUE) == 0) { + phba->pport->work_port_events |= WORKER_RAMP_UP_QUEUE; + } + spin_unlock_irqrestore(&phba->pport->work_port_lock, flags); + + spin_lock_irqsave(&phba->hbalock, flags); + if (phba->work_wait) + wake_up(phba->work_wait); + spin_unlock_irqrestore(&phba->hbalock, flags); +} + +void +lpfc_ramp_down_queue_handler(struct lpfc_hba 
*phba) +{ + struct lpfc_vport *vport; + struct Scsi_Host *host; + struct scsi_device *sdev; + unsigned long new_queue_depth; + unsigned long num_rsrc_err, num_cmd_success; + + num_rsrc_err = atomic_read(&phba->num_rsrc_err); + num_cmd_success = atomic_read(&phba->num_cmd_success); + + spin_lock_irq(&phba->hbalock); + list_for_each_entry(vport, &phba->port_list, listentry) { + host = lpfc_shost_from_vport(vport); + if (!scsi_host_get(host)) + continue; + + spin_unlock_irq(&phba->hbalock); + + shost_for_each_device(sdev, host) { + new_queue_depth = sdev->queue_depth * num_rsrc_err / + (num_rsrc_err + num_cmd_success); + if (!new_queue_depth) + new_queue_depth = sdev->queue_depth - 1; + else + new_queue_depth = + sdev->queue_depth - new_queue_depth; + + if (sdev->ordered_tags) + scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, + new_queue_depth); + else + scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, + new_queue_depth); + } + spin_lock_irq(&phba->hbalock); + scsi_host_put(host); + } + spin_unlock_irq(&phba->hbalock); + atomic_set(&phba->num_rsrc_err, 0); + atomic_set(&phba->num_cmd_success, 0); +} + +void +lpfc_ramp_up_queue_handler(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport; + struct Scsi_Host *host; + struct scsi_device *sdev; + + spin_lock_irq(&phba->hbalock); + list_for_each_entry(vport, &phba->port_list, listentry) { + host = lpfc_shost_from_vport(vport); + if (!scsi_host_get(host)) + continue; + + spin_unlock_irq(&phba->hbalock); + shost_for_each_device(sdev, host) { + if (sdev->ordered_tags) + scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, + sdev->queue_depth+1); + else + scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, + sdev->queue_depth+1); + } + spin_lock_irq(&phba->hbalock); + scsi_host_put(host); + } + spin_unlock_irq(&phba->hbalock); + atomic_set(&phba->num_rsrc_err, 0); + atomic_set(&phba->num_cmd_success, 0); +} /* * This routine allocates a scsi buffer, which contains all the necessary @@ -51,8 +199,9 @@ * and the BPL BDE is setup in the IOCB. 
*/ static struct lpfc_scsi_buf * -lpfc_new_scsi_buf(struct lpfc_hba * phba) +lpfc_new_scsi_buf(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; struct lpfc_scsi_buf *psb; struct ulp_bde64 *bpl; IOCB_t *iocb; @@ -63,7 +212,6 @@ if (!psb) return NULL; memset(psb, 0, sizeof (struct lpfc_scsi_buf)); - psb->scsi_hba = phba; /* * Get memory from the pci pool to map the virt space to pci bus space @@ -155,7 +303,7 @@ } static void -lpfc_release_scsi_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb) +lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb) { unsigned long iflag = 0; @@ -166,7 +314,7 @@ } static int -lpfc_scsi_prep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd) +lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) { struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd; struct scatterlist *sgel = NULL; @@ -175,8 +323,7 @@ IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb; dma_addr_t physaddr; uint32_t i, num_bde = 0; - int datadir = scsi_cmnd->sc_data_direction; - int dma_error; + int nseg, datadir = scsi_cmnd->sc_data_direction; /* * There are three possibilities here - use scatter-gather segment, use @@ -185,26 +332,22 @@ * data bde entry. */ bpl += 2; - if (scsi_cmnd->use_sg) { + nseg = scsi_dma_map(scsi_cmnd); + if (nseg > 0) { /* * The driver stores the segment count returned from pci_map_sg * because this a count of dma-mappings used to map the use_sg * pages. They are not guaranteed to be the same for those * architectures that implement an IOMMU. */ - sgel = (struct scatterlist *)scsi_cmnd->request_buffer; - lpfc_cmd->seg_cnt = dma_map_sg(&phba->pcidev->dev, sgel, - scsi_cmnd->use_sg, datadir); - if (lpfc_cmd->seg_cnt == 0) - return 1; + lpfc_cmd->seg_cnt = nseg; if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) { printk(KERN_ERR "%s: Too many sg segments from " "dma_map_sg. 
Config %d, seg_cnt %d", __FUNCTION__, phba->cfg_sg_seg_cnt, lpfc_cmd->seg_cnt); - dma_unmap_sg(&phba->pcidev->dev, sgel, - lpfc_cmd->seg_cnt, datadir); + scsi_dma_unmap(scsi_cmnd); return 1; } @@ -214,7 +357,7 @@ * single scsi command. Just run through the seg_cnt and format * the bde's. */ - for (i = 0; i < lpfc_cmd->seg_cnt; i++) { + scsi_for_each_sg(scsi_cmnd, sgel, nseg, i) { physaddr = sg_dma_address(sgel); bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr)); bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); @@ -225,35 +368,10 @@ bpl->tus.f.bdeFlags = BUFF_USE_RCV; bpl->tus.w = le32_to_cpu(bpl->tus.w); bpl++; - sgel++; num_bde++; } - } else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) { - physaddr = dma_map_single(&phba->pcidev->dev, - scsi_cmnd->request_buffer, - scsi_cmnd->request_bufflen, - datadir); - dma_error = dma_mapping_error(physaddr); - if (dma_error) { - lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0718 Unable to dma_map_single " - "request_buffer: x%x\n", - phba->brd_no, dma_error); + } else if (nseg < 0) return 1; - } - - lpfc_cmd->nonsg_phys = physaddr; - bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr)); - bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); - bpl->tus.f.bdeSize = scsi_cmnd->request_bufflen; - if (datadir == DMA_TO_DEVICE) - bpl->tus.f.bdeFlags = 0; - else - bpl->tus.f.bdeFlags = BUFF_USE_RCV; - bpl->tus.w = le32_to_cpu(bpl->tus.w); - num_bde = 1; - bpl++; - } /* * Finish initializing those IOCB fields that are dependent on the @@ -266,7 +384,7 @@ (num_bde * sizeof (struct ulp_bde64)); iocb_cmd->ulpBdeCount = 1; iocb_cmd->ulpLe = 1; - fcp_cmnd->fcpDl = be32_to_cpu(scsi_cmnd->request_bufflen); + fcp_cmnd->fcpDl = be32_to_cpu(scsi_bufflen(scsi_cmnd)); return 0; } @@ -279,26 +397,20 @@ * a request buffer, but did not request use_sg. There is a third * case, but it does not require resource deallocation. 
*/ - if ((psb->seg_cnt > 0) && (psb->pCmd->use_sg)) { - dma_unmap_sg(&phba->pcidev->dev, psb->pCmd->request_buffer, - psb->seg_cnt, psb->pCmd->sc_data_direction); - } else { - if ((psb->nonsg_phys) && (psb->pCmd->request_bufflen)) { - dma_unmap_single(&phba->pcidev->dev, psb->nonsg_phys, - psb->pCmd->request_bufflen, - psb->pCmd->sc_data_direction); - } - } + if (psb->seg_cnt > 0) + scsi_dma_unmap(psb->pCmd); } static void -lpfc_handle_fcp_err(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_iocbq *rsp_iocb) +lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, + struct lpfc_iocbq *rsp_iocb) { struct scsi_cmnd *cmnd = lpfc_cmd->pCmd; struct fcp_cmnd *fcpcmd = lpfc_cmd->fcp_cmnd; struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp; - struct lpfc_hba *phba = lpfc_cmd->scsi_hba; + struct lpfc_hba *phba = vport->phba; uint32_t fcpi_parm = rsp_iocb->iocb.un.fcpi.fcpi_parm; + uint32_t vpi = vport->vpi; uint32_t resp_info = fcprsp->rspStatus2; uint32_t scsi_status = fcprsp->rspStatus3; uint32_t *lp; @@ -331,9 +443,9 @@ logit = LOG_FCP; lpfc_printf_log(phba, KERN_WARNING, logit, - "%d:0730 FCP command x%x failed: x%x SNS x%x x%x " + "%d (%d):0730 FCP command x%x failed: x%x SNS x%x x%x " "Data: x%x x%x x%x x%x x%x\n", - phba->brd_no, cmnd->cmnd[0], scsi_status, + phba->brd_no, vpi, cmnd->cmnd[0], scsi_status, be32_to_cpu(*lp), be32_to_cpu(*(lp + 3)), resp_info, be32_to_cpu(fcprsp->rspResId), be32_to_cpu(fcprsp->rspSnsLen), @@ -349,15 +461,16 @@ } } - cmnd->resid = 0; + scsi_set_resid(cmnd, 0); if (resp_info & RESID_UNDER) { - cmnd->resid = be32_to_cpu(fcprsp->rspResId); + scsi_set_resid(cmnd, be32_to_cpu(fcprsp->rspResId)); lpfc_printf_log(phba, KERN_INFO, LOG_FCP, - "%d:0716 FCP Read Underrun, expected %d, " - "residual %d Data: x%x x%x x%x\n", phba->brd_no, - be32_to_cpu(fcpcmd->fcpDl), cmnd->resid, - fcpi_parm, cmnd->cmnd[0], cmnd->underflow); + "%d (%d):0716 FCP Read Underrun, expected %d, " + "residual %d Data: x%x x%x x%x\n", + phba->brd_no, vpi, 
be32_to_cpu(fcpcmd->fcpDl), + scsi_get_resid(cmnd), fcpi_parm, cmnd->cmnd[0], + cmnd->underflow); /* * If there is an under run check if under run reported by @@ -366,15 +479,16 @@ */ if ((cmnd->sc_data_direction == DMA_FROM_DEVICE) && fcpi_parm && - (cmnd->resid != fcpi_parm)) { + (scsi_get_resid(cmnd) != fcpi_parm)) { lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR, - "%d:0735 FCP Read Check Error and Underrun " - "Data: x%x x%x x%x x%x\n", phba->brd_no, + "%d (%d):0735 FCP Read Check Error " + "and Underrun Data: x%x x%x x%x x%x\n", + phba->brd_no, vpi, be32_to_cpu(fcpcmd->fcpDl), - cmnd->resid, - fcpi_parm, cmnd->cmnd[0]); - cmnd->resid = cmnd->request_bufflen; + scsi_get_resid(cmnd), fcpi_parm, + cmnd->cmnd[0]); + scsi_set_resid(cmnd, scsi_bufflen(cmnd)); host_status = DID_ERROR; } /* @@ -385,22 +499,23 @@ */ if (!(resp_info & SNS_LEN_VALID) && (scsi_status == SAM_STAT_GOOD) && - (cmnd->request_bufflen - cmnd->resid) < cmnd->underflow) { + (scsi_bufflen(cmnd) - scsi_get_resid(cmnd) + < cmnd->underflow)) { lpfc_printf_log(phba, KERN_INFO, LOG_FCP, - "%d:0717 FCP command x%x residual " + "%d (%d):0717 FCP command x%x residual " "underrun converted to error " - "Data: x%x x%x x%x\n", phba->brd_no, - cmnd->cmnd[0], cmnd->request_bufflen, - cmnd->resid, cmnd->underflow); - + "Data: x%x x%x x%x\n", + phba->brd_no, vpi, cmnd->cmnd[0], + cmnd->request_bufflen, + scsi_get_resid(cmnd), cmnd->underflow); host_status = DID_ERROR; } } else if (resp_info & RESID_OVER) { lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, - "%d:0720 FCP command x%x residual " + "%d (%d):0720 FCP command x%x residual " "overrun error. 
Data: x%x x%x \n", - phba->brd_no, cmnd->cmnd[0], - cmnd->request_bufflen, cmnd->resid); + phba->brd_no, vpi, cmnd->cmnd[0], + scsi_bufflen(cmnd), scsi_get_resid(cmnd)); host_status = DID_ERROR; /* @@ -410,13 +525,14 @@ } else if ((scsi_status == SAM_STAT_GOOD) && fcpi_parm && (cmnd->sc_data_direction == DMA_FROM_DEVICE)) { lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR, - "%d:0734 FCP Read Check Error Data: " - "x%x x%x x%x x%x\n", phba->brd_no, + "%d (%d):0734 FCP Read Check Error Data: " + "x%x x%x x%x x%x\n", + phba->brd_no, vpi, be32_to_cpu(fcpcmd->fcpDl), be32_to_cpu(fcprsp->rspResId), fcpi_parm, cmnd->cmnd[0]); host_status = DID_ERROR; - cmnd->resid = cmnd->request_bufflen; + scsi_set_resid(cmnd, scsi_bufflen(cmnd)); } out: @@ -429,9 +545,13 @@ { struct lpfc_scsi_buf *lpfc_cmd = (struct lpfc_scsi_buf *) pIocbIn->context1; + struct lpfc_vport *vport = pIocbIn->vport; struct lpfc_rport_data *rdata = lpfc_cmd->rdata; struct lpfc_nodelist *pnode = rdata->pnode; struct scsi_cmnd *cmd = lpfc_cmd->pCmd; + uint32_t vpi = (lpfc_cmd->cur_iocbq.vport + ? lpfc_cmd->cur_iocbq.vport->vpi + : 0); int result; struct scsi_device *sdev, *tmp_sdev; int depth = 0; @@ -447,22 +567,31 @@ lpfc_cmd->status = IOSTAT_DEFAULT; lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, - "%d:0729 FCP cmd x%x failed <%d/%d> status: " - "x%x result: x%x Data: x%x x%x\n", - phba->brd_no, cmd->cmnd[0], cmd->device->id, - cmd->device->lun, lpfc_cmd->status, - lpfc_cmd->result, pIocbOut->iocb.ulpContext, + "%d (%d):0729 FCP cmd x%x failed <%d/%d> " + "status: x%x result: x%x Data: x%x x%x\n", + phba->brd_no, vpi, cmd->cmnd[0], + cmd->device ? cmd->device->id : 0xffff, + cmd->device ? 
cmd->device->lun : 0xffff, + lpfc_cmd->status, lpfc_cmd->result, + pIocbOut->iocb.ulpContext, lpfc_cmd->cur_iocbq.iocb.ulpIoTag); switch (lpfc_cmd->status) { case IOSTAT_FCP_RSP_ERROR: /* Call FCP RSP handler to determine result */ - lpfc_handle_fcp_err(lpfc_cmd,pIocbOut); + lpfc_handle_fcp_err(vport, lpfc_cmd, pIocbOut); break; case IOSTAT_NPORT_BSY: case IOSTAT_FABRIC_BSY: cmd->result = ScsiResult(DID_BUS_BUSY, 0); break; + case IOSTAT_LOCAL_REJECT: + if (lpfc_cmd->result == RJT_UNAVAIL_PERM || + lpfc_cmd->result == IOERR_NO_RESOURCES || + lpfc_cmd->result == RJT_LOGIN_REQUIRED) { + cmd->result = ScsiResult(DID_REQUEUE, 0); + break; + } /* else: fall through */ default: cmd->result = ScsiResult(DID_ERROR, 0); break; @@ -479,11 +608,12 @@ uint32_t *lp = (uint32_t *)cmd->sense_buffer; lpfc_printf_log(phba, KERN_INFO, LOG_FCP, - "%d:0710 Iodone <%d/%d> cmd %p, error x%x " - "SNS x%x x%x Data: x%x x%x\n", - phba->brd_no, cmd->device->id, + "%d (%d):0710 Iodone <%d/%d> cmd %p, error " + "x%x SNS x%x x%x Data: x%x x%x\n", + phba->brd_no, vpi, cmd->device->id, cmd->device->lun, cmd, cmd->result, - *lp, *(lp + 3), cmd->retries, cmd->resid); + *lp, *(lp + 3), cmd->retries, + scsi_get_resid(cmd)); } result = cmd->result; @@ -496,6 +626,10 @@ return; } + + if (!result) + lpfc_rampup_queue_depth(phba, sdev); + if (!result && pnode != NULL && ((jiffies - pnode->last_ramp_up_time) > LPFC_Q_RAMP_UP_INTERVAL * HZ) && @@ -544,8 +678,9 @@ if (depth) { lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, - "%d:0711 detected queue full - lun queue depth " - " adjusted to %d.\n", phba->brd_no, depth); + "%d (%d):0711 detected queue full - " + "lun queue depth adjusted to %d.\n", + phba->brd_no, vpi, depth); } } @@ -553,9 +688,10 @@ } static void -lpfc_scsi_prep_cmnd(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd, +lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_nodelist *pnode) { + struct lpfc_hba *phba = vport->phba; struct scsi_cmnd 
*scsi_cmnd = lpfc_cmd->pCmd; struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd; IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb; @@ -592,22 +728,7 @@ * bumping the bpl beyond the fcp_cmnd and fcp_rsp regions to the first * data bde entry. */ - if (scsi_cmnd->use_sg) { - if (datadir == DMA_TO_DEVICE) { - iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR; - iocb_cmd->un.fcpi.fcpi_parm = 0; - iocb_cmd->ulpPU = 0; - fcp_cmnd->fcpCntl3 = WRITE_DATA; - phba->fc4OutputRequests++; - } else { - iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR; - iocb_cmd->ulpPU = PARM_READ_CHECK; - iocb_cmd->un.fcpi.fcpi_parm = - scsi_cmnd->request_bufflen; - fcp_cmnd->fcpCntl3 = READ_DATA; - phba->fc4InputRequests++; - } - } else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) { + if (scsi_sg_count(scsi_cmnd)) { if (datadir == DMA_TO_DEVICE) { iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR; iocb_cmd->un.fcpi.fcpi_parm = 0; @@ -617,8 +738,7 @@ } else { iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR; iocb_cmd->ulpPU = PARM_READ_CHECK; - iocb_cmd->un.fcpi.fcpi_parm = - scsi_cmnd->request_bufflen; + iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd); fcp_cmnd->fcpCntl3 = READ_DATA; phba->fc4InputRequests++; } @@ -642,15 +762,15 @@ piocbq->context1 = lpfc_cmd; piocbq->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl; piocbq->iocb.ulpTimeout = lpfc_cmd->timeout; + piocbq->vport = vport; } static int -lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_hba *phba, +lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, unsigned int lun, uint8_t task_mgmt_cmd) { - struct lpfc_sli *psli; struct lpfc_iocbq *piocbq; IOCB_t *piocb; struct fcp_cmnd *fcp_cmnd; @@ -661,8 +781,9 @@ return 0; } - psli = &phba->sli; piocbq = &(lpfc_cmd->cur_iocbq); + piocbq->vport = vport; + piocb = &piocbq->iocb; fcp_cmnd = lpfc_cmd->fcp_cmnd; @@ -688,7 +809,7 @@ piocb->ulpTimeout = lpfc_cmd->timeout; } - return (1); + return 1; } static void @@ -704,10 +825,11 @@ } static int -lpfc_scsi_tgt_reset(struct lpfc_scsi_buf * 
lpfc_cmd, struct lpfc_hba * phba, +lpfc_scsi_tgt_reset(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_vport *vport, unsigned tgt_id, unsigned int lun, struct lpfc_rport_data *rdata) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *iocbq; struct lpfc_iocbq *iocbqrsp; int ret; @@ -716,12 +838,11 @@ return FAILED; lpfc_cmd->rdata = rdata; - ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, lun, + ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun, FCP_TARGET_RESET); if (!ret) return FAILED; - lpfc_cmd->scsi_hba = phba; iocbq = &lpfc_cmd->cur_iocbq; iocbqrsp = lpfc_sli_get_iocbq(phba); @@ -730,10 +851,10 @@ /* Issue Target Reset to TGT */ lpfc_printf_log(phba, KERN_INFO, LOG_FCP, - "%d:0702 Issue Target Reset to TGT %d " + "%d (%d):0702 Issue Target Reset to TGT %d " "Data: x%x x%x\n", - phba->brd_no, tgt_id, rdata->pnode->nlp_rpi, - rdata->pnode->nlp_flag); + phba->brd_no, vport->vpi, tgt_id, + rdata->pnode->nlp_rpi, rdata->pnode->nlp_flag); ret = lpfc_sli_issue_iocb_wait(phba, &phba->sli.ring[phba->sli.fcp_ring], @@ -758,7 +879,8 @@ const char * lpfc_info(struct Scsi_Host *host) { - struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) host->hostdata; + struct lpfc_hba *phba = vport->phba; int len; static char lpfcinfobuf[384]; @@ -800,25 +922,21 @@ void lpfc_poll_timeout(unsigned long ptr) { - struct lpfc_hba *phba = (struct lpfc_hba *)ptr; - unsigned long iflag; - - spin_lock_irqsave(phba->host->host_lock, iflag); + struct lpfc_hba *phba = (struct lpfc_hba *) ptr; if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_poll_fcp_ring (phba); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); } - - spin_unlock_irqrestore(phba->host->host_lock, iflag); } static int lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *)) { - struct lpfc_hba *phba = - (struct lpfc_hba *) cmnd->device->host->hostdata; + struct Scsi_Host *shost = cmnd->device->host; + struct 
lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_sli *psli = &phba->sli; struct lpfc_rport_data *rdata = cmnd->device->hostdata; struct lpfc_nodelist *ndlp = rdata->pnode; @@ -840,11 +958,14 @@ cmnd->result = ScsiResult(DID_BUS_BUSY, 0); goto out_fail_command; } - lpfc_cmd = lpfc_get_scsi_buf (phba); + lpfc_cmd = lpfc_get_scsi_buf(phba); if (lpfc_cmd == NULL) { + lpfc_adjust_queue_depth(phba); + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, - "%d:0707 driver's buffer pool is empty, " - "IO busied\n", phba->brd_no); + "%d (%d):0707 driver's buffer pool is empty, " + "IO busied\n", + phba->brd_no, vport->vpi); goto out_host_busy; } @@ -862,7 +983,7 @@ if (err) goto out_host_busy_free_buf; - lpfc_scsi_prep_cmnd(phba, lpfc_cmd, ndlp); + lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp); err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring], &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB); @@ -908,7 +1029,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) { struct Scsi_Host *shost = cmnd->device->host; - struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring]; struct lpfc_iocbq *iocb; struct lpfc_iocbq *abtsiocb; @@ -918,8 +1040,6 @@ int ret = SUCCESS; lpfc_block_error_handler(cmnd); - spin_lock_irq(shost->host_lock); - lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble; BUG_ON(!lpfc_cmd); @@ -956,12 +1076,13 @@ icmd->ulpLe = 1; icmd->ulpClass = cmd->ulpClass; - if (phba->hba_state >= LPFC_LINK_UP) + if (lpfc_is_link_up(phba)) icmd->ulpCommand = CMD_ABORT_XRI_CN; else icmd->ulpCommand = CMD_CLOSE_XRI_CN; abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; + abtsiocb->vport = vport; if (lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0) == IOCB_ERROR) { lpfc_sli_release_iocbq(phba, abtsiocb); ret = FAILED; @@ -977,9 +1098,7 @@ if (phba->cfg_poll & 
DISABLE_FCP_RING_INT) lpfc_sli_poll_fcp_ring (phba); - spin_unlock_irq(phba->host->host_lock); - schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ); - spin_lock_irq(phba->host->host_lock); + schedule_timeout_uninterruptible(LPFC_ABORT_WAIT * HZ); if (++loop_count > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT) break; @@ -988,22 +1107,21 @@ if (lpfc_cmd->pCmd == cmnd) { ret = FAILED; lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0748 abort handler timed out waiting for " - "abort to complete: ret %#x, ID %d, LUN %d, " - "snum %#lx\n", - phba->brd_no, ret, cmnd->device->id, - cmnd->device->lun, cmnd->serial_number); + "%d (%d):0748 abort handler timed out waiting " + "for abort to complete: ret %#x, ID %d, " + "LUN %d, snum %#lx\n", + phba->brd_no, vport->vpi, ret, + cmnd->device->id, cmnd->device->lun, + cmnd->serial_number); } out: lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, - "%d:0749 SCSI Layer I/O Abort Request " + "%d (%d):0749 SCSI Layer I/O Abort Request " "Status x%x ID %d LUN %d snum %#lx\n", - phba->brd_no, ret, cmnd->device->id, + phba->brd_no, vport->vpi, ret, cmnd->device->id, cmnd->device->lun, cmnd->serial_number); - spin_unlock_irq(shost->host_lock); - return ret; } @@ -1011,7 +1129,8 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd) { struct Scsi_Host *shost = cmnd->device->host; - struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_scsi_buf *lpfc_cmd; struct lpfc_iocbq *iocbq, *iocbqrsp; struct lpfc_rport_data *rdata = cmnd->device->hostdata; @@ -1022,28 +1141,26 @@ int cnt, loopcnt; lpfc_block_error_handler(cmnd); - spin_lock_irq(shost->host_lock); loopcnt = 0; /* * If target is not in a MAPPED state, delay the reset until * target is rediscovered or devloss timeout expires. 
*/ - while ( 1 ) { + while (1) { if (!pnode) goto out; if (pnode->nlp_state != NLP_STE_MAPPED_NODE) { - spin_unlock_irq(phba->host->host_lock); schedule_timeout_uninterruptible(msecs_to_jiffies(500)); - spin_lock_irq(phba->host->host_lock); loopcnt++; rdata = cmnd->device->hostdata; if (!rdata || (loopcnt > ((phba->cfg_devloss_tmo * 2) + 1))) { lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0721 LUN Reset rport failure:" - " cnt x%x rdata x%p\n", - phba->brd_no, loopcnt, rdata); + "%d (%d):0721 LUN Reset rport " + "failure: cnt x%x rdata x%p\n", + phba->brd_no, vport->vpi, + loopcnt, rdata); goto out; } pnode = rdata->pnode; @@ -1054,15 +1171,14 @@ break; } - lpfc_cmd = lpfc_get_scsi_buf (phba); + lpfc_cmd = lpfc_get_scsi_buf(phba); if (lpfc_cmd == NULL) goto out; lpfc_cmd->timeout = 60; - lpfc_cmd->scsi_hba = phba; lpfc_cmd->rdata = rdata; - ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, cmnd->device->lun, + ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, cmnd->device->lun, FCP_TARGET_RESET); if (!ret) goto out_free_scsi_buf; @@ -1075,8 +1191,9 @@ goto out_free_scsi_buf; lpfc_printf_log(phba, KERN_INFO, LOG_FCP, - "%d:0703 Issue target reset to TGT %d LUN %d rpi x%x " - "nlp_flag x%x\n", phba->brd_no, cmnd->device->id, + "%d (%d):0703 Issue target reset to TGT %d LUN %d " + "rpi x%x nlp_flag x%x\n", + phba->brd_no, vport->vpi, cmnd->device->id, cmnd->device->lun, pnode->nlp_rpi, pnode->nlp_flag); iocb_status = lpfc_sli_issue_iocb_wait(phba, @@ -1111,9 +1228,7 @@ 0, LPFC_CTX_LUN); loopcnt = 0; while(cnt) { - spin_unlock_irq(phba->host->host_lock); schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ); - spin_lock_irq(phba->host->host_lock); if (++loopcnt > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT) @@ -1127,8 +1242,9 @@ if (cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0719 device reset I/O flush failure: cnt x%x\n", - phba->brd_no, cnt); + "%d (%d):0719 device reset I/O flush failure: " + "cnt x%x\n", + phba->brd_no, vport->vpi, cnt); ret = 
FAILED; } @@ -1137,13 +1253,12 @@ lpfc_release_scsi_buf(phba, lpfc_cmd); } lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0713 SCSI layer issued device reset (%d, %d) " + "%d (%d):0713 SCSI layer issued device reset (%d, %d) " "return x%x status x%x result x%x\n", - phba->brd_no, cmnd->device->id, cmnd->device->lun, - ret, cmd_status, cmd_result); + phba->brd_no, vport->vpi, cmnd->device->id, + cmnd->device->lun, ret, cmd_status, cmd_result); out: - spin_unlock_irq(shost->host_lock); return ret; } @@ -1151,7 +1266,8 @@ lpfc_bus_reset_handler(struct scsi_cmnd *cmnd) { struct Scsi_Host *shost = cmnd->device->host; - struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp = NULL; int match; int ret = FAILED, i, err_count = 0; @@ -1159,7 +1275,6 @@ struct lpfc_scsi_buf * lpfc_cmd; lpfc_block_error_handler(cmnd); - spin_lock_irq(shost->host_lock); lpfc_cmd = lpfc_get_scsi_buf(phba); if (lpfc_cmd == NULL) @@ -1167,7 +1282,6 @@ /* The lpfc_cmd storage is reused. Set all loop invariants. 
*/ lpfc_cmd->timeout = 60; - lpfc_cmd->scsi_hba = phba; /* * Since the driver manages a single bus device, reset all @@ -1177,7 +1291,8 @@ for (i = 0; i < LPFC_MAX_TARGET; i++) { /* Search for mapped node by target ID */ match = 0; - list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { + spin_lock_irq(shost->host_lock); + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { if (ndlp->nlp_state == NLP_STE_MAPPED_NODE && i == ndlp->nlp_sid && ndlp->rport) { @@ -1185,15 +1300,18 @@ break; } } + spin_unlock_irq(shost->host_lock); if (!match) continue; - ret = lpfc_scsi_tgt_reset(lpfc_cmd, phba, i, cmnd->device->lun, + ret = lpfc_scsi_tgt_reset(lpfc_cmd, vport, i, + cmnd->device->lun, ndlp->rport->dd_data); if (ret != SUCCESS) { lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0700 Bus Reset on target %d failed\n", - phba->brd_no, i); + "%d (%d):0700 Bus Reset on target %d " + "failed\n", + phba->brd_no, vport->vpi, i); err_count++; break; } @@ -1219,9 +1337,7 @@ 0, 0, 0, LPFC_CTX_HOST); loopcnt = 0; while(cnt) { - spin_unlock_irq(phba->host->host_lock); schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ); - spin_lock_irq(phba->host->host_lock); if (++loopcnt > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT) @@ -1234,25 +1350,24 @@ if (cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0715 Bus Reset I/O flush failure: cnt x%x left x%x\n", - phba->brd_no, cnt, i); + "%d (%d):0715 Bus Reset I/O flush failure: " + "cnt x%x left x%x\n", + phba->brd_no, vport->vpi, cnt, i); ret = FAILED; } - lpfc_printf_log(phba, - KERN_ERR, - LOG_FCP, - "%d:0714 SCSI layer issued Bus Reset Data: x%x\n", - phba->brd_no, ret); + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, + "%d (%d):0714 SCSI layer issued Bus Reset Data: x%x\n", + phba->brd_no, vport->vpi, ret); out: - spin_unlock_irq(shost->host_lock); return ret; } static int lpfc_slave_alloc(struct scsi_device *sdev) { - struct lpfc_hba *phba = (struct lpfc_hba *)sdev->host->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) 
sdev->host->hostdata; + struct lpfc_hba *phba = vport->phba; struct lpfc_scsi_buf *scsi_buf = NULL; struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); uint32_t total = 0, i; @@ -1273,27 +1388,35 @@ */ total = phba->total_scsi_bufs; num_to_alloc = phba->cfg_lun_queue_depth + 2; - if (total >= phba->cfg_hba_queue_depth) { + + /* Allow some exchanges to be available always to complete discovery */ + if (total >= phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT ) { lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, - "%d:0704 At limitation of %d preallocated " - "command buffers\n", phba->brd_no, total); + "%d (%d):0704 At limitation of %d " + "preallocated command buffers\n", + phba->brd_no, vport->vpi, total); return 0; - } else if (total + num_to_alloc > phba->cfg_hba_queue_depth) { + + /* Allow some exchanges to be available always to complete discovery */ + } else if (total + num_to_alloc > + phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT ) { lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, - "%d:0705 Allocation request of %d command " - "buffers will exceed max of %d. Reducing " - "allocation request to %d.\n", phba->brd_no, - num_to_alloc, phba->cfg_hba_queue_depth, + "%d (%d):0705 Allocation request of %d " + "command buffers will exceed max of %d. 
" + "Reducing allocation request to %d.\n", + phba->brd_no, vport->vpi, num_to_alloc, + phba->cfg_hba_queue_depth, (phba->cfg_hba_queue_depth - total)); num_to_alloc = phba->cfg_hba_queue_depth - total; } for (i = 0; i < num_to_alloc; i++) { - scsi_buf = lpfc_new_scsi_buf(phba); + scsi_buf = lpfc_new_scsi_buf(vport); if (!scsi_buf) { lpfc_printf_log(phba, KERN_ERR, LOG_FCP, - "%d:0706 Failed to allocate command " - "buffer\n", phba->brd_no); + "%d (%d):0706 Failed to allocate " + "command buffer\n", + phba->brd_no, vport->vpi); break; } @@ -1308,7 +1431,8 @@ static int lpfc_slave_configure(struct scsi_device *sdev) { - struct lpfc_hba *phba = (struct lpfc_hba *) sdev->host->hostdata; + struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata; + struct lpfc_hba *phba = vport->phba; struct fc_rport *rport = starget_to_rport(sdev->sdev_target); if (sdev->tagged_supported) @@ -1340,6 +1464,7 @@ return; } + struct scsi_host_template lpfc_template = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, @@ -1352,11 +1477,10 @@ .slave_configure = lpfc_slave_configure, .slave_destroy = lpfc_slave_destroy, .scan_finished = lpfc_scan_finished, - .scan_start = lpfc_scan_start, .this_id = -1, .sg_tablesize = LPFC_SG_SEG_CNT, .cmd_per_lun = LPFC_CMD_PER_LUN, .use_clustering = ENABLE_CLUSTERING, - .shost_attrs = lpfc_host_attrs, + .shost_attrs = lpfc_hba_attrs, .max_sectors = 0xFFFF, }; diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2005 Emulex. All rights reserved. * + * Copyright (C) 2004-2006 Emulex. All rights reserved. 
* * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -110,7 +110,6 @@ struct lpfc_scsi_buf { struct list_head list; struct scsi_cmnd *pCmd; - struct lpfc_hba *scsi_hba; struct lpfc_rport_data *rdata; uint32_t timeout; diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.c 2007-12-21 15:36:12.000000000 -0500 @@ -38,23 +38,25 @@ #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" +#include "lpfc_debugfs.h" /* * Define macro to log: Mailbox command x%x cannot issue Data * This allows multiple uses of lpfc_msgBlk0311 * w/o perturbing log msg utility. */ -#define LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) \ +#define LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) \ lpfc_printf_log(phba, \ KERN_INFO, \ LOG_MBOX | LOG_SLI, \ - "%d:0311 Mailbox command x%x cannot issue " \ - "Data: x%x x%x x%x\n", \ + "%d (%d):0311 Mailbox command x%x cannot " \ + "issue Data: x%x x%x x%x\n", \ phba->brd_no, \ - mb->mbxCommand, \ - phba->hba_state, \ + pmbox->vport ? pmbox->vport->vpi : 0, \ + pmbox->mb.mbxCommand, \ + phba->pport->port_state, \ psli->sli_flag, \ - flag); + flag) /* There are only four IOCB completion types. */ @@ -65,8 +67,26 @@ LPFC_ABORT_IOCB } lpfc_iocb_type; -struct lpfc_iocbq * -lpfc_sli_get_iocbq(struct lpfc_hba * phba) + /* SLI-2/SLI-3 provide different sized iocbs. Given a pointer + * to the start of the ring, and the slot number of the + * desired iocb entry, calc a pointer to that entry. 
+ */ +static inline IOCB_t * +lpfc_cmd_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) +{ + return (IOCB_t *) (((char *) pring->cmdringaddr) + + pring->cmdidx * phba->iocb_cmd_size); +} + +static inline IOCB_t * +lpfc_resp_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) +{ + return (IOCB_t *) (((char *) pring->rspringaddr) + + pring->rspidx * phba->iocb_rsp_size); +} + +static struct lpfc_iocbq * +__lpfc_sli_get_iocbq(struct lpfc_hba *phba) { struct list_head *lpfc_iocb_list = &phba->lpfc_iocb_list; struct lpfc_iocbq * iocbq = NULL; @@ -75,10 +95,22 @@ return iocbq; } +struct lpfc_iocbq * +lpfc_sli_get_iocbq(struct lpfc_hba *phba) +{ + struct lpfc_iocbq * iocbq = NULL; + unsigned long iflags; + + spin_lock_irqsave(&phba->hbalock, iflags); + iocbq = __lpfc_sli_get_iocbq(phba); + spin_unlock_irqrestore(&phba->hbalock, iflags); + return iocbq; +} + void -lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq) +__lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) { - size_t start_clean = (size_t)(&((struct lpfc_iocbq *)NULL)->iocb); + size_t start_clean = offsetof(struct lpfc_iocbq, iocb); /* * Clean all volatile data fields, preserve iotag and node struct. @@ -87,6 +119,19 @@ list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); } +void +lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) +{ + unsigned long iflags; + + /* + * Clean all volatile data fields, preserve iotag and node struct. + */ + spin_lock_irqsave(&phba->hbalock, iflags); + __lpfc_sli_release_iocbq(phba, iocbq); + spin_unlock_irqrestore(&phba->hbalock, iflags); +} + /* * Translate the iocb command to an iocb command type used to decide the final * disposition of each completed IOCB. 
@@ -155,6 +200,9 @@ case CMD_RCV_ELS_REQ_CX: case CMD_RCV_SEQUENCE64_CX: case CMD_RCV_ELS_REQ64_CX: + case CMD_IOCB_RCV_SEQ64_CX: + case CMD_IOCB_RCV_ELS64_CX: + case CMD_IOCB_RCV_CONT64_CX: type = LPFC_UNSOL_IOCB; break; default: @@ -166,73 +214,77 @@ } static int -lpfc_sli_ring_map(struct lpfc_hba * phba, LPFC_MBOXQ_t *pmb) +lpfc_sli_ring_map(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; - MAILBOX_t *pmbox = &pmb->mb; - int i, rc; + LPFC_MBOXQ_t *pmb; + MAILBOX_t *pmbox; + int i, rc, ret = 0; + pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!pmb) + return -ENOMEM; + pmbox = &pmb->mb; + phba->link_state = LPFC_INIT_MBX_CMDS; for (i = 0; i < psli->num_rings; i++) { - phba->hba_state = LPFC_INIT_MBX_CMDS; lpfc_config_ring(phba, i, pmb); rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); if (rc != MBX_SUCCESS) { - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, - "%d:0446 Adapter failed to init, " + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0446 Adapter failed to init (%d), " "mbxCmd x%x CFG_RING, mbxStatus x%x, " "ring %d\n", - phba->brd_no, + phba->brd_no, rc, pmbox->mbxCommand, pmbox->mbxStatus, i); - phba->hba_state = LPFC_HBA_ERROR; - return -ENXIO; + phba->link_state = LPFC_HBA_ERROR; + ret = -ENXIO; + break; } } - return 0; + mempool_free(pmb, phba->mbox_mem_pool); + return ret; } static int -lpfc_sli_ringtxcmpl_put(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocb) +lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb) { list_add_tail(&piocb->list, &pring->txcmplq); pring->txcmplq_cnt++; - if (unlikely(pring->ringno == LPFC_ELS_RING)) - mod_timer(&phba->els_tmofunc, + if ((unlikely(pring->ringno == LPFC_ELS_RING)) && + (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && + (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) { + if (!piocb->vport) + BUG(); + else + mod_timer(&piocb->vport->els_tmofunc, jiffies + HZ * (phba->fc_ratov << 1)); + } - 
return (0); + + return 0; } static struct lpfc_iocbq * -lpfc_sli_ringtx_get(struct lpfc_hba * phba, struct lpfc_sli_ring * pring) +lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { - struct list_head *dlp; struct lpfc_iocbq *cmd_iocb; - dlp = &pring->txq; - cmd_iocb = NULL; - list_remove_head((&pring->txq), cmd_iocb, - struct lpfc_iocbq, - list); - if (cmd_iocb) { - /* If the first ptr is not equal to the list header, - * deque the IOCBQ_t and return it. - */ + list_remove_head((&pring->txq), cmd_iocb, struct lpfc_iocbq, list); + if (cmd_iocb != NULL) pring->txq_cnt--; - } - return (cmd_iocb); + return cmd_iocb; } static IOCB_t * lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { - struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; + struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? + &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : + &phba->slim2p->mbx.us.s2.port[pring->ringno]; uint32_t max_cmd_idx = pring->numCiocb; - IOCB_t *iocb = NULL; if ((pring->next_cmdidx == pring->cmdidx) && (++pring->next_cmdidx >= max_cmd_idx)) @@ -249,15 +301,17 @@ phba->brd_no, pring->ringno, pring->local_getidx, max_cmd_idx); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; /* * All error attention handlers are posted to * worker thread */ phba->work_ha |= HA_ERATT; phba->work_hs = HS_FFER3; + + /* hbalock should already be held */ if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); return NULL; } @@ -266,39 +320,34 @@ return NULL; } - iocb = IOCB_ENTRY(pring->cmdringaddr, pring->cmdidx); - - return iocb; + return lpfc_cmd_iocb(phba, pring); } uint16_t -lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq) +lpfc_sli_next_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) { - struct lpfc_iocbq ** new_arr; - struct lpfc_iocbq ** old_arr; + struct lpfc_iocbq **new_arr; + struct lpfc_iocbq **old_arr; size_t new_len; struct lpfc_sli *psli = 
&phba->sli; uint16_t iotag; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); iotag = psli->last_iotag; if(++iotag < psli->iocbq_lookup_len) { psli->last_iotag = iotag; psli->iocbq_lookup[iotag] = iocbq; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); iocbq->iotag = iotag; return iotag; - } - else if (psli->iocbq_lookup_len < (0xffff + } else if (psli->iocbq_lookup_len < (0xffff - LPFC_IOCBQ_LOOKUP_INCREMENT)) { new_len = psli->iocbq_lookup_len + LPFC_IOCBQ_LOOKUP_INCREMENT; - spin_unlock_irq(phba->host->host_lock); - new_arr = kmalloc(new_len * sizeof (struct lpfc_iocbq *), + spin_unlock_irq(&phba->hbalock); + new_arr = kzalloc(new_len * sizeof (struct lpfc_iocbq *), GFP_KERNEL); if (new_arr) { - memset((char *)new_arr, 0, - new_len * sizeof (struct lpfc_iocbq *)); - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); old_arr = psli->iocbq_lookup; if (new_len <= psli->iocbq_lookup_len) { /* highly unprobable case */ @@ -307,11 +356,11 @@ if(++iotag < psli->iocbq_lookup_len) { psli->last_iotag = iotag; psli->iocbq_lookup[iotag] = iocbq; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); iocbq->iotag = iotag; return iotag; } - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); return 0; } if (psli->iocbq_lookup) @@ -322,13 +371,13 @@ psli->iocbq_lookup_len = new_len; psli->last_iotag = iotag; psli->iocbq_lookup[iotag] = iocbq; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); iocbq->iotag = iotag; kfree(old_arr); return iotag; } } else - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); lpfc_printf_log(phba, KERN_ERR,LOG_SLI, "%d:0318 Failed to allocate IOTAG.last IOTAG is %d\n", @@ -349,7 +398,7 @@ /* * Issue iocb command to adapter */ - lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, sizeof (IOCB_t)); + lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, phba->iocb_cmd_size); wmb(); 
pring->stats.iocb_cmd++; @@ -361,20 +410,18 @@ if (nextiocb->iocb_cmpl) lpfc_sli_ringtxcmpl_put(phba, pring, nextiocb); else - lpfc_sli_release_iocbq(phba, nextiocb); + __lpfc_sli_release_iocbq(phba, nextiocb); /* * Let the HBA know what IOCB slot will be the next one the * driver will put a command into. */ pring->cmdidx = pring->next_cmdidx; - writel(pring->cmdidx, phba->MBslimaddr - + (SLIMOFF + (pring->ringno * 2)) * 4); + writel(pring->cmdidx, &phba->host_gp[pring->ringno].cmdPutInx); } static void -lpfc_sli_update_full_ring(struct lpfc_hba * phba, - struct lpfc_sli_ring *pring) +lpfc_sli_update_full_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { int ringno = pring->ringno; @@ -393,8 +440,7 @@ } static void -lpfc_sli_update_ring(struct lpfc_hba * phba, - struct lpfc_sli_ring *pring) +lpfc_sli_update_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { int ringno = pring->ringno; @@ -407,7 +453,7 @@ } static void -lpfc_sli_resume_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring) +lpfc_sli_resume_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { IOCB_t *iocb; struct lpfc_iocbq *nextiocb; @@ -420,7 +466,7 @@ * (d) IOCB processing is not blocked by the outstanding mbox command. */ if (pring->txq_cnt && - (phba->hba_state > LPFC_LINK_DOWN) && + lpfc_is_link_up(phba) && (pring->ringno != phba->sli.fcp_ring || phba->sli.sli_flag & LPFC_PROCESS_LA) && !(pring->flag & LPFC_STOP_IOCB_MBX)) { @@ -440,11 +486,15 @@ /* lpfc_sli_turn_on_ring is only called by lpfc_sli_handle_mb_event below */ static void -lpfc_sli_turn_on_ring(struct lpfc_hba * phba, int ringno) +lpfc_sli_turn_on_ring(struct lpfc_hba *phba, int ringno) { - struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[ringno]; + struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? 
+ &phba->slim2p->mbx.us.s3_pgp.port[ringno] : + &phba->slim2p->mbx.us.s2.port[ringno]; + unsigned long iflags; /* If the ring is active, flag it */ + spin_lock_irqsave(&phba->hbalock, iflags); if (phba->sli.ring[ringno].cmdringaddr) { if (phba->sli.ring[ringno].flag & LPFC_STOP_IOCB_MBX) { phba->sli.ring[ringno].flag &= ~LPFC_STOP_IOCB_MBX; @@ -453,11 +503,176 @@ */ phba->sli.ring[ringno].local_getidx = le32_to_cpu(pgp->cmdGetInx); - spin_lock_irq(phba->host->host_lock); lpfc_sli_resume_iocb(phba, &phba->sli.ring[ringno]); - spin_unlock_irq(phba->host->host_lock); } } + spin_unlock_irqrestore(&phba->hbalock, iflags); +} + +struct lpfc_hbq_entry * +lpfc_sli_next_hbq_slot(struct lpfc_hba *phba, uint32_t hbqno) +{ + struct hbq_s *hbqp = &phba->hbqs[hbqno]; + + if (hbqp->next_hbqPutIdx == hbqp->hbqPutIdx && + ++hbqp->next_hbqPutIdx >= hbqp->entry_count) + hbqp->next_hbqPutIdx = 0; + + if (unlikely(hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx)) { + uint32_t raw_index = phba->hbq_get[hbqno]; + uint32_t getidx = le32_to_cpu(raw_index); + + hbqp->local_hbqGetIdx = getidx; + + if (unlikely(hbqp->local_hbqGetIdx >= hbqp->entry_count)) { + lpfc_printf_log(phba, KERN_ERR, + LOG_SLI | LOG_VPORT, + "%d:1802 HBQ %d: local_hbqGetIdx " + "%u is > than hbqp->entry_count %u\n", + phba->brd_no, hbqno, + hbqp->local_hbqGetIdx, + hbqp->entry_count); + + phba->link_state = LPFC_HBA_ERROR; + return NULL; + } + + if (hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx) + return NULL; + } + + return (struct lpfc_hbq_entry *) phba->hbqslimp.virt + hbqp->hbqPutIdx; +} + +void +lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba) +{ + struct lpfc_dmabuf *dmabuf, *next_dmabuf; + struct hbq_dmabuf *hbq_buf; + + /* Return all memory used by all HBQs */ + list_for_each_entry_safe(dmabuf, next_dmabuf, + &phba->hbq_buffer_list, list) { + hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf); + list_del(&hbq_buf->dbuf.list); + lpfc_hbq_free(phba, hbq_buf->dbuf.virt, hbq_buf->dbuf.phys); + kfree(hbq_buf); + } 
+} + +static void +lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno, + struct hbq_dmabuf *hbq_buf) +{ + struct lpfc_hbq_entry *hbqe; + dma_addr_t physaddr = hbq_buf->dbuf.phys; + + /* Get next HBQ entry slot to use */ + hbqe = lpfc_sli_next_hbq_slot(phba, hbqno); + if (hbqe) { + struct hbq_s *hbqp = &phba->hbqs[hbqno]; + + hbqe->bde.addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); + hbqe->bde.addrLow = le32_to_cpu(putPaddrLow(physaddr)); + hbqe->bde.tus.f.bdeSize = FCELSSIZE; + hbqe->bde.tus.f.bdeFlags = 0; + hbqe->bde.tus.w = le32_to_cpu(hbqe->bde.tus.w); + hbqe->buffer_tag = le32_to_cpu(hbq_buf->tag); + /* Sync SLIM */ + hbqp->hbqPutIdx = hbqp->next_hbqPutIdx; + writel(hbqp->hbqPutIdx, phba->hbq_put + hbqno); + /* flush */ + readl(phba->hbq_put + hbqno); + list_add_tail(&hbq_buf->dbuf.list, &phba->hbq_buffer_list); + } +} + +static struct lpfc_hbq_init lpfc_els_hbq = { + .rn = 1, + .entry_count = 200, + .mask_count = 0, + .profile = 0, + .ring_mask = 1 << LPFC_ELS_RING, + .buffer_count = 0, + .init_count = 20, + .add_count = 5, +}; + +static struct lpfc_hbq_init *lpfc_hbq_defs[] = { + &lpfc_els_hbq, +}; + +int +lpfc_sli_hbqbuf_fill_hbqs(struct lpfc_hba *phba, uint32_t hbqno, uint32_t count) +{ + uint32_t i, start, end; + struct hbq_dmabuf *hbq_buffer; + + start = lpfc_hbq_defs[hbqno]->buffer_count; + end = count + lpfc_hbq_defs[hbqno]->buffer_count; + if (end > lpfc_hbq_defs[hbqno]->entry_count) { + end = lpfc_hbq_defs[hbqno]->entry_count; + } + + /* Populate HBQ entries */ + for (i = start; i < end; i++) { + hbq_buffer = kmalloc(sizeof(struct hbq_dmabuf), + GFP_KERNEL); + if (!hbq_buffer) + return 1; + hbq_buffer->dbuf.virt = lpfc_hbq_alloc(phba, MEM_PRI, + &hbq_buffer->dbuf.phys); + if (hbq_buffer->dbuf.virt == NULL) + return 1; + hbq_buffer->tag = (i | (hbqno << 16)); + lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer); + lpfc_hbq_defs[hbqno]->buffer_count++; + } + return 0; +} + +int +lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *phba, uint32_t qno) 
+{ + return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno, + lpfc_hbq_defs[qno]->add_count)); +} + +int +lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *phba, uint32_t qno) +{ + return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno, + lpfc_hbq_defs[qno]->init_count)); +} + +struct hbq_dmabuf * +lpfc_sli_hbqbuf_find(struct lpfc_hba *phba, uint32_t tag) +{ + struct lpfc_dmabuf *d_buf; + struct hbq_dmabuf *hbq_buf; + + list_for_each_entry(d_buf, &phba->hbq_buffer_list, list) { + hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf); + if ((hbq_buf->tag & 0xffff) == tag) { + return hbq_buf; + } + } + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_VPORT, + "%d:1803 Bad hbq tag. Data: x%x x%x\n", + phba->brd_no, tag, + lpfc_hbq_defs[tag >> 16]->buffer_count); + return NULL; +} + +void +lpfc_sli_free_hbq(struct lpfc_hba *phba, struct hbq_dmabuf *sp) +{ + uint32_t hbqno; + + if (sp) { + hbqno = sp->tag >> 16; + lpfc_sli_hbq_to_firmware(phba, hbqno, sp); + } } static int @@ -511,32 +726,38 @@ case MBX_FLASH_WR_ULA: case MBX_SET_DEBUG: case MBX_LOAD_EXP_ROM: + case MBX_REG_VPI: + case MBX_UNREG_VPI: + case MBX_HEARTBEAT: ret = mbxCommand; break; default: ret = MBX_SHUTDOWN; break; } - return (ret); + return ret; } static void -lpfc_sli_wake_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq) +lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) { wait_queue_head_t *pdone_q; + unsigned long drvr_flag; /* * If pdone_q is empty, the driver thread gave up waiting and * continued running. 
*/ pmboxq->mbox_flag |= LPFC_MBX_WAKE; + spin_lock_irqsave(&phba->hbalock, drvr_flag); pdone_q = (wait_queue_head_t *) pmboxq->context1; if (pdone_q) wake_up_interruptible(pdone_q); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); return; } void -lpfc_sli_def_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) +lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_dmabuf *mp; uint16_t rpi; @@ -553,78 +774,63 @@ * If a REG_LOGIN succeeded after node is destroyed or node * is in re-discovery driver need to cleanup the RPI. */ - if (!(phba->fc_flag & FC_UNLOADING) && - (pmb->mb.mbxCommand == MBX_REG_LOGIN64) && - (!pmb->mb.mbxStatus)) { + if (!(phba->pport->load_flag & FC_UNLOADING) && + pmb->mb.mbxCommand == MBX_REG_LOGIN64 && + !pmb->mb.mbxStatus) { rpi = pmb->mb.un.varWords[0]; - lpfc_unreg_login(phba, rpi, pmb); - pmb->mbox_cmpl=lpfc_sli_def_mbox_cmpl; + lpfc_unreg_login(phba, pmb->mb.un.varRegLogin.vpi, rpi, pmb); + pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); if (rc != MBX_NOT_FINISHED) return; } - mempool_free( pmb, phba->mbox_mem_pool); + mempool_free(pmb, phba->mbox_mem_pool); return; } int -lpfc_sli_handle_mb_event(struct lpfc_hba * phba) +lpfc_sli_handle_mb_event(struct lpfc_hba *phba) { - MAILBOX_t *mbox; MAILBOX_t *pmbox; LPFC_MBOXQ_t *pmb; - struct lpfc_sli *psli; - int i, rc; - uint32_t process_next; - - psli = &phba->sli; - /* We should only get here if we are in SLI2 mode */ - if (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE)) { - return (1); - } + int rc; + LIST_HEAD(cmplq); phba->sli.slistat.mbox_event++; + /* Get all completed mailboxe buffers into the cmplq */ + spin_lock_irq(&phba->hbalock); + list_splice_init(&phba->sli.mboxq_cmpl, &cmplq); + spin_unlock_irq(&phba->hbalock); + /* Get a Mailbox buffer to setup mailbox commands for callback */ - if ((pmb = phba->sli.mbox_active)) { - pmbox = &pmb->mb; - mbox = &phba->slim2p->mbx; + do { + list_remove_head(&cmplq, pmb, 
LPFC_MBOXQ_t, list); + if (pmb == NULL) + break; - /* First check out the status word */ - lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof (uint32_t)); + pmbox = &pmb->mb; - /* Sanity check to ensure the host owns the mailbox */ - if (pmbox->mbxOwner != OWN_HOST) { - /* Lets try for a while */ - for (i = 0; i < 10240; i++) { - /* First copy command data */ - lpfc_sli_pcimem_bcopy(mbox, pmbox, - sizeof (uint32_t)); - if (pmbox->mbxOwner == OWN_HOST) - goto mbout; + if (pmbox->mbxCommand != MBX_HEARTBEAT) { + if (pmb->vport) { + lpfc_debugfs_disc_trc(pmb->vport, + LPFC_DISC_TRC_MBOX_VPORT, + "MBOX cmpl vport: cmd:x%x mb:x%x x%x", + (uint32_t)pmbox->mbxCommand, + pmbox->un.varWords[0], + pmbox->un.varWords[1]); + } + else { + lpfc_debugfs_disc_trc(phba->pport, + LPFC_DISC_TRC_MBOX, + "MBOX cmpl: cmd:x%x mb:x%x x%x", + (uint32_t)pmbox->mbxCommand, + pmbox->un.varWords[0], + pmbox->un.varWords[1]); } - /* Stray Mailbox Interrupt, mbxCommand mbxStatus - */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_MBOX | LOG_SLI, - "%d:0304 Stray Mailbox Interrupt " - "mbxCommand x%x mbxStatus x%x\n", - phba->brd_no, - pmbox->mbxCommand, - pmbox->mbxStatus); - - spin_lock_irq(phba->host->host_lock); - phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE; - spin_unlock_irq(phba->host->host_lock); - return (1); } - - mbout: - del_timer_sync(&phba->sli.mbox_tmo); - phba->work_hba_events &= ~WORKER_MBOX_TMO; /* * It is a fatal error if unknown mbox command completion. @@ -633,51 +839,50 @@ MBX_SHUTDOWN) { /* Unknow mailbox command compl */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_MBOX | LOG_SLI, - "%d:0323 Unknown Mailbox command %x Cmpl\n", + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "%d (%d):0323 Unknown Mailbox command " + "%x Cmpl\n", phba->brd_no, + pmb->vport ? 
pmb->vport->vpi : 0, pmbox->mbxCommand); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; phba->work_hs = HS_FFER3; lpfc_handle_eratt(phba); - return (0); + continue; } - phba->sli.mbox_active = NULL; if (pmbox->mbxStatus) { phba->sli.slistat.mbox_stat_err++; if (pmbox->mbxStatus == MBXERR_NO_RESOURCES) { /* Mbox cmd cmpl error - RETRYing */ - lpfc_printf_log(phba, - KERN_INFO, + lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, - "%d:0305 Mbox cmd cmpl error - " - "RETRYing Data: x%x x%x x%x x%x\n", + "%d (%d):0305 Mbox cmd cmpl " + "error - RETRYing Data: x%x " + "x%x x%x x%x\n", phba->brd_no, + pmb->vport ? pmb->vport->vpi :0, pmbox->mbxCommand, pmbox->mbxStatus, pmbox->un.varWords[0], - phba->hba_state); + pmb->vport->port_state); pmbox->mbxStatus = 0; pmbox->mbxOwner = OWN_HOST; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); if (rc == MBX_SUCCESS) - return (0); + continue; } } /* Mailbox cmd Cmpl */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_MBOX | LOG_SLI, - "%d:0307 Mailbox cmd x%x Cmpl x%p " + lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, + "%d (%d):0307 Mailbox cmd x%x Cmpl x%p " "Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n", phba->brd_no, + pmb->vport ? 
pmb->vport->vpi : 0, pmbox->mbxCommand, pmb->mbox_cmpl, *((uint32_t *) pmbox), @@ -690,39 +895,35 @@ pmbox->un.varWords[6], pmbox->un.varWords[7]); - if (pmb->mbox_cmpl) { - lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE); + if (pmb->mbox_cmpl) pmb->mbox_cmpl(phba,pmb); - } - } - + } while (1); + return 0; +} - do { - process_next = 0; /* by default don't loop */ - spin_lock_irq(phba->host->host_lock); - phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; +static struct lpfc_dmabuf * +lpfc_sli_replace_hbqbuff(struct lpfc_hba *phba, uint32_t tag) +{ + struct hbq_dmabuf *hbq_entry, *new_hbq_entry; - /* Process next mailbox command if there is one */ - if ((pmb = lpfc_mbox_get(phba))) { - spin_unlock_irq(phba->host->host_lock); - rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); - if (rc == MBX_NOT_FINISHED) { - pmb->mb.mbxStatus = MBX_NOT_FINISHED; - pmb->mbox_cmpl(phba,pmb); - process_next = 1; - continue; /* loop back */ - } - } else { - spin_unlock_irq(phba->host->host_lock); - /* Turn on IOCB processing */ - for (i = 0; i < phba->sli.num_rings; i++) - lpfc_sli_turn_on_ring(phba, i); + hbq_entry = lpfc_sli_hbqbuf_find(phba, tag); + if (hbq_entry == NULL) + return NULL; + list_del(&hbq_entry->dbuf.list); + new_hbq_entry = kmalloc(sizeof(struct hbq_dmabuf), GFP_ATOMIC); + if (new_hbq_entry == NULL) + return &hbq_entry->dbuf; + new_hbq_entry->dbuf = hbq_entry->dbuf; + new_hbq_entry->tag = -1; + hbq_entry->dbuf.virt = lpfc_hbq_alloc(phba, 0, &hbq_entry->dbuf.phys); + if (hbq_entry->dbuf.virt == NULL) { + kfree(new_hbq_entry); + return &hbq_entry->dbuf; } - - } while (process_next); - - return (0); + lpfc_sli_free_hbq(phba, hbq_entry); + return &new_hbq_entry->dbuf; } + static int lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *saveq) @@ -735,7 +936,9 @@ match = 0; irsp = &(saveq->iocb); if ((irsp->ulpCommand == CMD_RCV_ELS_REQ64_CX) - || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)) { + || (irsp->ulpCommand == 
CMD_RCV_ELS_REQ_CX) + || (irsp->ulpCommand == CMD_IOCB_RCV_ELS64_CX) + || (irsp->ulpCommand == CMD_IOCB_RCV_CONT64_CX)) { Rctl = FC_ELS_REQ; Type = FC_ELS_DATA; } else { @@ -747,13 +950,24 @@ /* Firmware Workaround */ if ((Rctl == 0) && (pring->ringno == LPFC_ELS_RING) && - (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX)) { + (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX || + irsp->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) { Rctl = FC_ELS_REQ; Type = FC_ELS_DATA; w5p->hcsw.Rctl = Rctl; w5p->hcsw.Type = Type; } } + + if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + if (irsp->ulpBdeCount != 0) + saveq->context2 = lpfc_sli_replace_hbqbuff(phba, + irsp->un.ulpWord[3]); + if (irsp->ulpBdeCount == 2) + saveq->context3 = lpfc_sli_replace_hbqbuff(phba, + irsp->un.ulpWord[15]); + } + /* unSolicited Responses */ if (pring->prt[0].profile) { if (pring->prt[0].lpfc_sli_rcv_unsol_event) @@ -781,23 +995,21 @@ /* Unexpected Rctl / Type received */ /* Ring handler: unexpected Rctl Type received */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_SLI, + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "%d:0313 Ring %d handler: unexpected Rctl x%x " - "Type x%x received \n", + "Type x%x received\n", phba->brd_no, pring->ringno, Rctl, Type); } - return(1); + return 1; } static struct lpfc_iocbq * -lpfc_sli_iocbq_lookup(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, - struct lpfc_iocbq * prspiocb) +lpfc_sli_iocbq_lookup(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct lpfc_iocbq *prspiocb) { struct lpfc_iocbq *cmd_iocb = NULL; uint16_t iotag; @@ -806,7 +1018,7 @@ if (iotag != 0 && iotag <= phba->sli.last_iotag) { cmd_iocb = phba->sli.iocbq_lookup[iotag]; - list_del(&cmd_iocb->list); + list_del_init(&cmd_iocb->list); pring->txcmplq_cnt--; return cmd_iocb; } @@ -821,16 +1033,18 @@ } static int -lpfc_sli_process_sol_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, +lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *saveq) { 
- struct lpfc_iocbq * cmdiocbp; + struct lpfc_iocbq *cmdiocbp; int rc = 1; unsigned long iflag; /* Based on the iotag field, get the cmd IOCB from the txcmplq */ - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq); + spin_unlock_irqrestore(&phba->hbalock, iflag); + if (cmdiocbp) { if (cmdiocbp->iocb_cmpl) { /* @@ -846,17 +1060,8 @@ saveq->iocb.un.ulpWord[4] = IOERR_SLI_ABORTED; } - spin_unlock_irqrestore(phba->host->host_lock, - iflag); - (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq); - spin_lock_irqsave(phba->host->host_lock, iflag); } - else { - spin_unlock_irqrestore(phba->host->host_lock, - iflag); (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq); - spin_lock_irqsave(phba->host->host_lock, iflag); - } } else lpfc_sli_release_iocbq(phba, cmdiocbp); } else { @@ -870,12 +1075,12 @@ * Ring handler: unexpected completion IoTag * */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_SLI, - "%d:0322 Ring %d handler: unexpected " - "completion IoTag x%x Data: x%x x%x x%x x%x\n", + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "%d (%d):0322 Ring %d handler: " + "unexpected completion IoTag x%x " + "Data: x%x x%x x%x x%x\n", phba->brd_no, + cmdiocbp->vport->vpi, pring->ringno, saveq->iocb.ulpIoTag, saveq->iocb.ulpStatus, @@ -885,14 +1090,15 @@ } } - spin_unlock_irqrestore(phba->host->host_lock, iflag); return rc; } -static void lpfc_sli_rsp_pointers_error(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring) +static void +lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { - struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; + struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? 
+ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : + &phba->slim2p->mbx.us.s2.port[pring->ringno]; /* * Ring handler: portRspPut is bigger then * rsp ring @@ -904,7 +1110,7 @@ le32_to_cpu(pgp->rspPutInx), pring->numRiocb); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; /* * All error attention handlers are posted to @@ -912,16 +1118,18 @@ */ phba->work_ha |= HA_ERATT; phba->work_hs = HS_FFER3; + + /* hbalock should already be held */ if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); return; } -void lpfc_sli_poll_fcp_ring(struct lpfc_hba * phba) +void lpfc_sli_poll_fcp_ring(struct lpfc_hba *phba) { - struct lpfc_sli * psli = &phba->sli; - struct lpfc_sli_ring * pring = &psli->ring[LPFC_FCP_RING]; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_FCP_RING]; IOCB_t *irsp = NULL; IOCB_t *entry = NULL; struct lpfc_iocbq *cmdiocbq = NULL; @@ -931,13 +1139,15 @@ uint32_t portRspPut, portRspMax; int type; uint32_t rsp_cmpl = 0; - void __iomem *to_slim; uint32_t ha_copy; + unsigned long iflags; pring->stats.iocb_event++; - /* The driver assumes SLI-2 mode */ - pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; + pgp = (phba->sli_rev == 3) ? 
+ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : + &phba->slim2p->mbx.us.s2.port[pring->ringno]; + /* * The next available response entry should never exceed the maximum @@ -952,15 +1162,13 @@ rmb(); while (pring->rspidx != portRspPut) { - - entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx); - + entry = lpfc_resp_iocb(phba, pring); if (++pring->rspidx >= portRspMax) pring->rspidx = 0; lpfc_sli_pcimem_bcopy((uint32_t *) entry, (uint32_t *) &rspiocbq.iocb, - sizeof (IOCB_t)); + phba->iocb_rsp_size); irsp = &rspiocbq.iocb; type = lpfc_sli_iocb_cmd_type(irsp->ulpCommand & CMD_IOCB_MASK); pring->stats.iocb_rsp++; @@ -998,8 +1206,10 @@ break; } + spin_lock_irqsave(&phba->hbalock, iflags); cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring, &rspiocbq); + spin_unlock_irqrestore(&phba->hbalock, iflags); if ((cmdiocbq) && (cmdiocbq->iocb_cmpl)) { (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, &rspiocbq); @@ -1033,9 +1243,7 @@ * been updated, sync the pgp->rspPutInx and fetch the new port * response put pointer. 
*/ - to_slim = phba->MBslimaddr + - (SLIMOFF + (pring->ringno * 2) + 1) * 4; - writeb(pring->rspidx, to_slim); + writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx); if (pring->rspidx == portRspPut) portRspPut = le32_to_cpu(pgp->rspPutInx); @@ -1045,13 +1253,16 @@ ha_copy >>= (LPFC_FCP_RING * 4); if ((rsp_cmpl > 0) && (ha_copy & HA_R0RE_REQ)) { + spin_lock_irqsave(&phba->hbalock, iflags); pring->stats.iocb_rsp_full++; status = ((CA_R0ATT | CA_R0RE_RSP) << (LPFC_FCP_RING * 4)); writel(status, phba->CAregaddr); readl(phba->CAregaddr); + spin_unlock_irqrestore(&phba->hbalock, iflags); } if ((ha_copy & HA_R0CE_RSP) && (pring->flag & LPFC_CALL_RING_AVAILABLE)) { + spin_lock_irqsave(&phba->hbalock, iflags); pring->flag &= ~LPFC_CALL_RING_AVAILABLE; pring->stats.iocb_cmd_empty++; @@ -1062,6 +1273,7 @@ if ((pring->lpfc_sli_cmd_available)) (pring->lpfc_sli_cmd_available) (phba, pring); + spin_unlock_irqrestore(&phba->hbalock, iflags); } return; @@ -1072,10 +1284,12 @@ * to check it explicitly. */ static int -lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, uint32_t mask) +lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, uint32_t mask) { - struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; + struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? 
+ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : + &phba->slim2p->mbx.us.s2.port[pring->ringno]; IOCB_t *irsp = NULL; IOCB_t *entry = NULL; struct lpfc_iocbq *cmdiocbq = NULL; @@ -1086,9 +1300,8 @@ lpfc_iocb_type type; unsigned long iflag; uint32_t rsp_cmpl = 0; - void __iomem *to_slim; - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); pring->stats.iocb_event++; /* @@ -1099,7 +1312,7 @@ portRspPut = le32_to_cpu(pgp->rspPutInx); if (unlikely(portRspPut >= portRspMax)) { lpfc_sli_rsp_pointers_error(phba, pring); - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, iflag); return 1; } @@ -1110,14 +1323,15 @@ * structure. The copy involves a byte-swap since the * network byte order and pci byte orders are different. */ - entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx); + entry = lpfc_resp_iocb(phba, pring); + phba->last_completion_time = jiffies; if (++pring->rspidx >= portRspMax) pring->rspidx = 0; lpfc_sli_pcimem_bcopy((uint32_t *) entry, (uint32_t *) &rspiocbq.iocb, - sizeof (IOCB_t)); + phba->iocb_rsp_size); INIT_LIST_HEAD(&(rspiocbq.list)); irsp = &rspiocbq.iocb; @@ -1126,14 +1340,28 @@ rsp_cmpl++; if (unlikely(irsp->ulpStatus)) { + /* + * If resource errors reported from HBA, reduce + * queuedepths of the SCSI device. 
+ */ + if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && + (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) { + spin_unlock_irqrestore(&phba->hbalock, iflag); + lpfc_adjust_queue_depth(phba); + spin_lock_irqsave(&phba->hbalock, iflag); + } + /* Rsp ring error: IOCB */ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "%d:0336 Rsp Ring %d error: IOCB Data: " "x%x x%x x%x x%x x%x x%x x%x x%x\n", phba->brd_no, pring->ringno, - irsp->un.ulpWord[0], irsp->un.ulpWord[1], - irsp->un.ulpWord[2], irsp->un.ulpWord[3], - irsp->un.ulpWord[4], irsp->un.ulpWord[5], + irsp->un.ulpWord[0], + irsp->un.ulpWord[1], + irsp->un.ulpWord[2], + irsp->un.ulpWord[3], + irsp->un.ulpWord[4], + irsp->un.ulpWord[5], *(((uint32_t *) irsp) + 6), *(((uint32_t *) irsp) + 7)); } @@ -1149,7 +1377,8 @@ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "%d:0333 IOCB cmd 0x%x" " processed. Skipping" - " completion\n", phba->brd_no, + " completion\n", + phba->brd_no, irsp->ulpCommand); break; } @@ -1161,19 +1390,19 @@ (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, &rspiocbq); } else { - spin_unlock_irqrestore( - phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, + iflag); (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, &rspiocbq); - spin_lock_irqsave(phba->host->host_lock, + spin_lock_irqsave(&phba->hbalock, iflag); } } break; case LPFC_UNSOL_IOCB: - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, iflag); lpfc_sli_process_unsol_iocb(phba, pring, &rspiocbq); - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); break; default: if (irsp->ulpCommand == CMD_ADAPTER_MSG) { @@ -1188,8 +1417,10 @@ lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "%d:0334 Unknown IOCB command " "Data: x%x, x%x x%x x%x x%x\n", - phba->brd_no, type, irsp->ulpCommand, - irsp->ulpStatus, irsp->ulpIoTag, + phba->brd_no, type, + irsp->ulpCommand, + irsp->ulpStatus, + irsp->ulpIoTag, irsp->ulpContext); } break; @@ -1201,9 +1432,7 @@ * been updated, sync the pgp->rspPutInx 
and fetch the new port * response put pointer. */ - to_slim = phba->MBslimaddr + - (SLIMOFF + (pring->ringno * 2) + 1) * 4; - writel(pring->rspidx, to_slim); + writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx); if (pring->rspidx == portRspPut) portRspPut = le32_to_cpu(pgp->rspPutInx); @@ -1228,31 +1457,31 @@ } - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, iflag); return rc; } - int -lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, uint32_t mask) +lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, uint32_t mask) { + struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? + &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : + &phba->slim2p->mbx.us.s2.port[pring->ringno]; IOCB_t *entry; IOCB_t *irsp = NULL; struct lpfc_iocbq *rspiocbp = NULL; struct lpfc_iocbq *next_iocb; struct lpfc_iocbq *cmdiocbp; struct lpfc_iocbq *saveq; - struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; uint8_t iocb_cmd_type; lpfc_iocb_type type; uint32_t status, free_saveq; uint32_t portRspPut, portRspMax; int rc = 1; unsigned long iflag; - void __iomem *to_slim; - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); pring->stats.iocb_event++; /* @@ -1266,16 +1495,14 @@ * Ring handler: portRspPut is bigger then * rsp ring */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_SLI, + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "%d:0303 Ring %d handler: portRspPut %d " "is bigger then rsp ring %d\n", - phba->brd_no, - pring->ringno, portRspPut, portRspMax); + phba->brd_no, pring->ringno, portRspPut, + portRspMax); - phba->hba_state = LPFC_HBA_ERROR; - spin_unlock_irqrestore(phba->host->host_lock, iflag); + phba->link_state = LPFC_HBA_ERROR; + spin_unlock_irqrestore(&phba->hbalock, iflag); phba->work_hs = HS_FFER3; lpfc_handle_eratt(phba); @@ -1298,23 +1525,24 @@ * the ulpLe field is set, the entire Command has been * 
received. */ - entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx); - rspiocbp = lpfc_sli_get_iocbq(phba); + entry = lpfc_resp_iocb(phba, pring); + + phba->last_completion_time = jiffies; + rspiocbp = __lpfc_sli_get_iocbq(phba); if (rspiocbp == NULL) { printk(KERN_ERR "%s: out of buffers! Failing " "completion.\n", __FUNCTION__); break; } - lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb, sizeof (IOCB_t)); + lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb, + phba->iocb_rsp_size); irsp = &rspiocbp->iocb; if (++pring->rspidx >= portRspMax) pring->rspidx = 0; - to_slim = phba->MBslimaddr + (SLIMOFF + (pring->ringno * 2) - + 1) * 4; - writel(pring->rspidx, to_slim); + writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx); if (list_empty(&(pring->iocb_continueq))) { list_add(&rspiocbp->list, &(pring->iocb_continueq)); @@ -1338,13 +1566,26 @@ pring->stats.iocb_rsp++; + /* + * If resource errors reported from HBA, reduce + * queuedepths of the SCSI device. + */ + if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && + (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) { + spin_unlock_irqrestore(&phba->hbalock, iflag); + lpfc_adjust_queue_depth(phba); + spin_lock_irqsave(&phba->hbalock, iflag); + } + if (irsp->ulpStatus) { /* Rsp ring error: IOCB */ - lpfc_printf_log(phba, - KERN_WARNING, - LOG_SLI, - "%d:0328 Rsp Ring %d error: IOCB Data: " - "x%x x%x x%x x%x x%x x%x x%x x%x\n", + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "%d:0328 Rsp Ring %d error: " + "IOCB Data: " + "x%x x%x x%x x%x " + "x%x x%x x%x x%x " + "x%x x%x x%x x%x " + "x%x x%x x%x x%x\n", phba->brd_no, pring->ringno, irsp->un.ulpWord[0], @@ -1354,7 +1595,15 @@ irsp->un.ulpWord[4], irsp->un.ulpWord[5], *(((uint32_t *) irsp) + 6), - *(((uint32_t *) irsp) + 7)); + *(((uint32_t *) irsp) + 7), + *(((uint32_t *) irsp) + 8), + *(((uint32_t *) irsp) + 9), + *(((uint32_t *) irsp) + 10), + *(((uint32_t *) irsp) + 11), + *(((uint32_t *) irsp) + 12), + *(((uint32_t *) irsp) + 13), + *(((uint32_t *) irsp) + 14), + 
*(((uint32_t *) irsp) + 15)); } /* @@ -1366,17 +1615,17 @@ iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK; type = lpfc_sli_iocb_cmd_type(iocb_cmd_type); if (type == LPFC_SOL_IOCB) { - spin_unlock_irqrestore(phba->host->host_lock, + spin_unlock_irqrestore(&phba->hbalock, iflag); rc = lpfc_sli_process_sol_iocb(phba, pring, saveq); - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); } else if (type == LPFC_UNSOL_IOCB) { - spin_unlock_irqrestore(phba->host->host_lock, + spin_unlock_irqrestore(&phba->hbalock, iflag); rc = lpfc_sli_process_unsol_iocb(phba, pring, saveq); - spin_lock_irqsave(phba->host->host_lock, iflag); + spin_lock_irqsave(&phba->hbalock, iflag); } else if (type == LPFC_ABORT_IOCB) { if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) && ((cmdiocbp = @@ -1386,15 +1635,15 @@ routine */ if (cmdiocbp->iocb_cmpl) { spin_unlock_irqrestore( - phba->host->host_lock, + &phba->hbalock, iflag); (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq); spin_lock_irqsave( - phba->host->host_lock, + &phba->hbalock, iflag); } else - lpfc_sli_release_iocbq(phba, + __lpfc_sli_release_iocbq(phba, cmdiocbp); } } else if (type == LPFC_UNKNOWN_IOCB) { @@ -1411,11 +1660,10 @@ phba->brd_no, adaptermsg); } else { /* Unknown IOCB command */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_SLI, - "%d:0335 Unknown IOCB command " - "Data: x%x x%x x%x x%x\n", + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "%d:0335 Unknown IOCB " + "command Data: x%x " + "x%x x%x x%x\n", phba->brd_no, irsp->ulpCommand, irsp->ulpStatus, @@ -1425,18 +1673,15 @@ } if (free_saveq) { - if (!list_empty(&saveq->list)) { - list_for_each_entry_safe(rspiocbp, - next_iocb, - &saveq->list, - list) { + list_for_each_entry_safe(rspiocbp, next_iocb, + &saveq->list, list) { list_del(&rspiocbp->list); - lpfc_sli_release_iocbq(phba, + __lpfc_sli_release_iocbq(phba, rspiocbp); } + __lpfc_sli_release_iocbq(phba, saveq); } - lpfc_sli_release_iocbq(phba, saveq); - } + rspiocbp = NULL; } /* @@ -1449,7 
+1694,7 @@ } } /* while (pring->rspidx != portRspPut) */ - if ((rspiocbp != 0) && (mask & HA_R0RE_REQ)) { + if ((rspiocbp != NULL) && (mask & HA_R0RE_REQ)) { /* At least one response entry has been freed */ pring->stats.iocb_rsp_full++; /* SET RxRE_RSP in Chip Att register */ @@ -1470,24 +1715,25 @@ } - spin_unlock_irqrestore(phba->host->host_lock, iflag); + spin_unlock_irqrestore(&phba->hbalock, iflag); return rc; } -int +void lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { LIST_HEAD(completions); struct lpfc_iocbq *iocb, *next_iocb; IOCB_t *cmd = NULL; - int errcnt; - errcnt = 0; + if (pring->ringno == LPFC_ELS_RING) { + lpfc_fabric_abort_hba(phba); + } /* Error everything on txq and txcmplq * First do the txq. */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); list_splice_init(&pring->txq, &completions); pring->txq_cnt = 0; @@ -1495,26 +1741,25 @@ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) lpfc_sli_issue_abort_iotag(phba, pring, iocb); - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); while (!list_empty(&completions)) { iocb = list_get_first(&completions, struct lpfc_iocbq, list); cmd = &iocb->iocb; - list_del(&iocb->list); + list_del_init(&iocb->list); - if (iocb->iocb_cmpl) { + if (!iocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, iocb); + else { cmd->ulpStatus = IOSTAT_LOCAL_REJECT; cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; (iocb->iocb_cmpl) (phba, iocb, iocb); - } else - lpfc_sli_release_iocbq(phba, iocb); } - - return errcnt; + } } int -lpfc_sli_brdready(struct lpfc_hba * phba, uint32_t mask) +lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask) { uint32_t status; int i = 0; @@ -1541,7 +1786,8 @@ msleep(2500); if (i == 15) { - phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */ + /* Do post */ + phba->pport->port_state = LPFC_VPORT_UNKNOWN; lpfc_sli_brdrestart(phba); } /* Read the HBA Host Status Register */ @@ -1550,7 +1796,7 @@ /* Check to see if any 
errors occurred during init */ if ((status & HS_FFERM) || (i >= 20)) { - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; retval = 1; } @@ -1559,7 +1805,7 @@ #define BARRIER_TEST_PATTERN (0xdeadbeef) -void lpfc_reset_barrier(struct lpfc_hba * phba) +void lpfc_reset_barrier(struct lpfc_hba *phba) { uint32_t __iomem *resp_buf; uint32_t __iomem *mbox_buf; @@ -1584,12 +1830,12 @@ hc_copy = readl(phba->HCregaddr); writel((hc_copy & ~HC_ERINT_ENA), phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - phba->fc_flag |= FC_IGNORE_ERATT; + phba->link_flag |= LS_IGNORE_ERATT; if (readl(phba->HAregaddr) & HA_ERATT) { /* Clear Chip error bit */ writel(HA_ERATT, phba->HAregaddr); - phba->stopped = 1; + phba->pport->stopped = 1; } mbox = 0; @@ -1606,7 +1852,7 @@ if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) { if (phba->sli.sli_flag & LPFC_SLI2_ACTIVE || - phba->stopped) + phba->pport->stopped) goto restore_hc; else goto clear_errat; @@ -1623,17 +1869,17 @@ if (readl(phba->HAregaddr) & HA_ERATT) { writel(HA_ERATT, phba->HAregaddr); - phba->stopped = 1; + phba->pport->stopped = 1; } restore_hc: - phba->fc_flag &= ~FC_IGNORE_ERATT; + phba->link_flag &= ~LS_IGNORE_ERATT; writel(hc_copy, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ } int -lpfc_sli_brdkill(struct lpfc_hba * phba) +lpfc_sli_brdkill(struct lpfc_hba *phba) { struct lpfc_sli *psli; LPFC_MBOXQ_t *pmb; @@ -1645,26 +1891,22 @@ psli = &phba->sli; /* Kill HBA */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_SLI, + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "%d:0329 Kill HBA Data: x%x x%x\n", - phba->brd_no, - phba->hba_state, - psli->sli_flag); + phba->brd_no, phba->pport->port_state, psli->sli_flag); if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) == 0) return 1; /* Disable the error attention */ - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); status = readl(phba->HCregaddr); status &= ~HC_ERINT_ENA; writel(status, phba->HCregaddr); 
readl(phba->HCregaddr); /* flush */ - phba->fc_flag |= FC_IGNORE_ERATT; - spin_unlock_irq(phba->host->host_lock); + phba->link_flag |= LS_IGNORE_ERATT; + spin_unlock_irq(&phba->hbalock); lpfc_kill_board(phba, pmb); pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; @@ -1673,9 +1915,9 @@ if (retval != MBX_SUCCESS) { if (retval != MBX_BUSY) mempool_free(pmb, phba->mbox_mem_pool); - spin_lock_irq(phba->host->host_lock); - phba->fc_flag &= ~FC_IGNORE_ERATT; - spin_unlock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); + phba->link_flag &= ~LS_IGNORE_ERATT; + spin_unlock_irq(&phba->hbalock); return 1; } @@ -1698,22 +1940,22 @@ del_timer_sync(&psli->mbox_tmo); if (ha_copy & HA_ERATT) { writel(HA_ERATT, phba->HAregaddr); - phba->stopped = 1; + phba->pport->stopped = 1; } - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - phba->fc_flag &= ~FC_IGNORE_ERATT; - spin_unlock_irq(phba->host->host_lock); + phba->link_flag &= ~LS_IGNORE_ERATT; + spin_unlock_irq(&phba->hbalock); psli->mbox_active = NULL; lpfc_hba_down_post(phba); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; - return (ha_copy & HA_ERATT ? 0 : 1); + return ha_copy & HA_ERATT ? 
0 : 1; } int -lpfc_sli_brdreset(struct lpfc_hba * phba) +lpfc_sli_brdreset(struct lpfc_hba *phba) { struct lpfc_sli *psli; struct lpfc_sli_ring *pring; @@ -1725,12 +1967,12 @@ /* Reset HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "%d:0325 Reset HBA Data: x%x x%x\n", phba->brd_no, - phba->hba_state, psli->sli_flag); + phba->pport->port_state, psli->sli_flag); /* perform board reset */ phba->fc_eventTag = 0; - phba->fc_myDID = 0; - phba->fc_prevDID = 0; + phba->pport->fc_myDID = 0; + phba->pport->fc_prevDID = 0; /* Turn off parity checking and serr during the physical reset */ pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value); @@ -1760,12 +2002,12 @@ pring->missbufcnt = 0; } - phba->hba_state = LPFC_WARM_START; + phba->link_state = LPFC_WARM_START; return 0; } int -lpfc_sli_brdrestart(struct lpfc_hba * phba) +lpfc_sli_brdrestart(struct lpfc_hba *phba) { MAILBOX_t *mb; struct lpfc_sli *psli; @@ -1773,14 +2015,14 @@ volatile uint32_t word0; void __iomem *to_slim; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); psli = &phba->sli; /* Restart HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "%d:0337 Restart HBA Data: x%x x%x\n", phba->brd_no, - phba->hba_state, psli->sli_flag); + phba->pport->port_state, psli->sli_flag); word0 = 0; mb = (MAILBOX_t *) &word0; @@ -1794,7 +2036,7 @@ readl(to_slim); /* flush */ /* Only skip post after fc_ffinit is completed */ - if (phba->hba_state) { + if (phba->pport->port_state) { skip_post = 1; word0 = 1; /* This is really setting up word1 */ } else { @@ -1806,10 +2048,10 @@ readl(to_slim); /* flush */ lpfc_sli_brdreset(phba); - phba->stopped = 0; - phba->hba_state = LPFC_INIT_START; + phba->pport->stopped = 0; + phba->link_state = LPFC_INIT_START; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets)); psli->stats_start = get_seconds(); @@ -1843,14 +2085,11 @@ if (i++ >= 20) { /* Adapter failed to init, timeout, 
status reg */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0436 Adapter failed to init, " "timeout, status reg x%x\n", - phba->brd_no, - status); - phba->hba_state = LPFC_HBA_ERROR; + phba->brd_no, status); + phba->link_state = LPFC_HBA_ERROR; return -ETIMEDOUT; } @@ -1859,14 +2098,12 @@ /* ERROR: During chipset initialization */ /* Adapter failed to init, chipset, status reg */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0437 Adapter failed to init, " "chipset, status reg x%x\n", phba->brd_no, status); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; return -EIO; } @@ -1879,7 +2116,8 @@ } if (i == 15) { - phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */ + /* Do post */ + phba->pport->port_state = LPFC_VPORT_UNKNOWN; lpfc_sli_brdrestart(phba); } /* Read the HBA Host Status Register */ @@ -1890,14 +2128,12 @@ if (status & HS_FFERM) { /* ERROR: During chipset initialization */ /* Adapter failed to init, chipset, status reg */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0438 Adapter failed to init, chipset, " "status reg x%x\n", phba->brd_no, status); - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; return -EIO; } @@ -1911,68 +2147,239 @@ return 0; } -int -lpfc_sli_hba_setup(struct lpfc_hba * phba) +static int +lpfc_sli_hbq_count(void) { - LPFC_MBOXQ_t *pmb; - uint32_t resetcount = 0, rc = 0, done = 0; - - pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!pmb) { - phba->hba_state = LPFC_HBA_ERROR; - return -ENOMEM; - } + return ARRAY_SIZE(lpfc_hbq_defs); +} +static int +lpfc_sli_hbq_entry_count(void) +{ + int hbq_count = lpfc_sli_hbq_count(); + int count = 0; + int i; + + for (i = 0; i < hbq_count; ++i) + count += lpfc_hbq_defs[i]->entry_count; + return count; +} + +int +lpfc_sli_hbq_size(void) +{ + return lpfc_sli_hbq_entry_count() * 
sizeof(struct lpfc_hbq_entry); +} + +static int +lpfc_sli_hbq_setup(struct lpfc_hba *phba) +{ + int hbq_count = lpfc_sli_hbq_count(); + LPFC_MBOXQ_t *pmb; + MAILBOX_t *pmbox; + uint32_t hbqno; + uint32_t hbq_entry_index; + + /* Get a Mailbox buffer to setup mailbox + * commands for HBA initialization + */ + pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + + if (!pmb) + return -ENOMEM; + + pmbox = &pmb->mb; + + /* Initialize the struct lpfc_sli_hbq structure for each hbq */ + phba->link_state = LPFC_INIT_MBX_CMDS; + + hbq_entry_index = 0; + for (hbqno = 0; hbqno < hbq_count; ++hbqno) { + phba->hbqs[hbqno].next_hbqPutIdx = 0; + phba->hbqs[hbqno].hbqPutIdx = 0; + phba->hbqs[hbqno].local_hbqGetIdx = 0; + phba->hbqs[hbqno].entry_count = + lpfc_hbq_defs[hbqno]->entry_count; + lpfc_config_hbq(phba, lpfc_hbq_defs[hbqno], hbq_entry_index, + pmb); + hbq_entry_index += phba->hbqs[hbqno].entry_count; + + if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) { + /* Adapter failed to init, mbxCmd CFG_RING, + mbxStatus , ring */ + + lpfc_printf_log(phba, KERN_ERR, + LOG_SLI | LOG_VPORT, + "%d:1805 Adapter failed to init. 
" + "Data: x%x x%x x%x\n", + phba->brd_no, pmbox->mbxCommand, + pmbox->mbxStatus, hbqno); + + phba->link_state = LPFC_HBA_ERROR; + mempool_free(pmb, phba->mbox_mem_pool); + return ENXIO; + } + } + phba->hbq_count = hbq_count; + + mempool_free(pmb, phba->mbox_mem_pool); + + /* Initially populate or replenish the HBQs */ + for (hbqno = 0; hbqno < hbq_count; ++hbqno) { + if (lpfc_sli_hbqbuf_init_hbqs(phba, hbqno)) + return -ENOMEM; + } + return 0; +} + +static int +lpfc_do_config_port(struct lpfc_hba *phba, int sli_mode) +{ + LPFC_MBOXQ_t *pmb; + uint32_t resetcount = 0, rc = 0, done = 0; + + pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!pmb) { + phba->link_state = LPFC_HBA_ERROR; + return -ENOMEM; + } + + phba->sli_rev = sli_mode; while (resetcount < 2 && !done) { - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE; - spin_unlock_irq(phba->host->host_lock); - phba->hba_state = LPFC_STATE_UNKNOWN; + spin_unlock_irq(&phba->hbalock); + phba->pport->port_state = LPFC_VPORT_UNKNOWN; lpfc_sli_brdrestart(phba); msleep(2500); rc = lpfc_sli_chipset_init(phba); if (rc) break; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); resetcount++; - /* Call pre CONFIG_PORT mailbox command initialization. A value of 0 - * means the call was successful. Any other nonzero value is a failure, - * but if ERESTART is returned, the driver may reset the HBA and try - * again. + /* Call pre CONFIG_PORT mailbox command initialization. A + * value of 0 means the call was successful. Any other + * nonzero value is a failure, but if ERESTART is returned, + * the driver may reset the HBA and try again. 
*/ rc = lpfc_config_port_prep(phba); if (rc == -ERESTART) { - phba->hba_state = 0; + phba->link_state = LPFC_LINK_UNKNOWN; continue; } else if (rc) { break; } - phba->hba_state = LPFC_INIT_MBX_CMDS; + phba->link_state = LPFC_INIT_MBX_CMDS; lpfc_config_port(phba, pmb); rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); - if (rc == MBX_SUCCESS) - done = 1; - else { + if (rc != MBX_SUCCESS) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0442 Adapter failed to init, mbxCmd x%x " "CONFIG_PORT, mbxStatus x%x Data: x%x\n", phba->brd_no, pmb->mb.mbxCommand, pmb->mb.mbxStatus, 0); + spin_lock_irq(&phba->hbalock); phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE; + spin_unlock_irq(&phba->hbalock); + rc = -ENXIO; + } else { + done = 1; + phba->max_vpi = (phba->max_vpi && + pmb->mb.un.varCfgPort.gmv) != 0 + ? pmb->mb.un.varCfgPort.max_vpi + : 0; } } - if (!done) + + if (!done) { + rc = -EINVAL; + goto do_prep_failed; + } + + if ((pmb->mb.un.varCfgPort.sli_mode == 3) && + (!pmb->mb.un.varCfgPort.cMA)) { + rc = -ENXIO; + goto do_prep_failed; + } + return rc; + +do_prep_failed: + mempool_free(pmb, phba->mbox_mem_pool); + return rc; +} + +int +lpfc_sli_hba_setup(struct lpfc_hba *phba) +{ + uint32_t rc; + int mode = 3; + + switch (lpfc_sli_mode) { + case 2: + if (phba->cfg_npiv_enable) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, + "%d:1824 NPIV enabled: Override lpfc_sli_mode " + "parameter (%d) to auto (0).\n", + phba->brd_no, lpfc_sli_mode); + break; + } + mode = 2; + break; + case 0: + case 3: + break; + default: + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, + "%d:1819 Unrecognized lpfc_sli_mode " + "parameter: %d.\n", + phba->brd_no, lpfc_sli_mode); + + break; + } + + rc = lpfc_do_config_port(phba, mode); + if (rc && lpfc_sli_mode == 3) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, + "%d:1820 Unable to select SLI-3. 
" + "Not supported by adapter.\n", + phba->brd_no); + if (rc && mode != 2) + rc = lpfc_do_config_port(phba, 2); + if (rc) + goto lpfc_sli_hba_setup_error; + + if (phba->sli_rev == 3) { + phba->iocb_cmd_size = SLI3_IOCB_CMD_SIZE; + phba->iocb_rsp_size = SLI3_IOCB_RSP_SIZE; + phba->sli3_options |= LPFC_SLI3_ENABLED; + phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED; + + } else { + phba->iocb_cmd_size = SLI2_IOCB_CMD_SIZE; + phba->iocb_rsp_size = SLI2_IOCB_RSP_SIZE; + phba->sli3_options = 0; + } + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "%d:0444 Firmware in SLI %x mode. Max_vpi %d\n", + phba->brd_no, phba->sli_rev, phba->max_vpi); + rc = lpfc_sli_ring_map(phba); + + if (rc) goto lpfc_sli_hba_setup_error; - rc = lpfc_sli_ring_map(phba, pmb); + /* Init HBQs */ + if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + rc = lpfc_sli_hbq_setup(phba); if (rc) goto lpfc_sli_hba_setup_error; + } phba->sli.sli_flag |= LPFC_PROCESS_LA; @@ -1980,11 +2387,13 @@ if (rc) goto lpfc_sli_hba_setup_error; - goto lpfc_sli_hba_setup_exit; + return rc; + lpfc_sli_hba_setup_error: - phba->hba_state = LPFC_HBA_ERROR; -lpfc_sli_hba_setup_exit: - mempool_free(pmb, phba->mbox_mem_pool); + phba->link_state = LPFC_HBA_ERROR; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "%d:0445 Firmware initialization failed\n", + phba->brd_no); return rc; } @@ -2004,44 +2413,43 @@ void lpfc_mbox_timeout(unsigned long ptr) { - struct lpfc_hba *phba; + struct lpfc_hba *phba = (struct lpfc_hba *) ptr; unsigned long iflag; + uint32_t tmo_posted; + + spin_lock_irqsave(&phba->pport->work_port_lock, iflag); + tmo_posted = phba->pport->work_port_events & WORKER_MBOX_TMO; + if (!tmo_posted) + phba->pport->work_port_events |= WORKER_MBOX_TMO; + spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag); - phba = (struct lpfc_hba *)ptr; - spin_lock_irqsave(phba->host->host_lock, iflag); - if (!(phba->work_hba_events & WORKER_MBOX_TMO)) { - phba->work_hba_events |= WORKER_MBOX_TMO; + if (!tmo_posted) { + 
spin_lock_irqsave(&phba->hbalock, iflag); if (phba->work_wait) - wake_up(phba->work_wait); + lpfc_worker_wake_up(phba); + spin_unlock_irqrestore(&phba->hbalock, iflag); } - spin_unlock_irqrestore(phba->host->host_lock, iflag); } void lpfc_mbox_timeout_handler(struct lpfc_hba *phba) { - LPFC_MBOXQ_t *pmbox; - MAILBOX_t *mb; + LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active; + MAILBOX_t *mb = &pmbox->mb; struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; - spin_lock_irq(phba->host->host_lock); - if (!(phba->work_hba_events & WORKER_MBOX_TMO)) { - spin_unlock_irq(phba->host->host_lock); + if (!(phba->pport->work_port_events & WORKER_MBOX_TMO)) { return; } - pmbox = phba->sli.mbox_active; - mb = &pmbox->mb; - /* Mbox cmd timeout */ - lpfc_printf_log(phba, - KERN_ERR, - LOG_MBOX | LOG_SLI, - "%d:0310 Mailbox command x%x timeout Data: x%x x%x x%p\n", + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "%d:0310 Mailbox command x%x timeout Data: x%x x%x " + "x%p\n", phba->brd_no, mb->mbxCommand, - phba->hba_state, + phba->pport->port_state, phba->sli.sli_flag, phba->sli.mbox_active); @@ -2049,11 +2457,14 @@ * would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing * it to fail all oustanding SCSI IO. 
*/ - phba->hba_state = LPFC_STATE_UNKNOWN; - phba->work_hba_events &= ~WORKER_MBOX_TMO; - phba->fc_flag |= FC_ESTABLISH_LINK; + spin_lock_irq(&phba->pport->work_port_lock); + phba->pport->work_port_events &= ~WORKER_MBOX_TMO; + spin_unlock_irq(&phba->pport->work_port_lock); + spin_lock_irq(&phba->hbalock); + phba->link_state = LPFC_LINK_UNKNOWN; + phba->pport->fc_flag |= FC_ESTABLISH_LINK; psli->sli_flag &= ~LPFC_SLI2_ACTIVE; - spin_unlock_irq(phba->host->host_lock); + spin_unlock_irq(&phba->hbalock); pring = &psli->ring[psli->fcp_ring]; lpfc_sli_abort_iocb_ring(phba, pring); @@ -2075,10 +2486,10 @@ } int -lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag) +lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag) { MAILBOX_t *mb; - struct lpfc_sli *psli; + struct lpfc_sli *psli = &phba->sli; uint32_t status, evtctr; uint32_t ha_copy; int i; @@ -2086,31 +2497,44 @@ volatile uint32_t word0, ldata; void __iomem *to_slim; + if (pmbox->mbox_cmpl && pmbox->mbox_cmpl != lpfc_sli_def_mbox_cmpl && + pmbox->mbox_cmpl != lpfc_sli_wake_mbox_wait) { + if(!pmbox->vport) { + lpfc_printf_log(phba, KERN_ERR, + LOG_MBOX | LOG_VPORT, + "%d:1806 Mbox x%x failed. No vport\n", + phba->brd_no, + pmbox->mb.mbxCommand); + dump_stack(); + return MBXERR_ERROR; + } + } + + /* If the PCI channel is in offline state, do not post mbox. 
*/ if (unlikely(pci_channel_offline(phba->pcidev))) return MBX_NOT_FINISHED; + spin_lock_irqsave(&phba->hbalock, drvr_flag); psli = &phba->sli; - spin_lock_irqsave(phba->host->host_lock, drvr_flag); - mb = &pmbox->mb; status = MBX_SUCCESS; - if (phba->hba_state == LPFC_HBA_ERROR) { - spin_unlock_irqrestore(phba->host->host_lock, drvr_flag); + if (phba->link_state == LPFC_HBA_ERROR) { + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); /* Mbox command cannot issue */ - LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) - return (MBX_NOT_FINISHED); + LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) + return MBX_NOT_FINISHED; } if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT && !(readl(phba->HCregaddr) & HC_MBINT_ENA)) { - spin_unlock_irqrestore(phba->host->host_lock, drvr_flag); - LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) - return (MBX_NOT_FINISHED); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) + return MBX_NOT_FINISHED; } if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) { @@ -2120,20 +2544,18 @@ */ if (flag & MBX_POLL) { - spin_unlock_irqrestore(phba->host->host_lock, - drvr_flag); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); /* Mbox command cannot issue */ - LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) - return (MBX_NOT_FINISHED); + LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag); + return MBX_NOT_FINISHED; } if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) { - spin_unlock_irqrestore(phba->host->host_lock, - drvr_flag); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); /* Mbox command cannot issue */ - LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) - return (MBX_NOT_FINISHED); + LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag); + return MBX_NOT_FINISHED; } /* Handle STOP IOCB processing flag. 
This is only meaningful @@ -2157,21 +2579,33 @@ lpfc_mbox_put(phba, pmbox); /* Mbox cmd issue - BUSY */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_MBOX | LOG_SLI, - "%d:0308 Mbox cmd issue - BUSY Data: x%x x%x x%x x%x\n", + lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, + "%d (%d):0308 Mbox cmd issue - BUSY Data: " + "x%x x%x x%x x%x\n", phba->brd_no, - mb->mbxCommand, - phba->hba_state, - psli->sli_flag, - flag); + pmbox->vport ? pmbox->vport->vpi : 0xffffff, + mb->mbxCommand, phba->pport->port_state, + psli->sli_flag, flag); psli->slistat.mbox_busy++; - spin_unlock_irqrestore(phba->host->host_lock, - drvr_flag); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); - return (MBX_BUSY); + if (pmbox->vport) { + lpfc_debugfs_disc_trc(pmbox->vport, + LPFC_DISC_TRC_MBOX_VPORT, + "MBOX Bsy vport: cmd:x%x mb:x%x x%x", + (uint32_t)mb->mbxCommand, + mb->un.varWords[0], mb->un.varWords[1]); + } + else { + lpfc_debugfs_disc_trc(phba->pport, + LPFC_DISC_TRC_MBOX, + "MBOX Bsy: cmd:x%x mb:x%x x%x", + (uint32_t)mb->mbxCommand, + mb->un.varWords[0], mb->un.varWords[1]); + } + + return MBX_BUSY; } /* Handle STOP IOCB processing flag. 
This is only meaningful @@ -2198,11 +2632,10 @@ if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) && (mb->mbxCommand != MBX_KILL_BOARD)) { psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - spin_unlock_irqrestore(phba->host->host_lock, - drvr_flag); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); /* Mbox command cannot issue */ - LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag); - return (MBX_NOT_FINISHED); + LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag); + return MBX_NOT_FINISHED; } /* timeout active mbox command */ mod_timer(&psli->mbox_tmo, (jiffies + @@ -2210,15 +2643,29 @@ } /* Mailbox cmd issue */ - lpfc_printf_log(phba, - KERN_INFO, - LOG_MBOX | LOG_SLI, - "%d:0309 Mailbox cmd x%x issue Data: x%x x%x x%x\n", - phba->brd_no, - mb->mbxCommand, - phba->hba_state, - psli->sli_flag, - flag); + lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, + "%d (%d):0309 Mailbox cmd x%x issue Data: x%x x%x " + "x%x\n", + phba->brd_no, pmbox->vport ? pmbox->vport->vpi : 0, + mb->mbxCommand, phba->pport->port_state, + psli->sli_flag, flag); + + if (mb->mbxCommand != MBX_HEARTBEAT) { + if (pmbox->vport) { + lpfc_debugfs_disc_trc(pmbox->vport, + LPFC_DISC_TRC_MBOX_VPORT, + "MBOX Send vport: cmd:x%x mb:x%x x%x", + (uint32_t)mb->mbxCommand, + mb->un.varWords[0], mb->un.varWords[1]); + } + else { + lpfc_debugfs_disc_trc(phba->pport, + LPFC_DISC_TRC_MBOX, + "MBOX Send: cmd:x%x mb:x%x x%x", + (uint32_t)mb->mbxCommand, + mb->un.varWords[0], mb->un.varWords[1]); + } + } psli->slistat.mbox_cmd++; evtctr = psli->slistat.mbox_event; @@ -2285,12 +2732,12 @@ /* Wait for command to complete */ while (((word0 & OWN_CHIP) == OWN_CHIP) || (!(ha_copy & HA_MBATT) && - (phba->hba_state > LPFC_WARM_START))) { + (phba->link_state > LPFC_WARM_START))) { if (i-- <= 0) { psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - spin_unlock_irqrestore(phba->host->host_lock, + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); - return (MBX_NOT_FINISHED); + return MBX_NOT_FINISHED; } /* Check if we took a mbox interrupt 
while we were @@ -2299,12 +2746,12 @@ && (evtctr != psli->slistat.mbox_event)) break; - spin_unlock_irqrestore(phba->host->host_lock, + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); msleep(1); - spin_lock_irqsave(phba->host->host_lock, drvr_flag); + spin_lock_irqsave(&phba->hbalock, drvr_flag); if (psli->sli_flag & LPFC_SLI2_ACTIVE) { /* First copy command data */ @@ -2355,23 +2802,25 @@ status = mb->mbxStatus; } - spin_unlock_irqrestore(phba->host->host_lock, drvr_flag); - return (status); + spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + return status; } -static int -lpfc_sli_ringtx_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, - struct lpfc_iocbq * piocb) +/* + * Caller needs to hold lock. + */ +static void +__lpfc_sli_ringtx_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb) { /* Insert the caller's iocb in the txq tail for later processing. */ list_add_tail(&piocb->list, &pring->txq); pring->txq_cnt++; - return (0); } static struct lpfc_iocbq * lpfc_sli_next_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct lpfc_iocbq ** piocb) + struct lpfc_iocbq **piocb) { struct lpfc_iocbq * nextiocb; @@ -2384,13 +2833,29 @@ return nextiocb; } +/* + * Lockless version of lpfc_sli_issue_iocb. + */ int -lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, +__lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb, uint32_t flag) { struct lpfc_iocbq *nextiocb; IOCB_t *iocb; + if (piocb->iocb_cmpl && (!piocb->vport) && + (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && + (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) { + lpfc_printf_log(phba, KERN_ERR, + LOG_SLI | LOG_VPORT, + "%d:1807 IOCB x%x failed. No vport\n", + phba->brd_no, + piocb->iocb.ulpCommand); + dump_stack(); + return IOCB_ERROR; + } + + /* If the PCI channel is in offline state, do not post iocbs. 
*/ if (unlikely(pci_channel_offline(phba->pcidev))) return IOCB_ERROR; @@ -2398,7 +2863,7 @@ /* * We should never get an IOCB if we are in a < LINK_DOWN state */ - if (unlikely(phba->hba_state < LPFC_LINK_DOWN)) + if (unlikely(phba->link_state < LPFC_LINK_DOWN)) return IOCB_ERROR; /* @@ -2408,7 +2873,7 @@ if (unlikely(pring->flag & LPFC_STOP_IOCB_MBX)) goto iocb_busy; - if (unlikely(phba->hba_state == LPFC_LINK_DOWN)) { + if (unlikely(phba->link_state == LPFC_LINK_DOWN)) { /* * Only CREATE_XRI, CLOSE_XRI, and QUE_RING_BUF * can be issued if the link is not up. @@ -2436,8 +2901,9 @@ * attention events. */ } else if (unlikely(pring->ringno == phba->sli.fcp_ring && - !(phba->sli.sli_flag & LPFC_PROCESS_LA))) + !(phba->sli.sli_flag & LPFC_PROCESS_LA))) { goto iocb_busy; + } while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) && (nextiocb = lpfc_sli_next_iocb(phba, pring, &piocb))) @@ -2459,13 +2925,28 @@ out_busy: if (!(flag & SLI_IOCB_RET_IOCB)) { - lpfc_sli_ringtx_put(phba, pring, piocb); + __lpfc_sli_ringtx_put(phba, pring, piocb); return IOCB_SUCCESS; } return IOCB_BUSY; } + +int +lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb, uint32_t flag) +{ + unsigned long iflags; + int rc; + + spin_lock_irqsave(&phba->hbalock, iflags); + rc = __lpfc_sli_issue_iocb(phba, pring, piocb, flag); + spin_unlock_irqrestore(&phba->hbalock, iflags); + + return rc; +} + static int lpfc_extra_ring_setup( struct lpfc_hba *phba) { @@ -2504,7 +2985,7 @@ int lpfc_sli_setup(struct lpfc_hba *phba) { - int i, totiocb = 0; + int i, totiocbsize = 0; struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; @@ -2529,6 +3010,12 @@ pring->numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES; pring->numCiocb += SLI2_IOCB_CMD_R3XTRA_ENTRIES; pring->numRiocb += SLI2_IOCB_RSP_R3XTRA_ENTRIES; + pring->sizeCiocb = (phba->sli_rev == 3) ? + SLI3_IOCB_CMD_SIZE : + SLI2_IOCB_CMD_SIZE; + pring->sizeRiocb = (phba->sli_rev == 3) ? 
+ SLI3_IOCB_RSP_SIZE : + SLI2_IOCB_RSP_SIZE; pring->iotag_ctr = 0; pring->iotag_max = (phba->cfg_hba_queue_depth * 2); @@ -2539,12 +3026,25 @@ /* numCiocb and numRiocb are used in config_port */ pring->numCiocb = SLI2_IOCB_CMD_R1_ENTRIES; pring->numRiocb = SLI2_IOCB_RSP_R1_ENTRIES; + pring->sizeCiocb = (phba->sli_rev == 3) ? + SLI3_IOCB_CMD_SIZE : + SLI2_IOCB_CMD_SIZE; + pring->sizeRiocb = (phba->sli_rev == 3) ? + SLI3_IOCB_RSP_SIZE : + SLI2_IOCB_RSP_SIZE; + pring->iotag_max = phba->cfg_hba_queue_depth; pring->num_mask = 0; break; case LPFC_ELS_RING: /* ring 2 - ELS / CT */ /* numCiocb and numRiocb are used in config_port */ pring->numCiocb = SLI2_IOCB_CMD_R2_ENTRIES; pring->numRiocb = SLI2_IOCB_RSP_R2_ENTRIES; + pring->sizeCiocb = (phba->sli_rev == 3) ? + SLI3_IOCB_CMD_SIZE : + SLI2_IOCB_CMD_SIZE; + pring->sizeRiocb = (phba->sli_rev == 3) ? + SLI3_IOCB_RSP_SIZE : + SLI2_IOCB_RSP_SIZE; pring->fast_iotag = 0; pring->iotag_ctr = 0; pring->iotag_max = 4096; @@ -2575,14 +3075,16 @@ lpfc_ct_unsol_event; break; } - totiocb += (pring->numCiocb + pring->numRiocb); + totiocbsize += (pring->numCiocb * pring->sizeCiocb) + + (pring->numRiocb * pring->sizeRiocb); } - if (totiocb > MAX_SLI2_IOCB) { + if (totiocbsize > MAX_SLIM_IOCB_SIZE) { /* Too many cmd / rsp ring entries in SLI2 SLIM */ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0462 Too many cmd / rsp ring entries in " - "SLI2 SLIM Data: x%x x%x\n", - phba->brd_no, totiocb, MAX_SLI2_IOCB); + "SLI2 SLIM Data: x%x x%lx\n", + phba->brd_no, totiocbsize, + (unsigned long) MAX_SLIM_IOCB_SIZE); } if (phba->cfg_multi_ring_support == 2) lpfc_extra_ring_setup(phba); @@ -2591,15 +3093,16 @@ } int -lpfc_sli_queue_setup(struct lpfc_hba * phba) +lpfc_sli_queue_setup(struct lpfc_hba *phba) { struct lpfc_sli *psli; struct lpfc_sli_ring *pring; int i; psli = &phba->sli; - spin_lock_irq(phba->host->host_lock); + spin_lock_irq(&phba->hbalock); INIT_LIST_HEAD(&psli->mboxq); + INIT_LIST_HEAD(&psli->mboxq_cmpl); /* Initialize list headers for 
txq and txcmplq as double linked lists */ for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; @@ -2612,15 +3115,73 @@ INIT_LIST_HEAD(&pring->iocb_continueq); INIT_LIST_HEAD(&pring->postbufq); } - spin_unlock_irq(phba->host->host_lock); - return (1); + spin_unlock_irq(&phba->hbalock); + return 1; } int -lpfc_sli_hba_down(struct lpfc_hba * phba) +lpfc_sli_host_down(struct lpfc_vport *vport) { LIST_HEAD(completions); - struct lpfc_sli *psli; + struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + struct lpfc_iocbq *iocb, *next_iocb; + int i; + unsigned long flags = 0; + uint16_t prev_pring_flag; + + lpfc_cleanup_discovery_resources(vport); + + spin_lock_irqsave(&phba->hbalock, flags); + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->ring[i]; + prev_pring_flag = pring->flag; + if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */ + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* + * Error everything on the txq since these iocbs have not been + * given to the FW yet. 
+ */ + list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { + if (iocb->vport != vport) + continue; + list_move_tail(&iocb->list, &completions); + pring->txq_cnt--; + } + + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, + list) { + if (iocb->vport != vport) + continue; + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + + pring->flag = prev_pring_flag; + } + + spin_unlock_irqrestore(&phba->hbalock, flags); + + while (!list_empty(&completions)) { + list_remove_head(&completions, iocb, struct lpfc_iocbq, list); + + if (!iocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, iocb); + else { + iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT; + iocb->iocb.un.ulpWord[4] = IOERR_SLI_DOWN; + (iocb->iocb_cmpl) (phba, iocb, iocb); + } + } + return 1; +} + +int +lpfc_sli_hba_down(struct lpfc_hba *phba) +{ + LIST_HEAD(completions); + struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; LPFC_MBOXQ_t *pmb; struct lpfc_iocbq *iocb; @@ -2628,12 +3189,14 @@ int i; unsigned long flags = 0; - psli = &phba->sli; lpfc_hba_down_prep(phba); - spin_lock_irqsave(phba->host->host_lock, flags); + lpfc_fabric_abort_hba(phba); + + spin_lock_irqsave(&phba->hbalock, flags); for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; + if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */ pring->flag |= LPFC_DEFERRED_RING_EVENT; /* @@ -2644,51 +3207,50 @@ pring->txq_cnt = 0; } - spin_unlock_irqrestore(phba->host->host_lock, flags); + spin_unlock_irqrestore(&phba->hbalock, flags); while (!list_empty(&completions)) { - iocb = list_get_first(&completions, struct lpfc_iocbq, list); + list_remove_head(&completions, iocb, struct lpfc_iocbq, list); cmd = &iocb->iocb; - list_del(&iocb->list); - if (iocb->iocb_cmpl) { + if (!iocb->iocb_cmpl) + lpfc_sli_release_iocbq(phba, iocb); + else { cmd->ulpStatus = IOSTAT_LOCAL_REJECT; cmd->un.ulpWord[4] = IOERR_SLI_DOWN; (iocb->iocb_cmpl) (phba, iocb, iocb); - } else - 
lpfc_sli_release_iocbq(phba, iocb); + } } /* Return any active mbox cmds */ del_timer_sync(&psli->mbox_tmo); - spin_lock_irqsave(phba->host->host_lock, flags); - phba->work_hba_events &= ~WORKER_MBOX_TMO; + spin_lock_irqsave(&phba->hbalock, flags); + + spin_lock(&phba->pport->work_port_lock); + phba->pport->work_port_events &= ~WORKER_MBOX_TMO; + spin_unlock(&phba->pport->work_port_lock); + if (psli->mbox_active) { - pmb = psli->mbox_active; - pmb->mb.mbxStatus = MBX_NOT_FINISHED; - if (pmb->mbox_cmpl) { - spin_unlock_irqrestore(phba->host->host_lock, flags); - pmb->mbox_cmpl(phba,pmb); - spin_lock_irqsave(phba->host->host_lock, flags); - } - } - psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; + list_add_tail(&psli->mbox_active->list, &completions); psli->mbox_active = NULL; + psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; + } - /* Return any pending mbox cmds */ - while ((pmb = lpfc_mbox_get(phba)) != NULL) { + /* Return any pending or completed mbox cmds */ + list_splice_init(&phba->sli.mboxq, &completions); + list_splice_init(&phba->sli.mboxq_cmpl, &completions); + INIT_LIST_HEAD(&psli->mboxq); + INIT_LIST_HEAD(&psli->mboxq_cmpl); + + spin_unlock_irqrestore(&phba->hbalock, flags); + + while (!list_empty(&completions)) { + list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list); pmb->mb.mbxStatus = MBX_NOT_FINISHED; if (pmb->mbox_cmpl) { - spin_unlock_irqrestore(phba->host->host_lock, flags); pmb->mbox_cmpl(phba,pmb); - spin_lock_irqsave(phba->host->host_lock, flags); } } - - INIT_LIST_HEAD(&psli->mboxq); - - spin_unlock_irqrestore(phba->host->host_lock, flags); - return 1; } @@ -2710,14 +3272,15 @@ } int -lpfc_sli_ringpostbuf_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, - struct lpfc_dmabuf * mp) +lpfc_sli_ringpostbuf_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_dmabuf *mp) { /* Stick struct lpfc_dmabuf at end of postbufq so driver can look it up later */ + spin_lock_irq(&phba->hbalock); list_add_tail(&mp->list, &pring->postbufq); - 
pring->postbufq_cnt++; + spin_unlock_irq(&phba->hbalock); return 0; } @@ -2730,14 +3293,17 @@ struct list_head *slp = &pring->postbufq; /* Search postbufq, from the begining, looking for a match on phys */ + spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) { if (mp->phys == phys) { list_del_init(&mp->list); pring->postbufq_cnt--; + spin_unlock_irq(&phba->hbalock); return mp; } } + spin_unlock_irq(&phba->hbalock); lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "%d:0410 Cannot find virtual addr for mapped buf on " "ring %d Data x%llx x%p x%p x%x\n", @@ -2747,92 +3313,110 @@ } static void -lpfc_sli_abort_els_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { - IOCB_t *irsp; + IOCB_t *irsp = &rspiocb->iocb; uint16_t abort_iotag, abort_context; - struct lpfc_iocbq *abort_iocb, *rsp_ab_iocb; + struct lpfc_iocbq *abort_iocb; struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; abort_iocb = NULL; - irsp = &rspiocb->iocb; - - spin_lock_irq(phba->host->host_lock); if (irsp->ulpStatus) { abort_context = cmdiocb->iocb.un.acxri.abortContextTag; abort_iotag = cmdiocb->iocb.un.acxri.abortIoTag; + spin_lock_irq(&phba->hbalock); if (abort_iotag != 0 && abort_iotag <= phba->sli.last_iotag) abort_iocb = phba->sli.iocbq_lookup[abort_iotag]; - lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "%d:0327 Cannot abort els iocb %p" - " with tag %x context %x\n", - phba->brd_no, abort_iocb, - abort_iotag, abort_context); + lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_SLI, + "%d:0327 Cannot abort els iocb %p " + "with tag %x context %x, abort status %x, " + "abort code %x\n", + phba->brd_no, abort_iocb, abort_iotag, + abort_context, irsp->ulpStatus, + irsp->un.ulpWord[4]); /* * make sure we have the right iocbq before taking it * off the txcmplq and try to call completion routine. 
*/ - if (abort_iocb && - abort_iocb->iocb.ulpContext == abort_context && - abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) { - list_del(&abort_iocb->list); + if (!abort_iocb || + abort_iocb->iocb.ulpContext != abort_context || + (abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) == 0) + spin_unlock_irq(&phba->hbalock); + else { + list_del_init(&abort_iocb->list); pring->txcmplq_cnt--; + spin_unlock_irq(&phba->hbalock); - rsp_ab_iocb = lpfc_sli_get_iocbq(phba); - if (rsp_ab_iocb == NULL) - lpfc_sli_release_iocbq(phba, abort_iocb); - else { - abort_iocb->iocb_flag &= - ~LPFC_DRIVER_ABORTED; - rsp_ab_iocb->iocb.ulpStatus = - IOSTAT_LOCAL_REJECT; - rsp_ab_iocb->iocb.un.ulpWord[4] = - IOERR_SLI_ABORTED; - spin_unlock_irq(phba->host->host_lock); - (abort_iocb->iocb_cmpl) - (phba, abort_iocb, rsp_ab_iocb); - spin_lock_irq(phba->host->host_lock); - lpfc_sli_release_iocbq(phba, rsp_ab_iocb); - } + abort_iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED; + abort_iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT; + abort_iocb->iocb.un.ulpWord[4] = IOERR_SLI_ABORTED; + (abort_iocb->iocb_cmpl)(phba, abort_iocb, abort_iocb); } } lpfc_sli_release_iocbq(phba, cmdiocb); - spin_unlock_irq(phba->host->host_lock); + return; +} + +static void +lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + IOCB_t *irsp = &rspiocb->iocb; + + /* ELS cmd tag completes */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + "%d (X):0133 Ignoring ELS cmd tag x%x completion Data: " + "x%x x%x x%x\n", + phba->brd_no, irsp->ulpIoTag, irsp->ulpStatus, + irsp->un.ulpWord[4], irsp->ulpTimeout); + if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) + lpfc_ct_free_iocb(phba, cmdiocb); + else + lpfc_els_free_iocb(phba, cmdiocb); return; } int -lpfc_sli_issue_abort_iotag(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, - struct lpfc_iocbq * cmdiocb) +lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *cmdiocb) { + struct lpfc_vport 
*vport = cmdiocb->vport; struct lpfc_iocbq *abtsiocbp; IOCB_t *icmd = NULL; IOCB_t *iabt = NULL; int retval = IOCB_ERROR; - /* There are certain command types we don't want - * to abort. + /* + * There are certain command types we don't want to abort. And we + * don't want to abort commands that are already in the process of + * being aborted. */ icmd = &cmdiocb->iocb; - if ((icmd->ulpCommand == CMD_ABORT_XRI_CN) || - (icmd->ulpCommand == CMD_CLOSE_XRI_CN)) + if (icmd->ulpCommand == CMD_ABORT_XRI_CN || + icmd->ulpCommand == CMD_CLOSE_XRI_CN || + (cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) != 0) return 0; - /* If we're unloading, interrupts are disabled so we - * need to cleanup the iocb here. + /* If we're unloading, don't abort iocb on the ELS ring, but change the + * callback so that nothing happens when it finishes. */ - if (phba->fc_flag & FC_UNLOADING) + if ((vport->load_flag & FC_UNLOADING) && + (pring->ringno == LPFC_ELS_RING)) { + if (cmdiocb->iocb_flag & LPFC_IO_FABRIC) + cmdiocb->fabric_iocb_cmpl = lpfc_ignore_els_cmpl; + else + cmdiocb->iocb_cmpl = lpfc_ignore_els_cmpl; goto abort_iotag_exit; + } /* issue ABTS for this IOCB based on iotag */ - abtsiocbp = lpfc_sli_get_iocbq(phba); + abtsiocbp = __lpfc_sli_get_iocbq(phba); if (abtsiocbp == NULL) return 0; @@ -2848,7 +3432,7 @@ iabt->ulpLe = 1; iabt->ulpClass = icmd->ulpClass; - if (phba->hba_state >= LPFC_LINK_UP) + if (phba->link_state >= LPFC_LINK_UP) iabt->ulpCommand = CMD_ABORT_XRI_CN; else iabt->ulpCommand = CMD_CLOSE_XRI_CN; @@ -2856,32 +3440,20 @@ abtsiocbp->iocb_cmpl = lpfc_sli_abort_els_cmpl; lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "%d:0339 Abort xri x%x, original iotag x%x, abort " - "cmd iotag x%x\n", - phba->brd_no, iabt->un.acxri.abortContextTag, + "%d (%d):0339 Abort xri x%x, original iotag x%x, " + "abort cmd iotag x%x\n", + phba->brd_no, vport->vpi, + iabt->un.acxri.abortContextTag, iabt->un.acxri.abortIoTag, abtsiocbp->iotag); - retval = lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0); 
+ retval = __lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0); abort_iotag_exit: - - /* If we could not issue an abort dequeue the iocb and handle - * the completion here. + /* + * Caller to this routine should check for IOCB_ERROR + * and handle it properly. This routine no longer removes + * iocb off txcmplq and call compl in case of IOCB_ERROR. */ - if (retval == IOCB_ERROR) { - list_del(&cmdiocb->list); - pring->txcmplq_cnt--; - - if (cmdiocb->iocb_cmpl) { - icmd->ulpStatus = IOSTAT_LOCAL_REJECT; - icmd->un.ulpWord[4] = IOERR_SLI_ABORTED; - spin_unlock_irq(phba->host->host_lock); - (cmdiocb->iocb_cmpl) (phba, cmdiocb, cmdiocb); - spin_lock_irq(phba->host->host_lock); - } else - lpfc_sli_release_iocbq(phba, cmdiocb); - } - - return 1; + return retval; } static int @@ -2947,14 +3519,10 @@ } void -lpfc_sli_abort_fcp_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, - struct lpfc_iocbq * rspiocb) +lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) { - unsigned long iflags; - - spin_lock_irqsave(phba->host->host_lock, iflags); lpfc_sli_release_iocbq(phba, cmdiocb); - spin_unlock_irqrestore(phba->host->host_lock, iflags); return; } @@ -2972,8 +3540,8 @@ for (i = 1; i <= phba->sli.last_iotag; i++) { iocbq = phba->sli.iocbq_lookup[i]; - if (lpfc_sli_validate_fcp_iocb (iocbq, tgt_id, lun_id, - 0, abort_cmd) != 0) + if (lpfc_sli_validate_fcp_iocb(iocbq, tgt_id, lun_id, 0, + abort_cmd) != 0) continue; /* issue ABTS for this IOCB based on iotag */ @@ -2989,8 +3557,9 @@ abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag; abtsiocb->iocb.ulpLe = 1; abtsiocb->iocb.ulpClass = cmd->ulpClass; + abtsiocb->vport = phba->pport; - if (phba->hba_state >= LPFC_LINK_UP) + if (lpfc_is_link_up(phba)) abtsiocb->iocb.ulpCommand = CMD_ABORT_XRI_CN; else abtsiocb->iocb.ulpCommand = CMD_CLOSE_XRI_CN; @@ -3016,16 +3585,16 @@ wait_queue_head_t *pdone_q; unsigned long iflags; - spin_lock_irqsave(phba->host->host_lock, iflags); + 
spin_lock_irqsave(&phba->hbalock, iflags); cmdiocbq->iocb_flag |= LPFC_IO_WAKE; if (cmdiocbq->context2 && rspiocbq) memcpy(&((struct lpfc_iocbq *)cmdiocbq->context2)->iocb, &rspiocbq->iocb, sizeof(IOCB_t)); pdone_q = cmdiocbq->context_un.wait_queue; - spin_unlock_irqrestore(phba->host->host_lock, iflags); if (pdone_q) wake_up(pdone_q); + spin_unlock_irqrestore(&phba->hbalock, iflags); return; } @@ -3035,11 +3604,12 @@ * lpfc_sli_issue_call since the wake routine sets a unique value and by * definition this is a wait function. */ + int -lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba, - struct lpfc_sli_ring * pring, - struct lpfc_iocbq * piocb, - struct lpfc_iocbq * prspiocbq, +lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb, + struct lpfc_iocbq *prspiocbq, uint32_t timeout) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q); @@ -3071,11 +3641,9 @@ retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0); if (retval == IOCB_SUCCESS) { timeout_req = timeout * HZ; - spin_unlock_irq(phba->host->host_lock); timeleft = wait_event_timeout(done_q, piocb->iocb_flag & LPFC_IO_WAKE, timeout_req); - spin_lock_irq(phba->host->host_lock); if (piocb->iocb_flag & LPFC_IO_WAKE) { lpfc_printf_log(phba, KERN_INFO, LOG_SLI, @@ -3117,16 +3685,16 @@ } int -lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq, +lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, uint32_t timeout) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q); int retval; + unsigned long flag; /* The caller must leave context1 empty. 
*/ - if (pmboxq->context1 != 0) { - return (MBX_NOT_FINISHED); - } + if (pmboxq->context1 != 0) + return MBX_NOT_FINISHED; /* setup wake call as IOCB callback */ pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait; @@ -3141,6 +3709,7 @@ pmboxq->mbox_flag & LPFC_MBX_WAKE, timeout * HZ); + spin_lock_irqsave(&phba->hbalock, flag); pmboxq->context1 = NULL; /* * if LPFC_MBX_WAKE flag is set the mailbox is completed @@ -3148,8 +3717,11 @@ */ if (pmboxq->mbox_flag & LPFC_MBX_WAKE) retval = MBX_SUCCESS; - else + else { retval = MBX_TIMEOUT; + pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + } + spin_unlock_irqrestore(&phba->hbalock, flag); } return retval; @@ -3158,12 +3730,25 @@ int lpfc_sli_flush_mbox_queue(struct lpfc_hba * phba) { + struct lpfc_vport *vport = phba->pport; int i = 0; + uint32_t ha_copy; - while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !phba->stopped) { + while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !vport->stopped) { if (i++ > LPFC_MBOX_TMO * 1000) return 1; + /* + * Call lpfc_sli_handle_mb_event only if a mailbox cmd + * did finish. This way we won't get the misleading + * "Stray Mailbox Interrupt" message. + */ + spin_lock_irq(&phba->hbalock); + ha_copy = phba->work_ha; + phba->work_ha &= ~HA_MBATT; + spin_unlock_irq(&phba->hbalock); + + if (ha_copy & HA_MBATT) if (lpfc_sli_handle_mb_event(phba) == 0) i = 0; @@ -3183,6 +3768,13 @@ int i; uint32_t control; + MAILBOX_t *mbox, *pmbox; + struct lpfc_vport *vport; + struct lpfc_nodelist *ndlp; + struct lpfc_dmabuf *mp; + LPFC_MBOXQ_t *pmb; + int rc; + /* * Get the driver's phba structure from the dev_id and * assume the HBA is not interrupting. @@ -3204,7 +3796,7 @@ */ /* Ignore all interrupts during initialization. 
*/ - if (unlikely(phba->hba_state < LPFC_LINK_DOWN)) + if (unlikely(phba->link_state < LPFC_LINK_DOWN)) return IRQ_NONE; /* @@ -3212,16 +3804,16 @@ * Clear Attention Sources, except Error Attention (to * preserve status) and Link Attention */ - spin_lock(phba->host->host_lock); + spin_lock(&phba->hbalock); ha_copy = readl(phba->HAregaddr); /* If somebody is waiting to handle an eratt don't process it * here. The brdkill function will do this. */ - if (phba->fc_flag & FC_IGNORE_ERATT) + if (phba->link_flag & LS_IGNORE_ERATT) ha_copy &= ~HA_ERATT; writel((ha_copy & ~(HA_LATT | HA_ERATT)), phba->HAregaddr); readl(phba->HAregaddr); /* flush */ - spin_unlock(phba->host->host_lock); + spin_unlock(&phba->hbalock); if (unlikely(!ha_copy)) return IRQ_NONE; @@ -3235,36 +3827,41 @@ * Turn off Link Attention interrupts * until CLEAR_LA done */ - spin_lock(phba->host->host_lock); + spin_lock(&phba->hbalock); phba->sli.sli_flag &= ~LPFC_PROCESS_LA; control = readl(phba->HCregaddr); control &= ~HC_LAINT_ENA; writel(control, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - spin_unlock(phba->host->host_lock); + spin_unlock(&phba->hbalock); } else work_ha_copy &= ~HA_LATT; } if (work_ha_copy & ~(HA_ERATT|HA_MBATT|HA_LATT)) { - for (i = 0; i < phba->sli.num_rings; i++) { - if (work_ha_copy & (HA_RXATT << (4*i))) { /* - * Turn off Slow Rings interrupts + * Turn off Slow Rings interrupts, LPFC_ELS_RING is + * the only slow ring. 
*/ - spin_lock(phba->host->host_lock); + status = (work_ha_copy & + (HA_RXMASK << (4*LPFC_ELS_RING))); + status >>= (4*LPFC_ELS_RING); + if (status & HA_RXMASK) { + spin_lock(&phba->hbalock); control = readl(phba->HCregaddr); - control &= ~(HC_R0INT_ENA << i); + if (control & (HC_R0INT_ENA << LPFC_ELS_RING)) { + control &= + ~(HC_R0INT_ENA << LPFC_ELS_RING); writel(control, phba->HCregaddr); readl(phba->HCregaddr); /* flush */ - spin_unlock(phba->host->host_lock); } + spin_unlock(&phba->hbalock); } } if (work_ha_copy & HA_ERATT) { - phba->hba_state = LPFC_HBA_ERROR; + phba->link_state = LPFC_HBA_ERROR; /* * There was a link/board error. Read the * status register to retrieve the error event @@ -3279,14 +3876,108 @@ /* Clear Chip error bit */ writel(HA_ERATT, phba->HAregaddr); readl(phba->HAregaddr); /* flush */ - phba->stopped = 1; + phba->pport->stopped = 1; + } + + if ((work_ha_copy & HA_MBATT) && + (phba->sli.mbox_active)) { + pmb = phba->sli.mbox_active; + pmbox = &pmb->mb; + mbox = &phba->slim2p->mbx; + vport = pmb->vport; + + /* First check out the status word */ + lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof(uint32_t)); + if (pmbox->mbxOwner != OWN_HOST) { + /* + * Stray Mailbox Interrupt, mbxCommand + * mbxStatus + */ + lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | + LOG_SLI, + "%d (%d):0304 Stray Mailbox " + "Interrupt mbxCommand x%x " + "mbxStatus x%x\n", + phba->brd_no, + (vport + ? 
? vport->vpi : 0), + pmbox->mbxCommand, + pmbox->mbxStatus); + } + phba->last_completion_time = jiffies; + del_timer_sync(&phba->sli.mbox_tmo); + + phba->sli.mbox_active = NULL; + if (pmb->mbox_cmpl) { + lpfc_sli_pcimem_bcopy(mbox, pmbox, + MAILBOX_CMD_SIZE); + } + if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) { + pmb->mbox_flag &= ~LPFC_MBX_IMED_UNREG; + + lpfc_debugfs_disc_trc(vport, + LPFC_DISC_TRC_MBOX_VPORT, + "MBOX dflt rpi: : status:x%x rpi:x%x", + (uint32_t)pmbox->mbxStatus, + pmbox->un.varWords[0], 0); + + if ( !pmbox->mbxStatus) { + mp = (struct lpfc_dmabuf *) + (pmb->context1); + ndlp = (struct lpfc_nodelist *) + pmb->context2; + + /* Reg_LOGIN of dflt RPI was successful. + * now let's get rid of the RPI using the + * same mbox buffer. + */ + lpfc_unreg_login(phba, vport->vpi, + pmbox->un.varWords[0], pmb); + pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; + pmb->context1 = mp; + pmb->context2 = ndlp; + pmb->vport = vport; + spin_lock(&phba->hbalock); + phba->sli.sli_flag &= + ~LPFC_SLI_MBOX_ACTIVE; + spin_unlock(&phba->hbalock); + goto send_current_mbox; + } + } + spin_lock(&phba->pport->work_port_lock); + phba->pport->work_port_events &= ~WORKER_MBOX_TMO; + spin_unlock(&phba->pport->work_port_lock); + lpfc_mbox_cmpl_put(phba, pmb); + } + if ((work_ha_copy & HA_MBATT) && + (phba->sli.mbox_active == NULL)) { +send_next_mbox: + spin_lock(&phba->hbalock); + phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; + pmb = lpfc_mbox_get(phba); + spin_unlock(&phba->hbalock); +send_current_mbox: + /* Process next mailbox command if there is one */ + if (pmb != NULL) { + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) { + pmb->mb.mbxStatus = MBX_NOT_FINISHED; + lpfc_mbox_cmpl_put(phba, pmb); + goto send_next_mbox; + } + } else { + /* Turn on IOCB processing */ + for (i = 0; i < phba->sli.num_rings; i++) + lpfc_sli_turn_on_ring(phba, i); + } + + } - spin_lock(phba->host->host_lock); + spin_lock(&phba->hbalock); phba->work_ha |= work_ha_copy; if
(phba->work_wait) - wake_up(phba->work_wait); - spin_unlock(phba->host->host_lock); + lpfc_worker_wake_up(phba); + spin_unlock(&phba->hbalock); } ha_copy &= ~(phba->work_ha_mask); @@ -3298,7 +3989,7 @@ */ status = (ha_copy & (HA_RXMASK << (4*LPFC_FCP_RING))); status >>= (4*LPFC_FCP_RING); - if (status & HA_RXATT) + if (status & HA_RXMASK) lpfc_sli_handle_fast_ring_event(phba, &phba->sli.ring[LPFC_FCP_RING], status); @@ -3311,7 +4002,7 @@ */ status = (ha_copy & (HA_RXMASK << (4*LPFC_EXTRA_RING))); status >>= (4*LPFC_EXTRA_RING); - if (status & HA_RXATT) { + if (status & HA_RXMASK) { lpfc_sli_handle_fast_ring_event(phba, &phba->sli.ring[LPFC_EXTRA_RING], status); diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.h 2007-12-21 15:36:12.000000000 -0500 @@ -20,6 +20,7 @@ /* forward declaration for LPFC_IOCB_t's use */ struct lpfc_hba; +struct lpfc_vport; /* Define the context types that SLI handles for abort and sums. 
typedef enum _lpfc_ctx_cmd { @@ -43,10 +44,12 @@ #define LPFC_IO_WAKE 2 /* High Priority Queue signal flag */ #define LPFC_IO_FCP 4 /* FCP command -- iocbq in scsi_buf */ #define LPFC_DRIVER_ABORTED 8 /* driver aborted this request */ +#define LPFC_IO_FABRIC 0x10 /* Iocb send using fabric scheduler */ uint8_t abort_count; uint8_t rsvd2; uint32_t drvrTimeout; /* driver timeout in seconds */ + struct lpfc_vport *vport;/* virtual port pointer */ void *context1; /* caller context information */ void *context2; /* caller context information */ void *context3; /* caller context information */ @@ -56,6 +59,8 @@ struct lpfcMboxq *mbox; } context_un; + void (*fabric_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *); void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); @@ -69,11 +74,13 @@ #define IOCB_TIMEDOUT 3 #define LPFC_MBX_WAKE 1 +#define LPFC_MBX_IMED_UNREG 2 typedef struct lpfcMboxq { /* MBOXQs are used in single linked lists */ struct list_head list; /* ptr to next mailbox command */ MAILBOX_t mb; /* Mailbox cmd */ + struct lpfc_vport *vport;/* virtual port pointer */ void *context1; /* caller context information */ void *context2; /* caller context information */ @@ -135,6 +142,8 @@ uint8_t ringno; /* ring number */ uint16_t numCiocb; /* number of command iocb's per ring */ uint16_t numRiocb; /* number of rsp iocb's per ring */ + uint16_t sizeCiocb; /* Size of command iocb's in this ring */ + uint16_t sizeRiocb; /* Size of response iocb's in this ring */ uint32_t fast_iotag; /* max fastlookup based iotag */ uint32_t iotag_ctr; /* keeps track of the next iotag to use */ @@ -165,6 +174,34 @@ struct lpfc_sli_ring *); }; +/* Structure used for configuring rings to a specific profile or rctl / type */ +struct lpfc_hbq_init { + uint32_t rn; /* Receive buffer notification */ + uint32_t entry_count; /* max # of entries in HBQ */ + uint32_t headerLen; /* 0 if not profile 4 or 5 */ + uint32_t logEntry; /* Set to 1 if
this HBQ used for LogEntry */ + uint32_t profile; /* Selection profile 0=all, 7=logentry */ + uint32_t ring_mask; /* Binds HBQ to a ring e.g. Ring0=b0001, + * ring2=b0100 */ + uint32_t hbq_index; /* index of this hbq in ring .HBQs[] */ + + uint32_t seqlenoff; + uint32_t maxlen; + uint32_t seqlenbcnt; + uint32_t cmdcodeoff; + uint32_t cmdmatch[8]; + uint32_t mask_count; /* number of mask entries in prt array */ + struct hbq_mask hbqMasks[6]; + + /* Non-config rings fields to keep track of buffer allocations */ + uint32_t buffer_count; /* number of buffers allocated */ + uint32_t init_count; /* number to allocate when initialized */ + uint32_t add_count; /* number to allocate when starved */ +} ; + +#define LPFC_MAX_HBQ 16 + + /* Structure used to hold SLI statistical counters and info */ struct lpfc_sli_stat { uint64_t mbox_stat_err; /* Mbox cmds completed status error */ @@ -197,6 +234,7 @@ #define LPFC_SLI_MBOX_ACTIVE 0x100 /* HBA mailbox is currently active */ #define LPFC_SLI2_ACTIVE 0x200 /* SLI2 overlay in firmware is active */ #define LPFC_PROCESS_LA 0x400 /* Able to process link attention */ +#define LPFC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */ struct lpfc_sli_ring ring[LPFC_MAX_RING]; int fcp_ring; /* ring used for FCP initiator commands */ @@ -209,6 +247,7 @@ uint16_t mboxq_cnt; /* current length of queue */ uint16_t mboxq_max; /* max length */ LPFC_MBOXQ_t *mbox_active; /* active mboxq information */ + struct list_head mboxq_cmpl; struct timer_list mbox_tmo; /* Hold clk to timeout active mbox cmd */ @@ -221,12 +260,6 @@ struct lpfc_lnk_stat lnk_stat_offsets; }; -/* Given a pointer to the start of the ring, and the slot number of - * the desired iocb entry, calc a pointer to that entry. 
- * (assume iocb entry size is 32 bytes, or 8 words) - */ -#define IOCB_ENTRY(ring,slot) ((IOCB_t *)(((char *)(ring)) + ((slot) * 32))) - #define LPFC_MBOX_TMO 30 /* Sec tmo for outstanding mbox command */ #define LPFC_MBOX_TMO_FLASH_CMD 300 /* Sec tmo for outstanding FLASH write diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_version.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_version.h 2007-12-21 15:36:12.000000000 -0500 @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.1.12" +#define LPFC_DRIVER_VERSION "8.2.1" #define LPFC_DRIVER_NAME "lpfc" diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.c --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,523 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2006 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. 
* + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + *******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "lpfc_hw.h" +#include "lpfc_sli.h" +#include "lpfc_disc.h" +#include "lpfc_scsi.h" +#include "lpfc.h" +#include "lpfc_logmsg.h" +#include "lpfc_crtn.h" +#include "lpfc_version.h" +#include "lpfc_vport.h" + +inline void lpfc_vport_set_state(struct lpfc_vport *vport, + enum fc_vport_state new_state) +{ + struct fc_vport *fc_vport = vport->fc_vport; + + if (fc_vport) { + /* + * When the transport defines fc_vport_set_state we will replace + * this code with the following line + */ + /* fc_vport_set_state(fc_vport, new_state); */ + if (new_state != FC_VPORT_INITIALIZING) + fc_vport->vport_last_state = fc_vport->vport_state; + fc_vport->vport_state = new_state; + } + + /* for all the error states we will set the internal state to FAILED */ + switch (new_state) { + case FC_VPORT_NO_FABRIC_SUPP: + case FC_VPORT_NO_FABRIC_RSCS: + case FC_VPORT_FABRIC_LOGOUT: + case FC_VPORT_FABRIC_REJ_WWN: + case FC_VPORT_FAILED: + vport->port_state = LPFC_VPORT_FAILED; + break; + case FC_VPORT_LINKDOWN: + vport->port_state = LPFC_VPORT_UNKNOWN; + break; + default: + /* do nothing */ + break; + } +} + +static int +lpfc_alloc_vpi(struct lpfc_hba *phba) +{ + int vpi; + + spin_lock_irq(&phba->hbalock); + /* Start at bit 1 because vpi zero is reserved for the physical port */ + vpi = find_next_zero_bit(phba->vpi_bmask, (phba->max_vpi + 1), 1); + if (vpi >
phba->max_vpi) + vpi = 0; + else + set_bit(vpi, phba->vpi_bmask); + spin_unlock_irq(&phba->hbalock); + return vpi; +} + +static void +lpfc_free_vpi(struct lpfc_hba *phba, int vpi) +{ + spin_lock_irq(&phba->hbalock); + clear_bit(vpi, phba->vpi_bmask); + spin_unlock_irq(&phba->hbalock); +} + +static int +lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport) +{ + LPFC_MBOXQ_t *pmb; + MAILBOX_t *mb; + struct lpfc_dmabuf *mp; + int rc; + + pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!pmb) { + return -ENOMEM; + } + mb = &pmb->mb; + + lpfc_read_sparam(phba, pmb, vport->vpi); + /* + * Grab buffer pointer and clear context1 so we can use + * lpfc_sli_issue_mbox_wait + */ + mp = (struct lpfc_dmabuf *) pmb->context1; + pmb->context1 = NULL; + + pmb->vport = vport; + rc = lpfc_sli_issue_mbox_wait(phba, pmb, phba->fc_ratov * 2); + if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, + "%d (%d):1818 VPort failed init, mbxCmd x%x " + "READ_SPARM mbxStatus x%x, rc = x%x\n", + phba->brd_no, vport->vpi, + mb->mbxCommand, mb->mbxStatus, rc); + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + if (rc != MBX_TIMEOUT) + mempool_free(pmb, phba->mbox_mem_pool); + return -EIO; + } + + memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); + memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, + sizeof (struct lpfc_name)); + memcpy(&vport->fc_portname, &vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + mempool_free(pmb, phba->mbox_mem_pool); + + return 0; +} + +static int +lpfc_valid_wwn_format(struct lpfc_hba *phba, struct lpfc_name *wwn, + const char *name_type) +{ + /* ensure that IEEE format 1 addresses + * contain zeros in bits 59-48 + */ + if (!((wwn->u.wwn[0] >> 4) == 1 && + ((wwn->u.wwn[0] & 0xf) != 0 || (wwn->u.wwn[1] & 0xf) != 0))) + return 1; + + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1822 Invalid %s: %02x:%02x:%02x:%02x:" +
"%02x:%02x:%02x:%02x\n", + phba->brd_no, name_type, + wwn->u.wwn[0], wwn->u.wwn[1], + wwn->u.wwn[2], wwn->u.wwn[3], + wwn->u.wwn[4], wwn->u.wwn[5], + wwn->u.wwn[6], wwn->u.wwn[7]); + return 0; +} + +static int +lpfc_unique_wwpn(struct lpfc_hba *phba, struct lpfc_vport *new_vport) +{ + struct lpfc_vport *vport; + + list_for_each_entry(vport, &phba->port_list, listentry) { + if (vport == new_vport) + continue; + /* If they match, return not unique */ + if (memcmp(&vport->fc_sparam.portName, + &new_vport->fc_sparam.portName, + sizeof(struct lpfc_name)) == 0) + return 0; + } + return 1; +} + +int +lpfc_vport_create(struct fc_vport *fc_vport, bool disable) +{ + struct lpfc_nodelist *ndlp; + struct lpfc_vport *pport = + (struct lpfc_vport *) fc_vport->shost->hostdata; + struct lpfc_hba *phba = pport->phba; + struct lpfc_vport *vport = NULL; + int instance; + int vpi; + int rc = VPORT_ERROR; + + if ((phba->sli_rev < 3) || + !(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1808 Create VPORT failed: " + "NPIV is not enabled: SLImode:%d\n", + phba->brd_no, phba->sli_rev); + rc = VPORT_INVAL; + goto error_out; + } + + vpi = lpfc_alloc_vpi(phba); + if (vpi == 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1809 Create VPORT failed: " + "Max VPORTs (%d) exceeded\n", + phba->brd_no, phba->max_vpi); + rc = VPORT_NORESOURCES; + goto error_out; + } + + + /* Assign an unused board number */ + if ((instance = lpfc_get_instance()) < 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1810 Create VPORT failed: Cannot get " + "instance number\n", phba->brd_no); + lpfc_free_vpi(phba, vpi); + rc = VPORT_NORESOURCES; + goto error_out; + } + + vport = lpfc_create_port(phba, instance, fc_vport); + if (!vport) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1811 Create VPORT failed: vpi x%x\n", + phba->brd_no, vpi); + lpfc_free_vpi(phba, vpi); + rc = VPORT_NORESOURCES; + goto error_out; + } + + vport->vpi = vpi; + 
lpfc_debugfs_initialize(vport); + + if (lpfc_vport_sparm(phba, vport)) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1813 Create VPORT failed: vpi:%d " + "Cannot get sparam\n", + phba->brd_no, vpi); + lpfc_free_vpi(phba, vpi); + destroy_port(vport); + rc = VPORT_NORESOURCES; + goto error_out; + } + + memcpy(vport->fc_portname.u.wwn, vport->fc_sparam.portName.u.wwn, 8); + memcpy(vport->fc_nodename.u.wwn, vport->fc_sparam.nodeName.u.wwn, 8); + + if (fc_vport->node_name != 0) + u64_to_wwn(fc_vport->node_name, vport->fc_nodename.u.wwn); + if (fc_vport->port_name != 0) + u64_to_wwn(fc_vport->port_name, vport->fc_portname.u.wwn); + + memcpy(&vport->fc_sparam.portName, vport->fc_portname.u.wwn, 8); + memcpy(&vport->fc_sparam.nodeName, vport->fc_nodename.u.wwn, 8); + + if (!lpfc_valid_wwn_format(phba, &vport->fc_sparam.nodeName, "WWNN") || + !lpfc_valid_wwn_format(phba, &vport->fc_sparam.portName, "WWPN")) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1821 Create VPORT failed: vpi:%d " + "Invalid WWN format\n", + phba->brd_no, vpi); + lpfc_free_vpi(phba, vpi); + destroy_port(vport); + rc = VPORT_INVAL; + goto error_out; + } + + if (!lpfc_unique_wwpn(phba, vport)) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1823 Create VPORT failed: vpi:%d " + "Duplicate WWN on HBA\n", + phba->brd_no, vpi); + lpfc_free_vpi(phba, vpi); + destroy_port(vport); + rc = VPORT_INVAL; + goto error_out; + } + + *(struct lpfc_vport **)fc_vport->dd_data = vport; + vport->fc_vport = fc_vport; + + if ((phba->link_state < LPFC_LINK_UP) || + (phba->fc_topology == TOPOLOGY_LOOP)) { + lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN); + rc = VPORT_OK; + goto out; + } + + if (disable) { + rc = VPORT_OK; + goto out; + } + + /* Use the Physical nodes Fabric NDLP to determine if the link is + * up and ready to FDISC. 
+ */ + ndlp = lpfc_findnode_did(phba->pport, Fabric_DID); + if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { + if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) { + lpfc_set_disctmo(vport); + lpfc_initial_fdisc(vport); + } else { + lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0262 No NPIV Fabric " + "support\n", + phba->brd_no, vport->vpi); + } + } else { + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + } + rc = VPORT_OK; + +out: + lpfc_host_attrib_init(lpfc_shost_from_vport(vport)); +error_out: + return rc; +} + +int +disable_vport(struct fc_vport *fc_vport) +{ + struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data; + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp = NULL, *next_ndlp = NULL; + long timeout; + + ndlp = lpfc_findnode_did(vport, Fabric_DID); + if (ndlp && phba->link_state >= LPFC_LINK_UP) { + vport->unreg_vpi_cmpl = VPORT_INVAL; + timeout = msecs_to_jiffies(phba->fc_ratov * 2000); + if (!lpfc_issue_els_npiv_logo(vport, ndlp)) + while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout) + timeout = schedule_timeout(timeout); + } + + lpfc_sli_host_down(vport); + + /* Mark all nodes for discovery so we can remove them by + * calling lpfc_cleanup_rpis(vport, 1) + */ + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + continue; + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + } + lpfc_cleanup_rpis(vport, 1); + + lpfc_stop_vport_timers(vport); + lpfc_unreg_all_rpis(vport); + lpfc_unreg_default_rpis(vport); + /* + * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the + * scsi_host_put() to release the vport. 
+ */ + lpfc_mbx_unreg_vpi(vport); + + lpfc_vport_set_state(vport, FC_VPORT_DISABLED); + return VPORT_OK; +} + +int +enable_vport(struct fc_vport *fc_vport) +{ + struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data; + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp = NULL; + + if ((phba->link_state < LPFC_LINK_UP) || + (phba->fc_topology == TOPOLOGY_LOOP)) { + lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN); + return VPORT_OK; + } + + vport->load_flag |= FC_LOADING; + vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + + /* Use the Physical nodes Fabric NDLP to determine if the link is + * up and ready to FDISC. + */ + ndlp = lpfc_findnode_did(phba->pport, Fabric_DID); + if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { + if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) { + lpfc_set_disctmo(vport); + lpfc_initial_fdisc(vport); + } else { + lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP); + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "%d (%d):0264 No NPIV Fabric " + "support\n", + phba->brd_no, vport->vpi); + } + } else { + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + } + + return VPORT_OK; +} + +int +lpfc_vport_disable(struct fc_vport *fc_vport, bool disable) +{ + if (disable) + return disable_vport(fc_vport); + else + return enable_vport(fc_vport); +} + + +int +lpfc_vport_delete(struct fc_vport *fc_vport) +{ + struct lpfc_nodelist *ndlp = NULL; + struct lpfc_nodelist *next_ndlp; + struct Scsi_Host *shost = (struct Scsi_Host *) fc_vport->shost; + struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data; + struct lpfc_hba *phba = vport->phba; + long timeout; + int rc = VPORT_ERROR; + + /* + * This is a bit of a mess. We want to ensure the shost doesn't get + * torn down until we're done with the embedded lpfc_vport structure. + * + * Beyond holding a reference for this function, we also need a + * reference for outstanding I/O requests we schedule during delete + * processing. 
But once we scsi_remove_host() we can no longer obtain + * a reference through scsi_host_get(). + * + * So we take two references here. We release one reference at the + * bottom of the function -- after delinking the vport. And we + * release the other at the completion of the unreg_vpi that gets + * initiated after we've disposed of all other resources associated + * with the port. + */ + if (!scsi_host_get(shost) || !scsi_host_get(shost)) + return VPORT_INVAL; + + if (vport->port_type == LPFC_PHYSICAL_PORT) { + lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, + "%d:1812 vport_delete failed: Cannot delete " + "physical host\n", phba->brd_no); + goto out; + } + + vport->load_flag |= FC_UNLOADING; + + kfree(vport->vname); + lpfc_debugfs_terminate(vport); + fc_remove_host(lpfc_shost_from_vport(vport)); + scsi_remove_host(lpfc_shost_from_vport(vport)); + + ndlp = lpfc_findnode_did(phba->pport, Fabric_DID); + if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE && + phba->link_state >= LPFC_LINK_UP) { + + /* First look for the Fabric ndlp */ + ndlp = lpfc_findnode_did(vport, Fabric_DID); + if (!ndlp) { + /* Cannot find existing Fabric ndlp, allocate one */ + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) + goto skip_logo; + lpfc_nlp_init(vport, ndlp, Fabric_DID); + } else { + lpfc_dequeue_node(vport, ndlp); + } + vport->unreg_vpi_cmpl = VPORT_INVAL; + timeout = msecs_to_jiffies(phba->fc_ratov * 2000); + if (!lpfc_issue_els_npiv_logo(vport, ndlp)) + while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout) + timeout = schedule_timeout(timeout); + } + +skip_logo: + lpfc_sli_host_down(vport); + + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); + } + + lpfc_stop_vport_timers(vport); + lpfc_unreg_all_rpis(vport); + lpfc_unreg_default_rpis(vport); + /* + * Completion of unreg_vpi
(lpfc_mbx_cmpl_unreg_vpi) does the + * scsi_host_put() to release the vport. + */ + lpfc_mbx_unreg_vpi(vport); + + lpfc_free_vpi(phba, vport->vpi); + vport->work_port_events = 0; + spin_lock_irq(&phba->hbalock); + list_del_init(&vport->listentry); + spin_unlock_irq(&phba->hbalock); + + rc = VPORT_OK; +out: + scsi_host_put(shost); + return rc; +} + + +EXPORT_SYMBOL(lpfc_vport_create); +EXPORT_SYMBOL(lpfc_vport_delete); diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.h --- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,113 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2006 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + *******************************************************************/ + +#ifndef _H_LPFC_VPORT +#define _H_LPFC_VPORT + +/* API version values (each will be an individual bit) */ +#define VPORT_API_VERSION_1 0x01 + +/* Values returned via lpfc_vport_getinfo() */ +struct vport_info { + + uint32_t api_versions; + uint8_t linktype; +#define VPORT_TYPE_PHYSICAL 0 +#define VPORT_TYPE_VIRTUAL 1 + + uint8_t state; +#define VPORT_STATE_OFFLINE 0 +#define VPORT_STATE_ACTIVE 1 +#define VPORT_STATE_FAILED 2 + + uint8_t fail_reason; + uint8_t prev_fail_reason; +#define VPORT_FAIL_UNKNOWN 0 +#define VPORT_FAIL_LINKDOWN 1 +#define VPORT_FAIL_FAB_UNSUPPORTED 2 +#define VPORT_FAIL_FAB_NORESOURCES 3 +#define VPORT_FAIL_FAB_LOGOUT 4 +#define VPORT_FAIL_ADAP_NORESOURCES 5 + + uint8_t node_name[8]; /* WWNN */ + uint8_t port_name[8]; /* WWPN */ + + struct Scsi_Host *shost; + +/* Following values are valid only on physical links */ + uint32_t vports_max; + uint32_t vports_inuse; + uint32_t rpi_max; + uint32_t rpi_inuse; +#define VPORT_CNT_INVALID 0xFFFFFFFF +}; + +/* data used in link creation */ +struct vport_data { + uint32_t api_version; + + uint32_t options; +#define VPORT_OPT_AUTORETRY 0x01 + + uint8_t node_name[8]; /* WWNN */ + uint8_t port_name[8]; /* WWPN */ + +/* + * Upon successful creation, vport_shost will point to the new Scsi_Host + * structure for the new virtual link. + */ + struct Scsi_Host *vport_shost; +}; + +/* API function return codes */ +#define VPORT_OK 0 +#define VPORT_ERROR -1 +#define VPORT_INVAL -2 +#define VPORT_NOMEM -3 +#define VPORT_NORESOURCES -4 + +int lpfc_vport_create(struct fc_vport *, bool); +int lpfc_vport_delete(struct fc_vport *); +int lpfc_vport_getinfo(struct Scsi_Host *, struct vport_info *); +int lpfc_vport_tgt_remove(struct Scsi_Host *, uint, uint); + +/* + * queuecommand VPORT-specific return codes. Specified in the host byte code. + * Returned when the virtual link has failed or is not active. 
+ */ +#define DID_VPORT_ERROR 0x0f + +#define VPORT_INFO 0x1 +#define VPORT_CREATE 0x2 +#define VPORT_DELETE 0x4 + +struct vport_cmd_tag { + uint32_t cmd; + struct vport_data cdata; + struct vport_info cinfo; + void *vport; + int vport_num; +}; + +void lpfc_vport_set_state(struct lpfc_vport *vport, + enum fc_vport_state new_state); + +#endif /* H_LPFC_VPORT */ diff -Nurb linux-2.6.22-570/drivers/scsi/mac53c94.c linux-2.6.22-591/drivers/scsi/mac53c94.c --- linux-2.6.22-570/drivers/scsi/mac53c94.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/mac53c94.c 2007-12-21 15:36:12.000000000 -0500 @@ -77,7 +77,7 @@ for (i = 0; i < cmd->cmd_len; ++i) printk(" %.2x", cmd->cmnd[i]); printk("\n" KERN_DEBUG "use_sg=%d request_bufflen=%d request_buffer=%p\n", - cmd->use_sg, cmd->request_bufflen, cmd->request_buffer); + scsi_sg_count(cmd), scsi_bufflen(cmd), scsi_sglist(cmd)); } #endif @@ -173,7 +173,6 @@ writeb(CMD_SELECT, &regs->command); state->phase = selecting; - if (cmd->use_sg > 0 || cmd->request_bufflen != 0) set_dma_cmds(state, cmd); } @@ -262,7 +261,7 @@ writeb(CMD_NOP, &regs->command); /* set DMA controller going if any data to transfer */ if ((stat & (STAT_MSG|STAT_CD)) == 0 - && (cmd->use_sg > 0 || cmd->request_bufflen != 0)) { + && (scsi_sg_count(cmd) > 0 || scsi_bufflen(cmd))) { nb = cmd->SCp.this_residual; if (nb > 0xfff0) nb = 0xfff0; @@ -310,14 +309,7 @@ printk(KERN_DEBUG "intr %x before data xfer complete\n", intr); } writel(RUN << 16, &dma->control); /* stop dma */ - if (cmd->use_sg != 0) { - pci_unmap_sg(state->pdev, - (struct scatterlist *)cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); - } else { - pci_unmap_single(state->pdev, state->dma_addr, - cmd->request_bufflen, cmd->sc_data_direction); - } + scsi_dma_unmap(cmd); /* should check dma status */ writeb(CMD_I_COMPLETE, &regs->command); state->phase = completing; @@ -365,23 +357,23 @@ */ static void set_dma_cmds(struct fsc_state *state, struct scsi_cmnd *cmd) { - int i, dma_cmd,
total; + int i, dma_cmd, total, nseg; struct scatterlist *scl; struct dbdma_cmd *dcmds; dma_addr_t dma_addr; u32 dma_len; + nseg = scsi_dma_map(cmd); + BUG_ON(nseg < 0); + if (!nseg) + return; + dma_cmd = cmd->sc_data_direction == DMA_TO_DEVICE ? OUTPUT_MORE : INPUT_MORE; dcmds = state->dma_cmds; - if (cmd->use_sg > 0) { - int nseg; - total = 0; - scl = (struct scatterlist *) cmd->request_buffer; - nseg = pci_map_sg(state->pdev, scl, cmd->use_sg, - cmd->sc_data_direction); - for (i = 0; i < nseg; ++i) { + + scsi_for_each_sg(cmd, scl, nseg, i) { dma_addr = sg_dma_address(scl); dma_len = sg_dma_len(scl); if (dma_len > 0xffff) @@ -391,21 +383,9 @@ st_le16(&dcmds->command, dma_cmd); st_le32(&dcmds->phy_addr, dma_addr); dcmds->xfer_status = 0; - ++scl; - ++dcmds; - } - } else { - total = cmd->request_bufflen; - if (total > 0xffff) - panic("mac53c94: transfer size >= 64k"); - dma_addr = pci_map_single(state->pdev, cmd->request_buffer, - total, cmd->sc_data_direction); - state->dma_addr = dma_addr; - st_le16(&dcmds->req_count, total); - st_le32(&dcmds->phy_addr, dma_addr); - dcmds->xfer_status = 0; ++dcmds; } + dma_cmd += OUTPUT_LAST - OUTPUT_MORE; st_le16(&dcmds[-1].command, dma_cmd); st_le16(&dcmds->command, DBDMA_STOP); diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c linux-2.6.22-591/drivers/scsi/megaraid/megaraid_mbox.c --- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/megaraid/megaraid_mbox.c 2007-12-21 15:36:12.000000000 -0500 @@ -1378,8 +1378,6 @@ { struct scatterlist *sgl; mbox_ccb_t *ccb; - struct page *page; - unsigned long offset; struct scsi_cmnd *scp; int sgcnt; int i; @@ -1388,48 +1386,16 @@ scp = scb->scp; ccb = (mbox_ccb_t *)scb->ccb; + sgcnt = scsi_dma_map(scp); + BUG_ON(sgcnt < 0 || sgcnt > adapter->sglen); + // no mapping required if no data to be transferred - if (!scp->request_buffer || !scp->request_bufflen) + if (!sgcnt) return 0; - if (!scp->use_sg) { 
/* scatter-gather list not used */ - - page = virt_to_page(scp->request_buffer); - - offset = ((unsigned long)scp->request_buffer & ~PAGE_MASK); - - ccb->buf_dma_h = pci_map_page(adapter->pdev, page, offset, - scp->request_bufflen, - scb->dma_direction); - scb->dma_type = MRAID_DMA_WBUF; - - /* - * We need to handle special 64-bit commands that need a - * minimum of 1 SG - */ - sgcnt = 1; - ccb->sgl64[0].address = ccb->buf_dma_h; - ccb->sgl64[0].length = scp->request_bufflen; - - return sgcnt; - } - - sgl = (struct scatterlist *)scp->request_buffer; - - // The number of sg elements returned must not exceed our limit - sgcnt = pci_map_sg(adapter->pdev, sgl, scp->use_sg, - scb->dma_direction); - - if (sgcnt > adapter->sglen) { - con_log(CL_ANN, (KERN_CRIT - "megaraid critical: too many sg elements:%d\n", - sgcnt)); - BUG(); - } - scb->dma_type = MRAID_DMA_WSG; - for (i = 0; i < sgcnt; i++, sgl++) { + scsi_for_each_sg(scp, sgl, sgcnt, i) { ccb->sgl64[i].address = sg_dma_address(sgl); ccb->sgl64[i].length = sg_dma_len(sgl); } @@ -1489,19 +1455,11 @@ adapter->outstanding_cmds++; - if (scb->dma_direction == PCI_DMA_TODEVICE) { - if (!scb->scp->use_sg) { // sg list not used - pci_dma_sync_single_for_device(adapter->pdev, - ccb->buf_dma_h, - scb->scp->request_bufflen, - PCI_DMA_TODEVICE); - } - else { + if (scb->dma_direction == PCI_DMA_TODEVICE) pci_dma_sync_sg_for_device(adapter->pdev, - scb->scp->request_buffer, - scb->scp->use_sg, PCI_DMA_TODEVICE); - } - } + scsi_sglist(scb->scp), + scsi_sg_count(scb->scp), + PCI_DMA_TODEVICE); mbox->busy = 1; // Set busy mbox->poll = 0; @@ -1624,11 +1582,11 @@ return scb; case MODE_SENSE: - if (scp->use_sg) { + { struct scatterlist *sgl; caddr_t vaddr; - sgl = (struct scatterlist *)scp->request_buffer; + sgl = scsi_sglist(scp); if (sgl->page) { vaddr = (caddr_t) (page_address((&sgl[0])->page) @@ -1642,9 +1600,6 @@ __LINE__)); } } - else { - memset(scp->request_buffer, 0, scp->cmnd[4]); - } scp->result = (DID_OK << 16); return NULL; 
@@ -1716,7 +1671,7 @@ mbox->cmd = MBOXCMD_PASSTHRU64; scb->dma_direction = scp->sc_data_direction; - pthru->dataxferlen = scp->request_bufflen; + pthru->dataxferlen = scsi_bufflen(scp); pthru->dataxferaddr = ccb->sgl_dma_h; pthru->numsge = megaraid_mbox_mksgl(adapter, scb); @@ -2050,8 +2005,8 @@ memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); - if (scp->request_bufflen) { - pthru->dataxferlen = scp->request_bufflen; + if (scsi_bufflen(scp)) { + pthru->dataxferlen = scsi_bufflen(scp); pthru->dataxferaddr = ccb->sgl_dma_h; pthru->numsge = megaraid_mbox_mksgl(adapter, scb); } @@ -2099,8 +2054,8 @@ memcpy(epthru->cdb, scp->cmnd, scp->cmd_len); - if (scp->request_bufflen) { - epthru->dataxferlen = scp->request_bufflen; + if (scsi_bufflen(scp)) { + epthru->dataxferlen = scsi_bufflen(scp); epthru->dataxferaddr = ccb->sgl_dma_h; epthru->numsge = megaraid_mbox_mksgl(adapter, scb); } @@ -2266,37 +2221,13 @@ ccb = (mbox_ccb_t *)scb->ccb; - switch (scb->dma_type) { - - case MRAID_DMA_WBUF: - if (scb->dma_direction == PCI_DMA_FROMDEVICE) { - pci_dma_sync_single_for_cpu(adapter->pdev, - ccb->buf_dma_h, - scb->scp->request_bufflen, - PCI_DMA_FROMDEVICE); - } - - pci_unmap_page(adapter->pdev, ccb->buf_dma_h, - scb->scp->request_bufflen, scb->dma_direction); - - break; - - case MRAID_DMA_WSG: - if (scb->dma_direction == PCI_DMA_FROMDEVICE) { + if (scb->dma_direction == PCI_DMA_FROMDEVICE) pci_dma_sync_sg_for_cpu(adapter->pdev, - scb->scp->request_buffer, - scb->scp->use_sg, PCI_DMA_FROMDEVICE); - } - - pci_unmap_sg(adapter->pdev, scb->scp->request_buffer, - scb->scp->use_sg, scb->dma_direction); - - break; - - default: - break; - } + scsi_sglist(scb->scp), + scsi_sg_count(scb->scp), + PCI_DMA_FROMDEVICE); + scsi_dma_unmap(scb->scp); return; } @@ -2399,25 +2330,17 @@ if (scp->cmnd[0] == INQUIRY && status == 0 && islogical == 0 && IS_RAID_CH(raid_dev, scb->dev_channel)) { - if (scp->use_sg) { - sgl = (struct scatterlist *) - scp->request_buffer; - + sgl = scsi_sglist(scp); if 
(sgl->page) { c = *(unsigned char *) (page_address((&sgl[0])->page) + (&sgl[0])->offset); - } - else { + } else { con_log(CL_ANN, (KERN_WARNING "megaraid mailbox: invalid sg:%d\n", __LINE__)); c = 0; } - } - else { - c = *(uint8_t *)scp->request_buffer; - } if ((c & 0x1F ) == TYPE_DISK) { pdev_index = (scb->dev_channel * 16) + diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c linux-2.6.22-591/drivers/scsi/megaraid/megaraid_sas.c --- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/megaraid/megaraid_sas.c 2007-12-21 15:36:12.000000000 -0500 @@ -433,34 +433,15 @@ int sge_count; struct scatterlist *os_sgl; - /* - * Return 0 if there is no data transfer - */ - if (!scp->request_buffer || !scp->request_bufflen) - return 0; + sge_count = scsi_dma_map(scp); + BUG_ON(sge_count < 0); - if (!scp->use_sg) { - mfi_sgl->sge32[0].phys_addr = pci_map_single(instance->pdev, - scp-> - request_buffer, - scp-> - request_bufflen, - scp-> - sc_data_direction); - mfi_sgl->sge32[0].length = scp->request_bufflen; - - return 1; - } - - os_sgl = (struct scatterlist *)scp->request_buffer; - sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, - scp->sc_data_direction); - - for (i = 0; i < sge_count; i++, os_sgl++) { + if (sge_count) { + scsi_for_each_sg(scp, os_sgl, sge_count, i) { mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); } - + } return sge_count; } @@ -481,35 +462,15 @@ int sge_count; struct scatterlist *os_sgl; - /* - * Return 0 if there is no data transfer - */ - if (!scp->request_buffer || !scp->request_bufflen) - return 0; - - if (!scp->use_sg) { - mfi_sgl->sge64[0].phys_addr = pci_map_single(instance->pdev, - scp-> - request_buffer, - scp-> - request_bufflen, - scp-> - sc_data_direction); + sge_count = scsi_dma_map(scp); + BUG_ON(sge_count < 0); - mfi_sgl->sge64[0].length = scp->request_bufflen; - - return 1; - } - - os_sgl = 
(struct scatterlist *)scp->request_buffer; - sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, - scp->sc_data_direction); - - for (i = 0; i < sge_count; i++, os_sgl++) { + if (sge_count) { + scsi_for_each_sg(scp, os_sgl, sge_count, i) { mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); } - + } return sge_count; } @@ -593,7 +554,7 @@ pthru->cdb_len = scp->cmd_len; pthru->timeout = 0; pthru->flags = flags; - pthru->data_xfer_len = scp->request_bufflen; + pthru->data_xfer_len = scsi_bufflen(scp); memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); @@ -1195,45 +1156,6 @@ } /** - * megasas_unmap_sgbuf - Unmap SG buffers - * @instance: Adapter soft state - * @cmd: Completed command - */ -static void -megasas_unmap_sgbuf(struct megasas_instance *instance, struct megasas_cmd *cmd) -{ - dma_addr_t buf_h; - u8 opcode; - - if (cmd->scmd->use_sg) { - pci_unmap_sg(instance->pdev, cmd->scmd->request_buffer, - cmd->scmd->use_sg, cmd->scmd->sc_data_direction); - return; - } - - if (!cmd->scmd->request_bufflen) - return; - - opcode = cmd->frame->hdr.cmd; - - if ((opcode == MFI_CMD_LD_READ) || (opcode == MFI_CMD_LD_WRITE)) { - if (IS_DMA64) - buf_h = cmd->frame->io.sgl.sge64[0].phys_addr; - else - buf_h = cmd->frame->io.sgl.sge32[0].phys_addr; - } else { - if (IS_DMA64) - buf_h = cmd->frame->pthru.sgl.sge64[0].phys_addr; - else - buf_h = cmd->frame->pthru.sgl.sge32[0].phys_addr; - } - - pci_unmap_single(instance->pdev, buf_h, cmd->scmd->request_bufflen, - cmd->scmd->sc_data_direction); - return; -} - -/** * megasas_complete_cmd - Completes a command * @instance: Adapter soft state * @cmd: Command to be completed @@ -1281,7 +1203,7 @@ atomic_dec(&instance->fw_outstanding); - megasas_unmap_sgbuf(instance, cmd); + scsi_dma_unmap(cmd->scmd); cmd->scmd->scsi_done(cmd->scmd); megasas_return_cmd(instance, cmd); @@ -1329,7 +1251,7 @@ atomic_dec(&instance->fw_outstanding); - megasas_unmap_sgbuf(instance, cmd); + scsi_dma_unmap(cmd->scmd); 
cmd->scmd->scsi_done(cmd->scmd); megasas_return_cmd(instance, cmd); diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid.c linux-2.6.22-591/drivers/scsi/megaraid.c --- linux-2.6.22-570/drivers/scsi/megaraid.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/megaraid.c 2007-12-21 15:36:12.000000000 -0500 @@ -523,10 +523,8 @@ /* * filter the internal and ioctl commands */ - if((cmd->cmnd[0] == MEGA_INTERNAL_CMD)) { - return cmd->request_buffer; - } - + if((cmd->cmnd[0] == MEGA_INTERNAL_CMD)) + return (scb_t *)cmd->host_scribble; /* * We know what channels our logical drives are on - mega_find_card() @@ -657,22 +655,14 @@ case MODE_SENSE: { char *buf; - - if (cmd->use_sg) { struct scatterlist *sg; - sg = (struct scatterlist *)cmd->request_buffer; - buf = kmap_atomic(sg->page, KM_IRQ0) + - sg->offset; - } else - buf = cmd->request_buffer; - memset(buf, 0, cmd->cmnd[4]); - if (cmd->use_sg) { - struct scatterlist *sg; + sg = scsi_sglist(cmd); + buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; - sg = (struct scatterlist *)cmd->request_buffer; + memset(buf, 0, cmd->cmnd[4]); kunmap_atomic(buf - sg->offset, KM_IRQ0); - } + cmd->result = (DID_OK << 16); cmd->scsi_done(cmd); return NULL; @@ -1551,24 +1541,16 @@ islogical = adapter->logdrv_chan[cmd->device->channel]; if( cmd->cmnd[0] == INQUIRY && !islogical ) { - if( cmd->use_sg ) { - sgl = (struct scatterlist *) - cmd->request_buffer; - + sgl = scsi_sglist(cmd); if( sgl->page ) { c = *(unsigned char *) page_address((&sgl[0])->page) + (&sgl[0])->offset; - } - else { + } else { printk(KERN_WARNING "megaraid: invalid sg.\n"); c = 0; } - } - else { - c = *(u8 *)cmd->request_buffer; - } if(IS_RAID_CH(adapter, cmd->device->channel) && ((c & 0x1F ) == TYPE_DISK)) { @@ -1704,30 +1686,14 @@ static void mega_free_scb(adapter_t *adapter, scb_t *scb) { - unsigned long length; - switch( scb->dma_type ) { case MEGA_DMA_TYPE_NONE: break; - case MEGA_BULK_DATA: - if (scb->cmd->use_sg == 0) - length = 
scb->cmd->request_bufflen; - else { - struct scatterlist *sgl = - (struct scatterlist *)scb->cmd->request_buffer; - length = sgl->length; - } - pci_unmap_page(adapter->dev, scb->dma_h_bulkdata, - length, scb->dma_direction); - break; - case MEGA_SGLIST: - pci_unmap_sg(adapter->dev, scb->cmd->request_buffer, - scb->cmd->use_sg, scb->dma_direction); + scsi_dma_unmap(scb->cmd); break; - default: break; } @@ -1767,80 +1733,33 @@ static int mega_build_sglist(adapter_t *adapter, scb_t *scb, u32 *buf, u32 *len) { - struct scatterlist *sgl; - struct page *page; - unsigned long offset; - unsigned int length; + struct scatterlist *sg; Scsi_Cmnd *cmd; int sgcnt; int idx; cmd = scb->cmd; - /* Scatter-gather not used */ - if( cmd->use_sg == 0 || (cmd->use_sg == 1 && - !adapter->has_64bit_addr)) { - - if (cmd->use_sg == 0) { - page = virt_to_page(cmd->request_buffer); - offset = offset_in_page(cmd->request_buffer); - length = cmd->request_bufflen; - } else { - sgl = (struct scatterlist *)cmd->request_buffer; - page = sgl->page; - offset = sgl->offset; - length = sgl->length; - } - - scb->dma_h_bulkdata = pci_map_page(adapter->dev, - page, offset, - length, - scb->dma_direction); - scb->dma_type = MEGA_BULK_DATA; - - /* - * We need to handle special 64-bit commands that need a - * minimum of 1 SG - */ - if( adapter->has_64bit_addr ) { - scb->sgl64[0].address = scb->dma_h_bulkdata; - scb->sgl64[0].length = length; - *buf = (u32)scb->sgl_dma_addr; - *len = (u32)length; - return 1; - } - else { - *buf = (u32)scb->dma_h_bulkdata; - *len = (u32)length; - } - return 0; - } - - sgl = (struct scatterlist *)cmd->request_buffer; - /* * Copy Scatter-Gather list info into controller structure. 
* * The number of sg elements returned must not exceed our limit */ - sgcnt = pci_map_sg(adapter->dev, sgl, cmd->use_sg, - scb->dma_direction); + sgcnt = scsi_dma_map(cmd); scb->dma_type = MEGA_SGLIST; - BUG_ON(sgcnt > adapter->sglen); + BUG_ON(sgcnt > adapter->sglen || sgcnt < 0); *len = 0; - for( idx = 0; idx < sgcnt; idx++, sgl++ ) { - - if( adapter->has_64bit_addr ) { - scb->sgl64[idx].address = sg_dma_address(sgl); - *len += scb->sgl64[idx].length = sg_dma_len(sgl); - } - else { - scb->sgl[idx].address = sg_dma_address(sgl); - *len += scb->sgl[idx].length = sg_dma_len(sgl); + scsi_for_each_sg(cmd, sg, sgcnt, idx) { + if (adapter->has_64bit_addr) { + scb->sgl64[idx].address = sg_dma_address(sg); + *len += scb->sgl64[idx].length = sg_dma_len(sg); + } else { + scb->sgl[idx].address = sg_dma_address(sg); + *len += scb->sgl[idx].length = sg_dma_len(sg); } } @@ -3571,7 +3490,7 @@ /* * The user passthru structure */ - upthru = (mega_passthru __user *)MBOX(uioc)->xferaddr; + upthru = (mega_passthru __user *)(unsigned long)MBOX(uioc)->xferaddr; /* * Copy in the user passthru here. 
@@ -3623,7 +3542,7 @@ /* * Get the user data */ - if( copy_from_user(data, (char __user *)uxferaddr, + if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr, pthru->dataxferlen) ) { rval = (-EFAULT); goto freemem_and_return; @@ -3649,7 +3568,7 @@ * Is data going up-stream */ if( pthru->dataxferlen && (uioc.flags & UIOC_RD) ) { - if( copy_to_user((char __user *)uxferaddr, data, + if( copy_to_user((char __user *)(unsigned long) uxferaddr, data, pthru->dataxferlen) ) { rval = (-EFAULT); } @@ -3702,7 +3621,7 @@ /* * Get the user data */ - if( copy_from_user(data, (char __user *)uxferaddr, + if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr, uioc.xferlen) ) { pci_free_consistent(pdev, @@ -3742,7 +3661,7 @@ * Is data going up-stream */ if( uioc.xferlen && (uioc.flags & UIOC_RD) ) { - if( copy_to_user((char __user *)uxferaddr, data, + if( copy_to_user((char __user *)(unsigned long) uxferaddr, data, uioc.xferlen) ) { rval = (-EFAULT); @@ -4494,7 +4413,7 @@ scmd->device = sdev; scmd->device->host = adapter->host; - scmd->request_buffer = (void *)scb; + scmd->host_scribble = (void *)scb; scmd->cmnd[0] = MEGA_INTERNAL_CMD; scb->state |= SCB_ACTIVE; diff -Nurb linux-2.6.22-570/drivers/scsi/mesh.c linux-2.6.22-591/drivers/scsi/mesh.c --- linux-2.6.22-570/drivers/scsi/mesh.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/mesh.c 2007-12-21 15:36:12.000000000 -0500 @@ -421,7 +421,7 @@ for (i = 0; i < cmd->cmd_len; ++i) printk(" %x", cmd->cmnd[i]); printk(" use_sg=%d buffer=%p bufflen=%u\n", - cmd->use_sg, cmd->request_buffer, cmd->request_bufflen); + scsi_sg_count(cmd), scsi_sglist(cmd), scsi_bufflen(cmd)); } #endif if (ms->dma_started) @@ -602,13 +602,16 @@ cmd->result += (cmd->SCp.Message << 8); if (DEBUG_TARGET(cmd)) { printk(KERN_DEBUG "mesh_done: result = %x, data_ptr=%d, buflen=%d\n", - cmd->result, ms->data_ptr, cmd->request_bufflen); + cmd->result, ms->data_ptr, scsi_bufflen(cmd)); +#if 0 + /* needs to use sg? 
*/ if ((cmd->cmnd[0] == 0 || cmd->cmnd[0] == 0x12 || cmd->cmnd[0] == 3) && cmd->request_buffer != 0) { unsigned char *b = cmd->request_buffer; printk(KERN_DEBUG "buffer = %x %x %x %x %x %x %x %x\n", b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); } +#endif } cmd->SCp.this_residual -= ms->data_ptr; mesh_completed(ms, cmd); @@ -1265,15 +1268,18 @@ dcmds = ms->dma_cmds; dtot = 0; if (cmd) { - cmd->SCp.this_residual = cmd->request_bufflen; - if (cmd->use_sg > 0) { int nseg; + + cmd->SCp.this_residual = scsi_bufflen(cmd); + + nseg = scsi_dma_map(cmd); + BUG_ON(nseg < 0); + + if (nseg) { total = 0; - scl = (struct scatterlist *) cmd->request_buffer; off = ms->data_ptr; - nseg = pci_map_sg(ms->pdev, scl, cmd->use_sg, - cmd->sc_data_direction); - for (i = 0; i data_ptr < cmd->request_bufflen) { - dtot = cmd->request_bufflen - ms->data_ptr; - if (dtot > 0xffff) - panic("mesh: transfer size >= 64k"); - st_le16(&dcmds->req_count, dtot); - /* XXX Use pci DMA API here ... */ - st_le32(&dcmds->phy_addr, - virt_to_phys(cmd->request_buffer) + ms->data_ptr); - dcmds->xfer_status = 0; - ++dcmds; } } if (dtot == 0) { @@ -1356,18 +1352,14 @@ dumplog(ms, ms->conn_tgt); dumpslog(ms); #endif /* MESH_DBG */ - } else if (cmd && cmd->request_bufflen != 0 && - ms->data_ptr > cmd->request_bufflen) { + } else if (cmd && scsi_bufflen(cmd) && + ms->data_ptr > scsi_bufflen(cmd)) { printk(KERN_DEBUG "mesh: target %d overrun, " "data_ptr=%x total=%x goes_out=%d\n", - ms->conn_tgt, ms->data_ptr, cmd->request_bufflen, + ms->conn_tgt, ms->data_ptr, scsi_bufflen(cmd), ms->tgts[ms->conn_tgt].data_goes_out); } - if (cmd->use_sg != 0) { - struct scatterlist *sg; - sg = (struct scatterlist *)cmd->request_buffer; - pci_unmap_sg(ms->pdev, sg, cmd->use_sg, cmd->sc_data_direction); - } + scsi_dma_unmap(cmd); ms->dma_started = 0; } diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.c linux-2.6.22-591/drivers/scsi/mvme16x.c --- linux-2.6.22-570/drivers/scsi/mvme16x.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/drivers/scsi/mvme16x.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,78 +0,0 @@ -/* - * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux. - * - * Based on work by Alan Hourihane - */ -#include -#include -#include - -#include -#include -#include -#include - -#include "scsi.h" -#include -#include "53c7xx.h" -#include "mvme16x.h" - -#include - - -int mvme16x_scsi_detect(struct scsi_host_template *tpnt) -{ - static unsigned char called = 0; - int clock; - long long options; - - if (!MACH_IS_MVME16x) - return 0; - if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) { - printk ("SCSI detection disabled, SCSI chip not present\n"); - return 0; - } - if (called) - return 0; - - tpnt->proc_name = "MVME16x"; - - options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT; - - clock = 66000000; /* 66MHz SCSI Clock */ - - ncr53c7xx_init(tpnt, 0, 710, (unsigned long)0xfff47000, - 0, MVME16x_IRQ_SCSI, DMA_NONE, - options, clock); - called = 1; - return 1; -} - -static int mvme16x_scsi_release(struct Scsi_Host *shost) -{ - if (shost->irq) - free_irq(shost->irq, NULL); - if (shost->dma_channel != 0xff) - free_dma(shost->dma_channel); - if (shost->io_port && shost->n_io_port) - release_region(shost->io_port, shost->n_io_port); - scsi_unregister(shost); - return 0; -} - -static struct scsi_host_template driver_template = { - .name = "MVME16x NCR53c710 SCSI", - .detect = mvme16x_scsi_detect, - .release = mvme16x_scsi_release, - .queuecommand = NCR53c7xx_queue_command, - .abort = NCR53c7xx_abort, - .reset = NCR53c7xx_reset, - .can_queue = 24, - .this_id = 7, - .sg_tablesize = 63, - .cmd_per_lun = 3, - .use_clustering = DISABLE_CLUSTERING -}; - - -#include "scsi_module.c" diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.h linux-2.6.22-591/drivers/scsi/mvme16x.h --- linux-2.6.22-570/drivers/scsi/mvme16x.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/mvme16x.h 
1969-12-31 19:00:00.000000000 -0500 @@ -1,24 +0,0 @@ -#ifndef MVME16x_SCSI_H -#define MVME16x_SCSI_H - -#include - -int mvme16x_scsi_detect(struct scsi_host_template *); -const char *NCR53c7x0_info(void); -int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -int NCR53c7xx_abort(Scsi_Cmnd *); -int NCR53c7x0_release (struct Scsi_Host *); -int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int); -void NCR53c7x0_intr(int irq, void *dev_id); - -#ifndef CMD_PER_LUN -#define CMD_PER_LUN 3 -#endif - -#ifndef CAN_QUEUE -#define CAN_QUEUE 24 -#endif - -#include - -#endif /* MVME16x_SCSI_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c linux-2.6.22-591/drivers/scsi/mvme16x_scsi.c --- linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/mvme16x_scsi.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,158 @@ +/* + * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux. + * + * Based on work by Alan Hourihane + * + * Rewritten to use 53c700.c by Kars de Jong + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "53c700.h" + +MODULE_AUTHOR("Kars de Jong "); +MODULE_DESCRIPTION("MVME16x NCR53C710 driver"); +MODULE_LICENSE("GPL"); + +static struct scsi_host_template mvme16x_scsi_driver_template = { + .name = "MVME16x NCR53c710 SCSI", + .proc_name = "MVME16x", + .this_id = 7, + .module = THIS_MODULE, +}; + +static struct platform_device *mvme16x_scsi_device; + +static __devinit int +mvme16x_probe(struct device *dev) +{ + struct Scsi_Host * host = NULL; + struct NCR_700_Host_Parameters *hostdata; + + if (!MACH_IS_MVME16x) + goto out; + + if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) { + printk(KERN_INFO "mvme16x-scsi: detection disabled, " + "SCSI chip not present\n"); + goto out; + } + + hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); + if (hostdata == NULL) { + printk(KERN_ERR 
"mvme16x-scsi: " + "Failed to allocate host data\n"); + goto out; + } + memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); + + /* Fill in the required pieces of hostdata */ + hostdata->base = (void __iomem *)0xfff47000UL; + hostdata->clock = 50; /* XXX - depends on the CPU clock! */ + hostdata->chip710 = 1; + hostdata->dmode_extra = DMODE_FC2; + hostdata->dcntl_extra = EA_710; + hostdata->ctest7_extra = CTEST7_TT1; + + /* and register the chip */ + host = NCR_700_detect(&mvme16x_scsi_driver_template, hostdata, dev); + if (!host) { + printk(KERN_ERR "mvme16x-scsi: No host detected; " + "board configuration problem?\n"); + goto out_free; + } + host->this_id = 7; + host->base = 0xfff47000UL; + host->irq = MVME16x_IRQ_SCSI; + if (request_irq(host->irq, NCR_700_intr, 0, "mvme16x-scsi", host)) { + printk(KERN_ERR "mvme16x-scsi: request_irq failed\n"); + goto out_put_host; + } + + /* Enable scsi chip ints */ + { + volatile unsigned long v; + + /* Enable scsi interrupts at level 4 in PCCchip2 */ + v = in_be32(0xfff4202c); + v = (v & ~0xff) | 0x10 | 4; + out_be32(0xfff4202c, v); + } + + scsi_scan_host(host); + + return 0; + + out_put_host: + scsi_host_put(host); + out_free: + kfree(hostdata); + out: + return -ENODEV; +} + +static __devexit int +mvme16x_device_remove(struct device *dev) +{ + struct Scsi_Host *host = dev_to_shost(dev); + struct NCR_700_Host_Parameters *hostdata = shost_priv(host); + + /* Disable scsi chip ints */ + { + volatile unsigned long v; + + v = in_be32(0xfff4202c); + v &= ~0x10; + out_be32(0xfff4202c, v); + } + scsi_remove_host(host); + NCR_700_release(host); + kfree(hostdata); + free_irq(host->irq, host); + + return 0; +} + +static struct device_driver mvme16x_scsi_driver = { + .name = "mvme16x-scsi", + .bus = &platform_bus_type, + .probe = mvme16x_probe, + .remove = __devexit_p(mvme16x_device_remove), +}; + +static int __init mvme16x_scsi_init(void) +{ + int err; + + err = driver_register(&mvme16x_scsi_driver); + if (err) + return err; + + 
mvme16x_scsi_device = platform_device_register_simple("mvme16x-scsi", + -1, NULL, 0); + if (IS_ERR(mvme16x_scsi_device)) { + driver_unregister(&mvme16x_scsi_driver); + return PTR_ERR(mvme16x_scsi_device); + } + + return 0; +} + +static void __exit mvme16x_scsi_exit(void) +{ + platform_device_unregister(mvme16x_scsi_device); + driver_unregister(&mvme16x_scsi_driver); +} + +module_init(mvme16x_scsi_init); +module_exit(mvme16x_scsi_exit); diff -Nurb linux-2.6.22-570/drivers/scsi/nsp32.c linux-2.6.22-591/drivers/scsi/nsp32.c --- linux-2.6.22-570/drivers/scsi/nsp32.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/nsp32.c 2007-12-21 15:36:12.000000000 -0500 @@ -49,10 +49,6 @@ #include #include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -# include -#endif - #include "nsp32.h" @@ -199,17 +195,9 @@ static void __exit exit_nsp32 (void); /* struct struct scsi_host_template */ -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) static int nsp32_proc_info (struct Scsi_Host *, char *, char **, off_t, int, int); -#else -static int nsp32_proc_info (char *, char **, off_t, int, int, int); -#endif -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) static int nsp32_detect (struct pci_dev *pdev); -#else -static int nsp32_detect (struct scsi_host_template *); -#endif static int nsp32_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *)); static const char *nsp32_info (struct Scsi_Host *); @@ -296,15 +284,7 @@ .eh_abort_handler = nsp32_eh_abort, .eh_bus_reset_handler = nsp32_eh_bus_reset, .eh_host_reset_handler = nsp32_eh_host_reset, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74)) - .detect = nsp32_detect, - .release = nsp32_release, -#endif -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,2)) - .use_new_eh_code = 1, -#else /* .highmem_io = 1, */ -#endif }; #include "nsp32_io.h" @@ -739,7 +719,7 @@ command = 0; command |= (TRANSFER_GO | ALL_COUNTER_CLR); if (data->trans_method & NSP32_TRANSFER_BUSMASTER) { - if (SCpnt->request_bufflen > 0) { + 
if (scsi_bufflen(SCpnt) > 0) { command |= BM_START; } } else if (data->trans_method & NSP32_TRANSFER_MMIO) { @@ -888,31 +868,28 @@ static int nsp32_setup_sg_table(struct scsi_cmnd *SCpnt) { nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata; - struct scatterlist *sgl; + struct scatterlist *sg; nsp32_sgtable *sgt = data->cur_lunt->sglun->sgt; int num, i; u32_le l; - if (SCpnt->request_bufflen == 0) { - return TRUE; - } - if (sgt == NULL) { nsp32_dbg(NSP32_DEBUG_SGLIST, "SGT == null"); return FALSE; } - if (SCpnt->use_sg) { - sgl = (struct scatterlist *)SCpnt->request_buffer; - num = pci_map_sg(data->Pci, sgl, SCpnt->use_sg, - SCpnt->sc_data_direction); - for (i = 0; i < num; i++) { + num = scsi_dma_map(SCpnt); + if (!num) + return TRUE; + else if (num < 0) + return FALSE; + else { + scsi_for_each_sg(SCpnt, sg, num, i) { /* * Build nsp32_sglist, substitute sg dma addresses. */ - sgt[i].addr = cpu_to_le32(sg_dma_address(sgl)); - sgt[i].len = cpu_to_le32(sg_dma_len(sgl)); - sgl++; + sgt[i].addr = cpu_to_le32(sg_dma_address(sg)); + sgt[i].len = cpu_to_le32(sg_dma_len(sg)); if (le32_to_cpu(sgt[i].len) > 0x10000) { nsp32_msg(KERN_ERR, @@ -929,23 +906,6 @@ /* set end mark */ l = le32_to_cpu(sgt[num-1].len); sgt[num-1].len = cpu_to_le32(l | SGTEND); - - } else { - SCpnt->SCp.have_data_in = pci_map_single(data->Pci, - SCpnt->request_buffer, SCpnt->request_bufflen, - SCpnt->sc_data_direction); - - sgt[0].addr = cpu_to_le32(SCpnt->SCp.have_data_in); - sgt[0].len = cpu_to_le32(SCpnt->request_bufflen | SGTEND); /* set end mark */ - - if (SCpnt->request_bufflen > 0x10000) { - nsp32_msg(KERN_ERR, - "can't transfer over 64KB at a time, size=0x%lx", SCpnt->request_bufflen); - return FALSE; - } - nsp32_dbg(NSP32_DEBUG_SGLIST, "single : addr 0x%lx len=0x%lx", - le32_to_cpu(sgt[0].addr), - le32_to_cpu(sgt[0].len )); } return TRUE; @@ -962,7 +922,7 @@ "enter. 
target: 0x%x LUN: 0x%x cmnd: 0x%x cmndlen: 0x%x " "use_sg: 0x%x reqbuf: 0x%lx reqlen: 0x%x", SCpnt->device->id, SCpnt->device->lun, SCpnt->cmnd[0], SCpnt->cmd_len, - SCpnt->use_sg, SCpnt->request_buffer, SCpnt->request_bufflen); + scsi_sg_count(SCpnt), scsi_sglist(SCpnt), scsi_bufflen(SCpnt)); if (data->CurrentSC != NULL) { nsp32_msg(KERN_ERR, "Currentsc != NULL. Cancel this command request"); @@ -994,10 +954,10 @@ data->CurrentSC = SCpnt; SCpnt->SCp.Status = CHECK_CONDITION; SCpnt->SCp.Message = 0; - SCpnt->resid = SCpnt->request_bufflen; + scsi_set_resid(SCpnt, scsi_bufflen(SCpnt)); - SCpnt->SCp.ptr = (char *) SCpnt->request_buffer; - SCpnt->SCp.this_residual = SCpnt->request_bufflen; + SCpnt->SCp.ptr = (char *)scsi_sglist(SCpnt); + SCpnt->SCp.this_residual = scsi_bufflen(SCpnt); SCpnt->SCp.buffer = NULL; SCpnt->SCp.buffers_residual = 0; @@ -1210,13 +1170,9 @@ unsigned long flags; int ret; int handled = 0; - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) struct Scsi_Host *host = data->Host; + spin_lock_irqsave(host->host_lock, flags); -#else - spin_lock_irqsave(&io_request_lock, flags); -#endif /* * IRQ check, then enable IRQ mask @@ -1312,7 +1268,7 @@ } if ((auto_stat & DATA_IN_PHASE) && - (SCpnt->resid > 0) && + (scsi_get_resid(SCpnt) > 0) && ((nsp32_read2(base, FIFO_REST_CNT) & FIFO_REST_MASK) != 0)) { printk( "auto+fifo\n"); //nsp32_pio_read(SCpnt); @@ -1333,7 +1289,7 @@ nsp32_dbg(NSP32_DEBUG_INTR, "SSACK=0x%lx", nsp32_read4(base, SAVED_SACK_CNT)); - SCpnt->resid = 0; /* all data transfered! */ + scsi_set_resid(SCpnt, 0); /* all data transfered! 
*/ } /* @@ -1480,11 +1436,7 @@ nsp32_write2(base, IRQ_CONTROL, 0); out2: -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) spin_unlock_irqrestore(host->host_lock, flags); -#else - spin_unlock_irqrestore(&io_request_lock, flags); -#endif nsp32_dbg(NSP32_DEBUG_INTR, "exit"); @@ -1499,28 +1451,15 @@ nsp32_dbg(NSP32_DEBUG_PROC, "buffer=0x%p pos=0x%p length=%d %d\n", buffer, pos, length, length - (pos - buffer));\ } \ } while(0) -static int nsp32_proc_info( -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) - struct Scsi_Host *host, -#endif - char *buffer, - char **start, - off_t offset, - int length, -#if !(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) - int hostno, -#endif - int inout) + +static int nsp32_proc_info(struct Scsi_Host *host, char *buffer, char **start, + off_t offset, int length, int inout) { char *pos = buffer; int thislength; unsigned long flags; nsp32_hw_data *data; -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) int hostno; -#else - struct Scsi_Host *host; -#endif unsigned int base; unsigned char mode_reg; int id, speed; @@ -1531,15 +1470,7 @@ return -EINVAL; } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) hostno = host->host_no; -#else - /* search this HBA host */ - host = scsi_host_hn_get(hostno); - if (host == NULL) { - return -ESRCH; - } -#endif data = (nsp32_hw_data *)host->hostdata; base = host->io_port; @@ -1626,25 +1557,8 @@ nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata; unsigned int base = SCpnt->device->host->io_port; - /* - * unmap pci - */ - if (SCpnt->request_bufflen == 0) { - goto skip; - } - - if (SCpnt->use_sg) { - pci_unmap_sg(data->Pci, - (struct scatterlist *)SCpnt->request_buffer, - SCpnt->use_sg, SCpnt->sc_data_direction); - } else { - pci_unmap_single(data->Pci, - (u32)SCpnt->SCp.have_data_in, - SCpnt->request_bufflen, - SCpnt->sc_data_direction); - } + scsi_dma_unmap(SCpnt); - skip: /* * clear TRANSFERCONTROL_BM_START */ @@ -1800,7 +1714,7 @@ SCpnt->SCp.Message = 0; nsp32_dbg(NSP32_DEBUG_BUSFREE, 
"normal end stat=0x%x resid=0x%x\n", - SCpnt->SCp.Status, SCpnt->resid); + SCpnt->SCp.Status, scsi_get_resid(SCpnt)); SCpnt->result = (DID_OK << 16) | (SCpnt->SCp.Message << 8) | (SCpnt->SCp.Status << 0); @@ -1844,7 +1758,7 @@ unsigned int restlen, sentlen; u32_le len, addr; - nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", SCpnt->resid); + nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", scsi_get_resid(SCpnt)); /* adjust saved SACK count with 4 byte start address boundary */ s_sacklen -= le32_to_cpu(sgt[old_entry].addr) & 3; @@ -1888,12 +1802,12 @@ return; last: - if (SCpnt->resid < sentlen) { + if (scsi_get_resid(SCpnt) < sentlen) { nsp32_msg(KERN_ERR, "resid underflow"); } - SCpnt->resid -= sentlen; - nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", SCpnt->resid); + scsi_set_resid(SCpnt, scsi_get_resid(SCpnt) - sentlen); + nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", scsi_get_resid(SCpnt)); /* update hostdata and lun */ @@ -2022,7 +1936,7 @@ transfer = 0; transfer |= (TRANSFER_GO | ALL_COUNTER_CLR); if (data->trans_method & NSP32_TRANSFER_BUSMASTER) { - if (SCpnt->request_bufflen > 0) { + if (scsi_bufflen(SCpnt) > 0) { transfer |= BM_START; } } else if (data->trans_method & NSP32_TRANSFER_MMIO) { @@ -2674,17 +2588,7 @@ * 0x900-0xbff: (map same 0x800-0x8ff I/O port image repeatedly) * 0xc00-0xfff: CardBus status registers */ -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) -#define DETECT_OK 0 -#define DETECT_NG 1 -#define PCIDEV pdev static int nsp32_detect(struct pci_dev *pdev) -#else -#define DETECT_OK 1 -#define DETECT_NG 0 -#define PCIDEV (data->Pci) -static int nsp32_detect(struct scsi_host_template *sht) -#endif { struct Scsi_Host *host; /* registered host structure */ struct resource *res; @@ -2697,11 +2601,7 @@ /* * register this HBA as SCSI device */ -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) host = scsi_host_alloc(&nsp32_template, sizeof(nsp32_hw_data)); -#else - host = scsi_register(sht, sizeof(nsp32_hw_data)); -#endif if (host == NULL) { 
nsp32_msg (KERN_ERR, "failed to scsi register"); goto err; @@ -2719,9 +2619,6 @@ host->unique_id = data->BaseAddress; host->n_io_port = data->NumAddress; host->base = (unsigned long)data->MmioAddress; -#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,63)) - scsi_set_pci_device(host, PCIDEV); -#endif data->Host = host; spin_lock_init(&(data->Lock)); @@ -2776,7 +2673,7 @@ /* * setup DMA */ - if (pci_set_dma_mask(PCIDEV, DMA_32BIT_MASK) != 0) { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) { nsp32_msg (KERN_ERR, "failed to set PCI DMA mask"); goto scsi_unregister; } @@ -2784,7 +2681,7 @@ /* * allocate autoparam DMA resource. */ - data->autoparam = pci_alloc_consistent(PCIDEV, sizeof(nsp32_autoparam), &(data->auto_paddr)); + data->autoparam = pci_alloc_consistent(pdev, sizeof(nsp32_autoparam), &(data->auto_paddr)); if (data->autoparam == NULL) { nsp32_msg(KERN_ERR, "failed to allocate DMA memory"); goto scsi_unregister; @@ -2793,7 +2690,7 @@ /* * allocate scatter-gather DMA resource. */ - data->sg_list = pci_alloc_consistent(PCIDEV, NSP32_SG_TABLE_SIZE, + data->sg_list = pci_alloc_consistent(pdev, NSP32_SG_TABLE_SIZE, &(data->sg_paddr)); if (data->sg_list == NULL) { nsp32_msg(KERN_ERR, "failed to allocate DMA memory"); @@ -2883,16 +2780,14 @@ goto free_irq; } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) - ret = scsi_add_host(host, &PCIDEV->dev); + ret = scsi_add_host(host, &pdev->dev); if (ret) { nsp32_msg(KERN_ERR, "failed to add scsi host"); goto free_region; } scsi_scan_host(host); -#endif - pci_set_drvdata(PCIDEV, host); - return DETECT_OK; + pci_set_drvdata(pdev, host); + return 0; free_region: release_region(host->io_port, host->n_io_port); @@ -2901,22 +2796,19 @@ free_irq(host->irq, data); free_sg_list: - pci_free_consistent(PCIDEV, NSP32_SG_TABLE_SIZE, + pci_free_consistent(pdev, NSP32_SG_TABLE_SIZE, data->sg_list, data->sg_paddr); free_autoparam: - pci_free_consistent(PCIDEV, sizeof(nsp32_autoparam), + pci_free_consistent(pdev, sizeof(nsp32_autoparam), 
data->autoparam, data->auto_paddr); scsi_unregister: scsi_host_put(host); err: - return DETECT_NG; + return 1; } -#undef DETECT_OK -#undef DETECT_NG -#undef PCIDEV static int nsp32_release(struct Scsi_Host *host) { @@ -3525,11 +3417,7 @@ pci_set_master(pdev); -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) ret = nsp32_detect(pdev); -#else - ret = scsi_register_host(&nsp32_template); -#endif nsp32_msg(KERN_INFO, "irq: %i mmio: %p+0x%lx slot: %s model: %s", pdev->irq, @@ -3544,25 +3432,17 @@ static void __devexit nsp32_remove(struct pci_dev *pdev) { -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) struct Scsi_Host *host = pci_get_drvdata(pdev); -#endif nsp32_dbg(NSP32_DEBUG_REGISTER, "enter"); -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) scsi_remove_host(host); nsp32_release(host); scsi_host_put(host); -#else - scsi_unregister_host(&nsp32_template); -#endif } - - static struct pci_driver nsp32_driver = { .name = "nsp32", .id_table = nsp32_pci_table, diff -Nurb linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c linux-2.6.22-591/drivers/scsi/pcmcia/sym53c500_cs.c --- linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/pcmcia/sym53c500_cs.c 2007-12-21 15:36:12.000000000 -0500 @@ -370,8 +370,6 @@ DEB(unsigned char seq_reg;) unsigned char status, int_reg; unsigned char pio_status; - struct scatterlist *sglist; - unsigned int sgcount; int port_base = dev->io_port; struct sym53c500_data *data = (struct sym53c500_data *)dev->hostdata; @@ -434,20 +432,19 @@ switch (status & 0x07) { /* scsi phase */ case 0x00: /* DATA-OUT */ if (int_reg & 0x10) { /* Target requesting info transfer */ + struct scatterlist *sg; + int i; + curSC->SCp.phase = data_out; VDEB(printk("SYM53C500: Data-Out phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); - LOAD_DMA_COUNT(port_base, curSC->request_bufflen); /* Max transfer size */ + LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ 
outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG); - if (!curSC->use_sg) /* Don't use scatter-gather */ - SYM53C500_pio_write(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen); - else { /* use scatter-gather */ - sgcount = curSC->use_sg; - sglist = curSC->request_buffer; - while (sgcount--) { - SYM53C500_pio_write(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length); - sglist++; - } + + scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { + SYM53C500_pio_write(fast_pio, port_base, + page_address(sg->page) + sg->offset, + sg->length); } REG0(port_base); } @@ -455,20 +452,19 @@ case 0x01: /* DATA-IN */ if (int_reg & 0x10) { /* Target requesting info transfer */ + struct scatterlist *sg; + int i; + curSC->SCp.phase = data_in; VDEB(printk("SYM53C500: Data-In phase\n")); outb(FLUSH_FIFO, port_base + CMD_REG); - LOAD_DMA_COUNT(port_base, curSC->request_bufflen); /* Max transfer size */ + LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG); - if (!curSC->use_sg) /* Don't use scatter-gather */ - SYM53C500_pio_read(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen); - else { /* Use scatter-gather */ - sgcount = curSC->use_sg; - sglist = curSC->request_buffer; - while (sgcount--) { - SYM53C500_pio_read(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length); - sglist++; - } + + scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { + SYM53C500_pio_read(fast_pio, port_base, + page_address(sg->page) + sg->offset, + sg->length); } REG0(port_base); } @@ -578,7 +574,7 @@ DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->device->id, - SCpnt->device->lun, SCpnt->request_bufflen)); + SCpnt->device->lun, scsi_bufflen(SCpnt))); VDEB(for (i = 0; i < SCpnt->cmd_len; i++) printk("cmd[%d]=%02x ", i, SCpnt->cmnd[i])); diff -Nurb 
linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_attr.c --- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_attr.c 2007-12-21 15:36:12.000000000 -0500 @@ -11,8 +11,9 @@ /* SYSFS attributes --------------------------------------------------------- */ static ssize_t -qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_fw_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -31,8 +32,9 @@ } static ssize_t -qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_fw_dump(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -73,7 +75,6 @@ .attr = { .name = "fw_dump", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 0, .read = qla2x00_sysfs_read_fw_dump, @@ -81,8 +82,9 @@ }; static ssize_t -qla2x00_sysfs_read_nvram(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_nvram(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -101,8 +103,9 @@ } static ssize_t -qla2x00_sysfs_write_nvram(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_nvram(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -149,7 +152,6 @@ .attr = { .name = "nvram", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 512, .read = 
qla2x00_sysfs_read_nvram, @@ -157,8 +159,9 @@ }; static ssize_t -qla2x00_sysfs_read_optrom(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_optrom(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -176,8 +179,9 @@ } static ssize_t -qla2x00_sysfs_write_optrom(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_optrom(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -198,7 +202,6 @@ .attr = { .name = "optrom", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = OPTROM_SIZE_24XX, .read = qla2x00_sysfs_read_optrom, @@ -206,8 +209,9 @@ }; static ssize_t -qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -279,15 +283,15 @@ .attr = { .name = "optrom_ctl", .mode = S_IWUSR, - .owner = THIS_MODULE, }, .size = 0, .write = qla2x00_sysfs_write_optrom_ctl, }; static ssize_t -qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_vpd(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -305,8 +309,9 @@ } static ssize_t -qla2x00_sysfs_write_vpd(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_write_vpd(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = 
to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -327,7 +332,6 @@ .attr = { .name = "vpd", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = 0, .read = qla2x00_sysfs_read_vpd, @@ -335,8 +339,9 @@ }; static ssize_t -qla2x00_sysfs_read_sfp(struct kobject *kobj, char *buf, loff_t off, - size_t count) +qla2x00_sysfs_read_sfp(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, struct device, kobj))); @@ -375,7 +380,6 @@ .attr = { .name = "sfp", .mode = S_IRUSR | S_IWUSR, - .owner = THIS_MODULE, }, .size = SFP_DEV_SIZE * 2, .read = qla2x00_sysfs_read_sfp, diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_dbg.c --- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_dbg.c 2007-12-21 15:36:12.000000000 -0500 @@ -1411,9 +1411,9 @@ printk("0x%02x ", cmd->cmnd[i]); } printk("\n seg_cnt=%d, allowed=%d, retries=%d\n", - cmd->use_sg, cmd->allowed, cmd->retries); + scsi_sg_count(cmd), cmd->allowed, cmd->retries); printk(" request buffer=0x%p, request buffer len=0x%x\n", - cmd->request_buffer, cmd->request_bufflen); + scsi_sglist(cmd), scsi_bufflen(cmd)); printk(" tag=%d, transfersize=0x%x\n", cmd->tag, cmd->transfersize); printk(" serial_number=%lx, SP=%p\n", cmd->serial_number, sp); diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_iocb.c --- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_iocb.c 2007-12-21 15:36:12.000000000 -0500 @@ -155,6 +155,8 @@ uint32_t *cur_dsd; scsi_qla_host_t *ha; struct scsi_cmnd *cmd; + struct scatterlist *sg; + int i; cmd = sp->cmd; @@ -163,7 +165,7 @@ __constant_cpu_to_le32(COMMAND_TYPE); /* No data transfer */ - if (cmd->request_bufflen == 0 
|| cmd->sc_data_direction == DMA_NONE) { + if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { cmd_pkt->byte_count = __constant_cpu_to_le32(0); return; } @@ -177,13 +179,8 @@ cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address; /* Load data segments */ - if (cmd->use_sg != 0) { - struct scatterlist *cur_seg; - struct scatterlist *end_seg; - - cur_seg = (struct scatterlist *)cmd->request_buffer; - end_seg = cur_seg + tot_dsds; - while (cur_seg < end_seg) { + + scsi_for_each_sg(cmd, sg, tot_dsds, i) { cont_entry_t *cont_pkt; /* Allocate additional continuation packets? */ @@ -197,15 +194,9 @@ avail_dsds = 7; } - *cur_dsd++ = cpu_to_le32(sg_dma_address(cur_seg)); - *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg)); + *cur_dsd++ = cpu_to_le32(sg_dma_address(sg)); + *cur_dsd++ = cpu_to_le32(sg_dma_len(sg)); avail_dsds--; - - cur_seg++; - } - } else { - *cur_dsd++ = cpu_to_le32(sp->dma_handle); - *cur_dsd++ = cpu_to_le32(cmd->request_bufflen); } } @@ -224,6 +215,8 @@ uint32_t *cur_dsd; scsi_qla_host_t *ha; struct scsi_cmnd *cmd; + struct scatterlist *sg; + int i; cmd = sp->cmd; @@ -232,7 +225,7 @@ __constant_cpu_to_le32(COMMAND_A64_TYPE); /* No data transfer */ - if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) { + if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { cmd_pkt->byte_count = __constant_cpu_to_le32(0); return; } @@ -246,13 +239,7 @@ cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address; /* Load data segments */ - if (cmd->use_sg != 0) { - struct scatterlist *cur_seg; - struct scatterlist *end_seg; - - cur_seg = (struct scatterlist *)cmd->request_buffer; - end_seg = cur_seg + tot_dsds; - while (cur_seg < end_seg) { + scsi_for_each_sg(cmd, sg, tot_dsds, i) { dma_addr_t sle_dma; cont_a64_entry_t *cont_pkt; @@ -267,18 +254,11 @@ avail_dsds = 5; } - sle_dma = sg_dma_address(cur_seg); + sle_dma = sg_dma_address(sg); *cur_dsd++ = cpu_to_le32(LSD(sle_dma)); *cur_dsd++ = cpu_to_le32(MSD(sle_dma)); - *cur_dsd++ = 
cpu_to_le32(sg_dma_len(cur_seg)); + *cur_dsd++ = cpu_to_le32(sg_dma_len(sg)); avail_dsds--; - - cur_seg++; - } - } else { - *cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle)); - *cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle)); - *cur_dsd++ = cpu_to_le32(cmd->request_bufflen); } } @@ -291,7 +271,7 @@ int qla2x00_start_scsi(srb_t *sp) { - int ret; + int ret, nseg; unsigned long flags; scsi_qla_host_t *ha; struct scsi_cmnd *cmd; @@ -299,7 +279,6 @@ uint32_t index; uint32_t handle; cmd_entry_t *cmd_pkt; - struct scatterlist *sg; uint16_t cnt; uint16_t req_cnt; uint16_t tot_dsds; @@ -337,23 +316,10 @@ goto queuing_error; /* Map the sg table so we have an accurate count of sg entries needed */ - if (cmd->use_sg) { - sg = (struct scatterlist *) cmd->request_buffer; - tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg, - cmd->sc_data_direction); - if (tot_dsds == 0) + nseg = scsi_dma_map(cmd); + if (nseg < 0) goto queuing_error; - } else if (cmd->request_bufflen) { - dma_addr_t req_dma; - - req_dma = pci_map_single(ha->pdev, cmd->request_buffer, - cmd->request_bufflen, cmd->sc_data_direction); - if (dma_mapping_error(req_dma)) - goto queuing_error; - - sp->dma_handle = req_dma; - tot_dsds = 1; - } + tot_dsds = nseg; /* Calculate the number of request entries needed. */ req_cnt = ha->isp_ops.calc_req_entries(tot_dsds); @@ -391,7 +357,7 @@ /* Load SCSI command packet. 
*/ memcpy(cmd_pkt->scsi_cdb, cmd->cmnd, cmd->cmd_len); - cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen); + cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd)); /* Build IOCB segments */ ha->isp_ops.build_iocbs(sp, cmd_pkt, tot_dsds); @@ -423,14 +389,9 @@ return (QLA_SUCCESS); queuing_error: - if (cmd->use_sg && tot_dsds) { - sg = (struct scatterlist *) cmd->request_buffer; - pci_unmap_sg(ha->pdev, sg, cmd->use_sg, - cmd->sc_data_direction); - } else if (tot_dsds) { - pci_unmap_single(ha->pdev, sp->dma_handle, - cmd->request_bufflen, cmd->sc_data_direction); - } + if (tot_dsds) + scsi_dma_unmap(cmd); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return (QLA_FUNCTION_FAILED); @@ -642,6 +603,8 @@ uint32_t *cur_dsd; scsi_qla_host_t *ha; struct scsi_cmnd *cmd; + struct scatterlist *sg; + int i; cmd = sp->cmd; @@ -650,7 +613,7 @@ __constant_cpu_to_le32(COMMAND_TYPE_7); /* No data transfer */ - if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) { + if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { cmd_pkt->byte_count = __constant_cpu_to_le32(0); return; } @@ -670,13 +633,8 @@ cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address; /* Load data segments */ - if (cmd->use_sg != 0) { - struct scatterlist *cur_seg; - struct scatterlist *end_seg; - - cur_seg = (struct scatterlist *)cmd->request_buffer; - end_seg = cur_seg + tot_dsds; - while (cur_seg < end_seg) { + + scsi_for_each_sg(cmd, sg, tot_dsds, i) { dma_addr_t sle_dma; cont_a64_entry_t *cont_pkt; @@ -691,18 +649,11 @@ avail_dsds = 5; } - sle_dma = sg_dma_address(cur_seg); + sle_dma = sg_dma_address(sg); *cur_dsd++ = cpu_to_le32(LSD(sle_dma)); *cur_dsd++ = cpu_to_le32(MSD(sle_dma)); - *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg)); + *cur_dsd++ = cpu_to_le32(sg_dma_len(sg)); avail_dsds--; - - cur_seg++; - } - } else { - *cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle)); - *cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle)); - *cur_dsd++ = 
cpu_to_le32(cmd->request_bufflen); } } @@ -716,7 +667,7 @@ int qla24xx_start_scsi(srb_t *sp) { - int ret; + int ret, nseg; unsigned long flags; scsi_qla_host_t *ha; struct scsi_cmnd *cmd; @@ -724,7 +675,6 @@ uint32_t index; uint32_t handle; struct cmd_type_7 *cmd_pkt; - struct scatterlist *sg; uint16_t cnt; uint16_t req_cnt; uint16_t tot_dsds; @@ -762,23 +712,10 @@ goto queuing_error; /* Map the sg table so we have an accurate count of sg entries needed */ - if (cmd->use_sg) { - sg = (struct scatterlist *) cmd->request_buffer; - tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg, - cmd->sc_data_direction); - if (tot_dsds == 0) - goto queuing_error; - } else if (cmd->request_bufflen) { - dma_addr_t req_dma; - - req_dma = pci_map_single(ha->pdev, cmd->request_buffer, - cmd->request_bufflen, cmd->sc_data_direction); - if (dma_mapping_error(req_dma)) + nseg = scsi_dma_map(cmd); + if (nseg < 0) goto queuing_error; - - sp->dma_handle = req_dma; - tot_dsds = 1; - } + tot_dsds = nseg; req_cnt = qla24xx_calc_iocbs(tot_dsds); if (ha->req_q_cnt < (req_cnt + 2)) { @@ -821,7 +758,7 @@ memcpy(cmd_pkt->fcp_cdb, cmd->cmnd, cmd->cmd_len); host_to_fcp_swap(cmd_pkt->fcp_cdb, sizeof(cmd_pkt->fcp_cdb)); - cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen); + cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd)); /* Build IOCB segments */ qla24xx_build_scsi_iocbs(sp, cmd_pkt, tot_dsds); @@ -853,14 +790,9 @@ return QLA_SUCCESS; queuing_error: - if (cmd->use_sg && tot_dsds) { - sg = (struct scatterlist *) cmd->request_buffer; - pci_unmap_sg(ha->pdev, sg, cmd->use_sg, - cmd->sc_data_direction); - } else if (tot_dsds) { - pci_unmap_single(ha->pdev, sp->dma_handle, - cmd->request_bufflen, cmd->sc_data_direction); - } + if (tot_dsds) + scsi_dma_unmap(cmd); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_FUNCTION_FAILED; diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_isr.c --- 
linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_isr.c 2007-12-21 15:36:12.000000000 -0500 @@ -889,11 +889,11 @@ } if (scsi_status & (SS_RESIDUAL_UNDER | SS_RESIDUAL_OVER)) { resid = resid_len; - cp->resid = resid; + scsi_set_resid(cp, resid); CMD_RESID_LEN(cp) = resid; if (!lscsi_status && - ((unsigned)(cp->request_bufflen - resid) < + ((unsigned)(scsi_bufflen(cp) - resid) < cp->underflow)) { qla_printk(KERN_INFO, ha, "scsi(%ld:%d:%d:%d): Mid-layer underflow " @@ -901,7 +901,7 @@ "error status.\n", ha->host_no, cp->device->channel, cp->device->id, cp->device->lun, resid, - cp->request_bufflen); + scsi_bufflen(cp)); cp->result = DID_ERROR << 16; break; @@ -963,7 +963,7 @@ resid = fw_resid_len; if (scsi_status & SS_RESIDUAL_UNDER) { - cp->resid = resid; + scsi_set_resid(cp, resid); CMD_RESID_LEN(cp) = resid; } else { DEBUG2(printk(KERN_INFO @@ -1046,14 +1046,14 @@ "retrying command.\n", ha->host_no, cp->device->channel, cp->device->id, cp->device->lun, resid, - cp->request_bufflen)); + scsi_bufflen(cp))); cp->result = DID_BUS_BUSY << 16; break; } /* Handle mid-layer underflow */ - if ((unsigned)(cp->request_bufflen - resid) < + if ((unsigned)(scsi_bufflen(cp) - resid) < cp->underflow) { qla_printk(KERN_INFO, ha, "scsi(%ld:%d:%d:%d): Mid-layer underflow " @@ -1061,7 +1061,7 @@ "error status.\n", ha->host_no, cp->device->channel, cp->device->id, cp->device->lun, resid, - cp->request_bufflen); + scsi_bufflen(cp)); cp->result = DID_ERROR << 16; break; @@ -1084,7 +1084,7 @@ DEBUG2(printk(KERN_INFO "PID=0x%lx req=0x%x xtra=0x%x -- returning DID_ERROR " "status!\n", - cp->serial_number, cp->request_bufflen, resid_len)); + cp->serial_number, scsi_bufflen(cp), resid_len)); cp->result = DID_ERROR << 16; break; @@ -1633,7 +1633,7 @@ uint16_t entry; uint16_t index; const char *name; - irqreturn_t (*handler)(int, void *); + irq_handler_t handler; }; static struct qla_init_msix_entry 
imsix_entries[QLA_MSIX_ENTRIES] = { diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_os.c --- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_os.c 2007-12-21 15:36:12.000000000 -0500 @@ -2426,13 +2426,7 @@ struct scsi_cmnd *cmd = sp->cmd; if (sp->flags & SRB_DMA_VALID) { - if (cmd->use_sg) { - dma_unmap_sg(&ha->pdev->dev, cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); - } else if (cmd->request_bufflen) { - dma_unmap_single(&ha->pdev->dev, sp->dma_handle, - cmd->request_bufflen, cmd->sc_data_direction); - } + scsi_dma_unmap(cmd); sp->flags &= ~SRB_DMA_VALID; } CMD_SP(cmd) = NULL; diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_dbg.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_dbg.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,176 +6,9 @@ */ #include "ql4_def.h" -#include - -#if 0 - -static void qla4xxx_print_srb_info(struct srb * srb) -{ - printk("%s: srb = 0x%p, flags=0x%02x\n", __func__, srb, srb->flags); - printk("%s: cmd = 0x%p, saved_dma_handle = 0x%lx\n", - __func__, srb->cmd, (unsigned long) srb->dma_handle); - printk("%s: fw_ddb_index = %d, lun = %d\n", - __func__, srb->fw_ddb_index, srb->cmd->device->lun); - printk("%s: iocb_tov = %d\n", - __func__, srb->iocb_tov); - printk("%s: cc_stat = 0x%x, r_start = 0x%lx, u_start = 0x%lx\n\n", - __func__, srb->cc_stat, srb->r_start, srb->u_start); -} - -void qla4xxx_print_scsi_cmd(struct scsi_cmnd *cmd) -{ - printk("SCSI Command = 0x%p, Handle=0x%p\n", cmd, cmd->host_scribble); - printk(" b=%d, t=%02xh, l=%02xh, cmd_len = %02xh\n", - cmd->device->channel, cmd->device->id, cmd->device->lun, - cmd->cmd_len); - scsi_print_command(cmd); - printk(" seg_cnt = %d\n", cmd->use_sg); - printk(" request buffer = 0x%p, request buffer len = 0x%x\n", 
- cmd->request_buffer, cmd->request_bufflen); - if (cmd->use_sg) { - struct scatterlist *sg; - sg = (struct scatterlist *)cmd->request_buffer; - printk(" SG buffer: \n"); - qla4xxx_dump_buffer((caddr_t) sg, - (cmd->use_sg * sizeof(*sg))); - } - printk(" tag = %d, transfersize = 0x%x \n", cmd->tag, - cmd->transfersize); - printk(" Pid = %d, SP = 0x%p\n", (int)cmd->pid, cmd->SCp.ptr); - printk(" underflow size = 0x%x, direction=0x%x\n", cmd->underflow, - cmd->sc_data_direction); - printk(" Current time (jiffies) = 0x%lx, " - "timeout expires = 0x%lx\n", jiffies, cmd->eh_timeout.expires); - qla4xxx_print_srb_info((struct srb *) cmd->SCp.ptr); -} - -void __dump_registers(struct scsi_qla_host *ha) -{ - uint8_t i; - for (i = 0; i < MBOX_REG_COUNT; i++) { - printk(KERN_INFO "0x%02X mailbox[%d] = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, mailbox[i]), i, - readw(&ha->reg->mailbox[i])); - } - printk(KERN_INFO "0x%02X flash_address = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, flash_address), - readw(&ha->reg->flash_address)); - printk(KERN_INFO "0x%02X flash_data = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, flash_data), - readw(&ha->reg->flash_data)); - printk(KERN_INFO "0x%02X ctrl_status = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, ctrl_status), - readw(&ha->reg->ctrl_status)); - if (is_qla4010(ha)) { - printk(KERN_INFO "0x%02X nvram = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, u1.isp4010.nvram), - readw(&ha->reg->u1.isp4010.nvram)); - } - - else if (is_qla4022(ha) | is_qla4032(ha)) { - printk(KERN_INFO "0x%02X intr_mask = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u1.isp4022.intr_mask), - readw(&ha->reg->u1.isp4022.intr_mask)); - printk(KERN_INFO "0x%02X nvram = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, u1.isp4022.nvram), - readw(&ha->reg->u1.isp4022.nvram)); - printk(KERN_INFO "0x%02X semaphore = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u1.isp4022.semaphore), - readw(&ha->reg->u1.isp4022.semaphore)); - } - printk(KERN_INFO 
"0x%02X req_q_in = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, req_q_in), - readw(&ha->reg->req_q_in)); - printk(KERN_INFO "0x%02X rsp_q_out = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, rsp_q_out), - readw(&ha->reg->rsp_q_out)); - if (is_qla4010(ha)) { - printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4010.ext_hw_conf), - readw(&ha->reg->u2.isp4010.ext_hw_conf)); - printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4010.port_ctrl), - readw(&ha->reg->u2.isp4010.port_ctrl)); - printk(KERN_INFO "0x%02X port_status = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4010.port_status), - readw(&ha->reg->u2.isp4010.port_status)); - printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4010.req_q_out), - readw(&ha->reg->u2.isp4010.req_q_out)); - printk(KERN_INFO "0x%02X gp_out = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_out), - readw(&ha->reg->u2.isp4010.gp_out)); - printk(KERN_INFO "0x%02X gp_in = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_in), - readw(&ha->reg->u2.isp4010.gp_in)); - printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4010.port_err_status), - readw(&ha->reg->u2.isp4010.port_err_status)); - } - - else if (is_qla4022(ha) | is_qla4032(ha)) { - printk(KERN_INFO "Page 0 Registers:\n"); - printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4022.p0.ext_hw_conf), - readw(&ha->reg->u2.isp4022.p0.ext_hw_conf)); - printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4022.p0.port_ctrl), - readw(&ha->reg->u2.isp4022.p0.port_ctrl)); - printk(KERN_INFO "0x%02X port_status = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4022.p0.port_status), - readw(&ha->reg->u2.isp4022.p0.port_status)); - printk(KERN_INFO "0x%02X gp_out = 0x%08X\n", - (uint8_t) 
offsetof(struct isp_reg, - u2.isp4022.p0.gp_out), - readw(&ha->reg->u2.isp4022.p0.gp_out)); - printk(KERN_INFO "0x%02X gp_in = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, u2.isp4022.p0.gp_in), - readw(&ha->reg->u2.isp4022.p0.gp_in)); - printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4022.p0.port_err_status), - readw(&ha->reg->u2.isp4022.p0.port_err_status)); - printk(KERN_INFO "Page 1 Registers:\n"); - writel(HOST_MEM_CFG_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT), - &ha->reg->ctrl_status); - printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n", - (uint8_t) offsetof(struct isp_reg, - u2.isp4022.p1.req_q_out), - readw(&ha->reg->u2.isp4022.p1.req_q_out)); - writel(PORT_CTRL_STAT_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT), - &ha->reg->ctrl_status); - } -} - -void qla4xxx_dump_mbox_registers(struct scsi_qla_host *ha) -{ - unsigned long flags = 0; - int i = 0; - spin_lock_irqsave(&ha->hardware_lock, flags); - for (i = 1; i < MBOX_REG_COUNT; i++) - printk(KERN_INFO " Mailbox[%d] = %08x\n", i, - readw(&ha->reg->mailbox[i])); - spin_unlock_irqrestore(&ha->hardware_lock, flags); -} - -void qla4xxx_dump_registers(struct scsi_qla_host *ha) -{ - unsigned long flags = 0; - spin_lock_irqsave(&ha->hardware_lock, flags); - __dump_registers(ha); - spin_unlock_irqrestore(&ha->hardware_lock, flags); -} +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" void qla4xxx_dump_buffer(void *b, uint32_t size) { @@ -198,4 +31,3 @@ printk(KERN_DEBUG "\n"); } -#endif /* 0 */ diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_def.h --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_def.h 2007-12-21 15:36:12.000000000 -0500 @@ -122,8 +122,7 @@ #define ISCSI_IPADDR_SIZE 4 /* IP address size */ #define ISCSI_ALIAS_SIZE 32 /* ISCSI Alais name size */ -#define ISCSI_NAME_SIZE 255 /* ISCSI Name size - - * usually 
a string */ +#define ISCSI_NAME_SIZE 0xE0 /* ISCSI Name size */ #define LSDW(x) ((u32)((u64)(x))) #define MSDW(x) ((u32)((((u64)(x)) >> 16) >> 16)) @@ -187,7 +186,19 @@ u_long u_start; /* Time when we handed the cmd to F/W */ }; - /* +/* + * Asynchronous Event Queue structure + */ +struct aen { + uint32_t mbox_sts[MBOX_AEN_REG_COUNT]; +}; + +struct ql4_aen_log { + int count; + struct aen entry[MAX_AEN_ENTRIES]; +}; + +/* * Device Database (DDB) structure */ struct ddb_entry { @@ -254,13 +265,6 @@ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ #define DF_FO_MASKED 3 -/* - * Asynchronous Event Queue structure - */ -struct aen { - uint32_t mbox_sts[MBOX_AEN_REG_COUNT]; -}; - #include "ql4_fw.h" #include "ql4_nvram.h" @@ -270,20 +274,17 @@ */ struct scsi_qla_host { /* Linux adapter configuration data */ - struct Scsi_Host *host; /* pointer to host data */ - uint32_t tot_ddbs; unsigned long flags; #define AF_ONLINE 0 /* 0x00000001 */ #define AF_INIT_DONE 1 /* 0x00000002 */ #define AF_MBOX_COMMAND 2 /* 0x00000004 */ #define AF_MBOX_COMMAND_DONE 3 /* 0x00000008 */ -#define AF_INTERRUPTS_ON 6 /* 0x00000040 Not Used */ +#define AF_INTERRUPTS_ON 6 /* 0x00000040 */ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ #define AF_LINK_UP 8 /* 0x00000100 */ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ -#define AF_ISNS_CMD_IN_PROCESS 12 /* 0x00001000 */ -#define AF_ISNS_CMD_DONE 13 /* 0x00002000 */ +#define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ unsigned long dpc_flags; @@ -296,6 +297,9 @@ #define DPC_AEN 9 /* 0x00000200 */ #define DPC_GET_DHCP_IP_ADDR 15 /* 0x00008000 */ + struct Scsi_Host *host; /* pointer to host data */ + uint32_t tot_ddbs; + uint16_t iocb_cnt; uint16_t iocb_hiwat; @@ -344,6 +348,7 @@ uint32_t firmware_version[2]; uint32_t patch_number; uint32_t build_number; + uint32_t board_id; /* --- From Init_FW --- */ /* init_cb_t *init_cb; */ @@ -363,7 +368,6 @@ /* --- From GetFwState --- */ uint32_t firmware_state; - uint32_t board_id; uint32_t 
addl_fw_state; /* Linux kernel thread */ @@ -414,6 +418,8 @@ uint16_t aen_out; struct aen aen_q[MAX_AEN_ENTRIES]; + struct ql4_aen_log aen_log;/* tracks all aens */ + /* This mutex protects several threads to do mailbox commands * concurrently. */ @@ -585,10 +591,4 @@ #define FLUSH_DDB_CHANGED_AENS 1 #define RELOGIN_DDB_CHANGED_AENS 2 -#include "ql4_version.h" -#include "ql4_glbl.h" -#include "ql4_dbg.h" -#include "ql4_inline.h" - - #endif /*_QLA4XXX_H */ diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_fw.h --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_fw.h 2007-12-21 15:36:12.000000000 -0500 @@ -20,143 +20,23 @@ *************************************************************************/ struct port_ctrl_stat_regs { - __le32 ext_hw_conf; /* 80 x50 R/W */ - __le32 intChipConfiguration; /* 84 x54 */ - __le32 port_ctrl; /* 88 x58 */ - __le32 port_status; /* 92 x5c */ - __le32 HostPrimMACHi; /* 96 x60 */ - __le32 HostPrimMACLow; /* 100 x64 */ - __le32 HostSecMACHi; /* 104 x68 */ - __le32 HostSecMACLow; /* 108 x6c */ - __le32 EPPrimMACHi; /* 112 x70 */ - __le32 EPPrimMACLow; /* 116 x74 */ - __le32 EPSecMACHi; /* 120 x78 */ - __le32 EPSecMACLow; /* 124 x7c */ - __le32 HostPrimIPHi; /* 128 x80 */ - __le32 HostPrimIPMidHi; /* 132 x84 */ - __le32 HostPrimIPMidLow; /* 136 x88 */ - __le32 HostPrimIPLow; /* 140 x8c */ - __le32 HostSecIPHi; /* 144 x90 */ - __le32 HostSecIPMidHi; /* 148 x94 */ - __le32 HostSecIPMidLow; /* 152 x98 */ - __le32 HostSecIPLow; /* 156 x9c */ - __le32 EPPrimIPHi; /* 160 xa0 */ - __le32 EPPrimIPMidHi; /* 164 xa4 */ - __le32 EPPrimIPMidLow; /* 168 xa8 */ - __le32 EPPrimIPLow; /* 172 xac */ - __le32 EPSecIPHi; /* 176 xb0 */ - __le32 EPSecIPMidHi; /* 180 xb4 */ - __le32 EPSecIPMidLow; /* 184 xb8 */ - __le32 EPSecIPLow; /* 188 xbc */ - __le32 IPReassemblyTimeout; /* 192 xc0 */ - __le32 EthMaxFramePayload; /* 196 xc4 */ - 
__le32 TCPMaxWindowSize; /* 200 xc8 */ - __le32 TCPCurrentTimestampHi; /* 204 xcc */ - __le32 TCPCurrentTimestampLow; /* 208 xd0 */ - __le32 LocalRAMAddress; /* 212 xd4 */ - __le32 LocalRAMData; /* 216 xd8 */ - __le32 PCSReserved1; /* 220 xdc */ - __le32 gp_out; /* 224 xe0 */ - __le32 gp_in; /* 228 xe4 */ - __le32 ProbeMuxAddr; /* 232 xe8 */ - __le32 ProbeMuxData; /* 236 xec */ - __le32 ERMQueueBaseAddr0; /* 240 xf0 */ - __le32 ERMQueueBaseAddr1; /* 244 xf4 */ - __le32 MACConfiguration; /* 248 xf8 */ - __le32 port_err_status; /* 252 xfc COR */ + __le32 ext_hw_conf; /* 0x50 R/W */ + __le32 rsrvd0; /* 0x54 */ + __le32 port_ctrl; /* 0x58 */ + __le32 port_status; /* 0x5c */ + __le32 rsrvd1[32]; /* 0x60-0xdf */ + __le32 gp_out; /* 0xe0 */ + __le32 gp_in; /* 0xe4 */ + __le32 rsrvd2[5]; /* 0xe8-0xfb */ + __le32 port_err_status; /* 0xfc */ }; struct host_mem_cfg_regs { - __le32 NetRequestQueueOut; /* 80 x50 */ - __le32 NetRequestQueueOutAddrHi; /* 84 x54 */ - __le32 NetRequestQueueOutAddrLow; /* 88 x58 */ - __le32 NetRequestQueueBaseAddrHi; /* 92 x5c */ - __le32 NetRequestQueueBaseAddrLow; /* 96 x60 */ - __le32 NetRequestQueueLength; /* 100 x64 */ - __le32 NetResponseQueueIn; /* 104 x68 */ - __le32 NetResponseQueueInAddrHi; /* 108 x6c */ - __le32 NetResponseQueueInAddrLow; /* 112 x70 */ - __le32 NetResponseQueueBaseAddrHi; /* 116 x74 */ - __le32 NetResponseQueueBaseAddrLow; /* 120 x78 */ - __le32 NetResponseQueueLength; /* 124 x7c */ - __le32 req_q_out; /* 128 x80 */ - __le32 RequestQueueOutAddrHi; /* 132 x84 */ - __le32 RequestQueueOutAddrLow; /* 136 x88 */ - __le32 RequestQueueBaseAddrHi; /* 140 x8c */ - __le32 RequestQueueBaseAddrLow; /* 144 x90 */ - __le32 RequestQueueLength; /* 148 x94 */ - __le32 ResponseQueueIn; /* 152 x98 */ - __le32 ResponseQueueInAddrHi; /* 156 x9c */ - __le32 ResponseQueueInAddrLow; /* 160 xa0 */ - __le32 ResponseQueueBaseAddrHi; /* 164 xa4 */ - __le32 ResponseQueueBaseAddrLow; /* 168 xa8 */ - __le32 ResponseQueueLength; /* 172 xac */ - __le32 
NetRxLargeBufferQueueOut; /* 176 xb0 */ - __le32 NetRxLargeBufferQueueBaseAddrHi; /* 180 xb4 */ - __le32 NetRxLargeBufferQueueBaseAddrLow; /* 184 xb8 */ - __le32 NetRxLargeBufferQueueLength; /* 188 xbc */ - __le32 NetRxLargeBufferLength; /* 192 xc0 */ - __le32 NetRxSmallBufferQueueOut; /* 196 xc4 */ - __le32 NetRxSmallBufferQueueBaseAddrHi; /* 200 xc8 */ - __le32 NetRxSmallBufferQueueBaseAddrLow; /* 204 xcc */ - __le32 NetRxSmallBufferQueueLength; /* 208 xd0 */ - __le32 NetRxSmallBufferLength; /* 212 xd4 */ - __le32 HMCReserved0[10]; /* 216 xd8 */ -}; - -struct local_ram_cfg_regs { - __le32 BufletSize; /* 80 x50 */ - __le32 BufletMaxCount; /* 84 x54 */ - __le32 BufletCurrCount; /* 88 x58 */ - __le32 BufletPauseThresholdCount; /* 92 x5c */ - __le32 BufletTCPWinThresholdHi; /* 96 x60 */ - __le32 BufletTCPWinThresholdLow; /* 100 x64 */ - __le32 IPHashTableBaseAddr; /* 104 x68 */ - __le32 IPHashTableSize; /* 108 x6c */ - __le32 TCPHashTableBaseAddr; /* 112 x70 */ - __le32 TCPHashTableSize; /* 116 x74 */ - __le32 NCBAreaBaseAddr; /* 120 x78 */ - __le32 NCBMaxCount; /* 124 x7c */ - __le32 NCBCurrCount; /* 128 x80 */ - __le32 DRBAreaBaseAddr; /* 132 x84 */ - __le32 DRBMaxCount; /* 136 x88 */ - __le32 DRBCurrCount; /* 140 x8c */ - __le32 LRCReserved[28]; /* 144 x90 */ -}; - -struct prot_stat_regs { - __le32 MACTxFrameCount; /* 80 x50 R */ - __le32 MACTxByteCount; /* 84 x54 R */ - __le32 MACRxFrameCount; /* 88 x58 R */ - __le32 MACRxByteCount; /* 92 x5c R */ - __le32 MACCRCErrCount; /* 96 x60 R */ - __le32 MACEncErrCount; /* 100 x64 R */ - __le32 MACRxLengthErrCount; /* 104 x68 R */ - __le32 IPTxPacketCount; /* 108 x6c R */ - __le32 IPTxByteCount; /* 112 x70 R */ - __le32 IPTxFragmentCount; /* 116 x74 R */ - __le32 IPRxPacketCount; /* 120 x78 R */ - __le32 IPRxByteCount; /* 124 x7c R */ - __le32 IPRxFragmentCount; /* 128 x80 R */ - __le32 IPDatagramReassemblyCount; /* 132 x84 R */ - __le32 IPV6RxPacketCount; /* 136 x88 R */ - __le32 IPErrPacketCount; /* 140 x8c R */ - 
__le32 IPReassemblyErrCount; /* 144 x90 R */ - __le32 TCPTxSegmentCount; /* 148 x94 R */ - __le32 TCPTxByteCount; /* 152 x98 R */ - __le32 TCPRxSegmentCount; /* 156 x9c R */ - __le32 TCPRxByteCount; /* 160 xa0 R */ - __le32 TCPTimerExpCount; /* 164 xa4 R */ - __le32 TCPRxAckCount; /* 168 xa8 R */ - __le32 TCPTxAckCount; /* 172 xac R */ - __le32 TCPRxErrOOOCount; /* 176 xb0 R */ - __le32 PSReserved0; /* 180 xb4 */ - __le32 TCPRxWindowProbeUpdateCount; /* 184 xb8 R */ - __le32 ECCErrCorrectionCount; /* 188 xbc R */ - __le32 PSReserved1[16]; /* 192 xc0 */ + __le32 rsrvd0[12]; /* 0x50-0x79 */ + __le32 req_q_out; /* 0x80 */ + __le32 rsrvd1[31]; /* 0x84-0xFF */ }; - /* remote register set (access via PCI memory read/write) */ struct isp_reg { #define MBOX_REG_COUNT 8 @@ -207,11 +87,7 @@ union { struct port_ctrl_stat_regs p0; struct host_mem_cfg_regs p1; - struct local_ram_cfg_regs p2; - struct prot_stat_regs p3; - __le32 r_union[44]; }; - } __attribute__ ((packed)) isp4022; } u2; }; /* 256 x100 */ @@ -296,6 +172,7 @@ /* ISP Semaphore definitions */ /* ISP General Purpose Output definitions */ +#define GPOR_TOPCAT_RESET 0x00000004 /* shadow registers (DMA'd from HA to system memory. 
read only) */ struct shadow_regs { @@ -337,6 +214,7 @@ /* Mailbox command definitions */ #define MBOX_CMD_ABOUT_FW 0x0009 +#define MBOX_CMD_PING 0x000B #define MBOX_CMD_LUN_RESET 0x0016 #define MBOX_CMD_GET_MANAGEMENT_DATA 0x001E #define MBOX_CMD_GET_FW_STATUS 0x001F @@ -364,6 +242,17 @@ #define MBOX_CMD_GET_FW_STATE 0x0069 #define MBOX_CMD_GET_INIT_FW_CTRL_BLOCK_DEFAULTS 0x006A #define MBOX_CMD_RESTORE_FACTORY_DEFAULTS 0x0087 +#define MBOX_CMD_SET_ACB 0x0088 +#define MBOX_CMD_GET_ACB 0x0089 +#define MBOX_CMD_DISABLE_ACB 0x008A +#define MBOX_CMD_GET_IPV6_NEIGHBOR_CACHE 0x008B +#define MBOX_CMD_GET_IPV6_DEST_CACHE 0x008C +#define MBOX_CMD_GET_IPV6_DEF_ROUTER_LIST 0x008D +#define MBOX_CMD_GET_IPV6_LCL_PREFIX_LIST 0x008E +#define MBOX_CMD_SET_IPV6_NEIGHBOR_CACHE 0x0090 +#define MBOX_CMD_GET_IP_ADDR_STATE 0x0091 +#define MBOX_CMD_SEND_IPV6_ROUTER_SOL 0x0092 +#define MBOX_CMD_GET_DB_ENTRY_CURRENT_IP_ADDR 0x0093 /* Mailbox 1 */ #define FW_STATE_READY 0x0000 @@ -409,6 +298,16 @@ #define MBOX_ASTS_DHCP_LEASE_EXPIRED 0x801D #define MBOX_ASTS_DHCP_LEASE_ACQUIRED 0x801F #define MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED 0x8021 +#define MBOX_ASTS_DUPLICATE_IP 0x8025 +#define MBOX_ASTS_ARP_COMPLETE 0x8026 +#define MBOX_ASTS_SUBNET_STATE_CHANGE 0x8027 +#define MBOX_ASTS_RESPONSE_QUEUE_FULL 0x8028 +#define MBOX_ASTS_IP_ADDR_STATE_CHANGED 0x8029 +#define MBOX_ASTS_IPV6_PREFIX_EXPIRED 0x802B +#define MBOX_ASTS_IPV6_ND_PREFIX_IGNORED 0x802C +#define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D +#define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E + #define ISNS_EVENT_DATA_RECEIVED 0x0000 #define ISNS_EVENT_CONNECTION_OPENED 0x0001 #define ISNS_EVENT_CONNECTION_FAILED 0x0002 @@ -418,137 +317,166 @@ /*************************************************************************/ /* Host Adapter Initialization Control Block (from host) */ -struct init_fw_ctrl_blk { - uint8_t Version; /* 00 */ - uint8_t Control; /* 01 */ +struct addr_ctrl_blk { + uint8_t version; /* 00 */ + uint8_t control; /* 01 */ - 
uint16_t FwOptions; /* 02-03 */ + uint16_t fw_options; /* 02-03 */ #define FWOPT_HEARTBEAT_ENABLE 0x1000 #define FWOPT_SESSION_MODE 0x0040 #define FWOPT_INITIATOR_MODE 0x0020 #define FWOPT_TARGET_MODE 0x0010 - uint16_t ExecThrottle; /* 04-05 */ - uint8_t RetryCount; /* 06 */ - uint8_t RetryDelay; /* 07 */ - uint16_t MaxEthFrPayloadSize; /* 08-09 */ - uint16_t AddFwOptions; /* 0A-0B */ - - uint8_t HeartbeatInterval; /* 0C */ - uint8_t InstanceNumber; /* 0D */ - uint16_t RES2; /* 0E-0F */ - uint16_t ReqQConsumerIndex; /* 10-11 */ - uint16_t ComplQProducerIndex; /* 12-13 */ - uint16_t ReqQLen; /* 14-15 */ - uint16_t ComplQLen; /* 16-17 */ - uint32_t ReqQAddrLo; /* 18-1B */ - uint32_t ReqQAddrHi; /* 1C-1F */ - uint32_t ComplQAddrLo; /* 20-23 */ - uint32_t ComplQAddrHi; /* 24-27 */ - uint32_t ShadowRegBufAddrLo; /* 28-2B */ - uint32_t ShadowRegBufAddrHi; /* 2C-2F */ - - uint16_t iSCSIOptions; /* 30-31 */ - - uint16_t TCPOptions; /* 32-33 */ - - uint16_t IPOptions; /* 34-35 */ - - uint16_t MaxPDUSize; /* 36-37 */ - uint16_t RcvMarkerInt; /* 38-39 */ - uint16_t SndMarkerInt; /* 3A-3B */ - uint16_t InitMarkerlessInt; /* 3C-3D */ - uint16_t FirstBurstSize; /* 3E-3F */ - uint16_t DefaultTime2Wait; /* 40-41 */ - uint16_t DefaultTime2Retain; /* 42-43 */ - uint16_t MaxOutStndngR2T; /* 44-45 */ - uint16_t KeepAliveTimeout; /* 46-47 */ - uint16_t PortNumber; /* 48-49 */ - uint16_t MaxBurstSize; /* 4A-4B */ - uint32_t RES4; /* 4C-4F */ - uint8_t IPAddr[4]; /* 50-53 */ - uint8_t RES5[12]; /* 54-5F */ - uint8_t SubnetMask[4]; /* 60-63 */ - uint8_t RES6[12]; /* 64-6F */ - uint8_t GatewayIPAddr[4]; /* 70-73 */ - uint8_t RES7[12]; /* 74-7F */ - uint8_t PriDNSIPAddr[4]; /* 80-83 */ - uint8_t SecDNSIPAddr[4]; /* 84-87 */ - uint8_t RES8[8]; /* 88-8F */ - uint8_t Alias[32]; /* 90-AF */ - uint8_t TargAddr[8]; /* B0-B7 *//* /FIXME: Remove?? 
*/ - uint8_t CHAPNameSecretsTable[8]; /* B8-BF */ - uint8_t EthernetMACAddr[6]; /* C0-C5 */ - uint16_t TargetPortalGroup; /* C6-C7 */ - uint8_t SendScale; /* C8 */ - uint8_t RecvScale; /* C9 */ - uint8_t TypeOfService; /* CA */ - uint8_t Time2Live; /* CB */ - uint16_t VLANPriority; /* CC-CD */ - uint16_t Reserved8; /* CE-CF */ - uint8_t SecIPAddr[4]; /* D0-D3 */ - uint8_t Reserved9[12]; /* D4-DF */ - uint8_t iSNSIPAddr[4]; /* E0-E3 */ - uint16_t iSNSServerPortNumber; /* E4-E5 */ - uint8_t Reserved10[10]; /* E6-EF */ - uint8_t SLPDAIPAddr[4]; /* F0-F3 */ - uint8_t Reserved11[12]; /* F4-FF */ - uint8_t iSCSINameString[256]; /* 100-1FF */ + uint16_t exec_throttle; /* 04-05 */ + uint8_t zio_count; /* 06 */ + uint8_t res0; /* 07 */ + uint16_t eth_mtu_size; /* 08-09 */ + uint16_t add_fw_options; /* 0A-0B */ + + uint8_t hb_interval; /* 0C */ + uint8_t inst_num; /* 0D */ + uint16_t res1; /* 0E-0F */ + uint16_t rqq_consumer_idx; /* 10-11 */ + uint16_t compq_producer_idx; /* 12-13 */ + uint16_t rqq_len; /* 14-15 */ + uint16_t compq_len; /* 16-17 */ + uint32_t rqq_addr_lo; /* 18-1B */ + uint32_t rqq_addr_hi; /* 1C-1F */ + uint32_t compq_addr_lo; /* 20-23 */ + uint32_t compq_addr_hi; /* 24-27 */ + uint32_t shdwreg_addr_lo; /* 28-2B */ + uint32_t shdwreg_addr_hi; /* 2C-2F */ + + uint16_t iscsi_opts; /* 30-31 */ + uint16_t ipv4_tcp_opts; /* 32-33 */ + uint16_t ipv4_ip_opts; /* 34-35 */ + + uint16_t iscsi_max_pdu_size; /* 36-37 */ + uint8_t ipv4_tos; /* 38 */ + uint8_t ipv4_ttl; /* 39 */ + uint8_t acb_version; /* 3A */ + uint8_t res2; /* 3B */ + uint16_t def_timeout; /* 3C-3D */ + uint16_t iscsi_fburst_len; /* 3E-3F */ + uint16_t iscsi_def_time2wait; /* 40-41 */ + uint16_t iscsi_def_time2retain; /* 42-43 */ + uint16_t iscsi_max_outstnd_r2t; /* 44-45 */ + uint16_t conn_ka_timeout; /* 46-47 */ + uint16_t ipv4_port; /* 48-49 */ + uint16_t iscsi_max_burst_len; /* 4A-4B */ + uint32_t res5; /* 4C-4F */ + uint8_t ipv4_addr[4]; /* 50-53 */ + uint16_t ipv4_vlan_tag; /* 54-55 */ + uint8_t 
ipv4_addr_state; /* 56 */ + uint8_t ipv4_cacheid; /* 57 */ + uint8_t res6[8]; /* 58-5F */ + uint8_t ipv4_subnet[4]; /* 60-63 */ + uint8_t res7[12]; /* 64-6F */ + uint8_t ipv4_gw_addr[4]; /* 70-73 */ + uint8_t res8[0xc]; /* 74-7F */ + uint8_t pri_dns_srvr_ip[4];/* 80-83 */ + uint8_t sec_dns_srvr_ip[4];/* 84-87 */ + uint16_t min_eph_port; /* 88-89 */ + uint16_t max_eph_port; /* 8A-8B */ + uint8_t res9[4]; /* 8C-8F */ + uint8_t iscsi_alias[32];/* 90-AF */ + uint8_t res9_1[0x16]; /* B0-C5 */ + uint16_t tgt_portal_grp;/* C6-C7 */ + uint8_t abort_timer; /* C8 */ + uint8_t ipv4_tcp_wsf; /* C9 */ + uint8_t res10[6]; /* CA-CF */ + uint8_t ipv4_sec_ip_addr[4]; /* D0-D3 */ + uint8_t ipv4_dhcp_vid_len; /* D4 */ + uint8_t ipv4_dhcp_vid[11]; /* D5-DF */ + uint8_t res11[20]; /* E0-F3 */ + uint8_t ipv4_dhcp_alt_cid_len; /* F4 */ + uint8_t ipv4_dhcp_alt_cid[11]; /* F5-FF */ + uint8_t iscsi_name[224]; /* 100-1DF */ + uint8_t res12[32]; /* 1E0-1FF */ + uint32_t cookie; /* 200-203 */ + uint16_t ipv6_port; /* 204-205 */ + uint16_t ipv6_opts; /* 206-207 */ + uint16_t ipv6_addtl_opts; /* 208-209 */ + uint16_t ipv6_tcp_opts; /* 20A-20B */ + uint8_t ipv6_tcp_wsf; /* 20C */ + uint16_t ipv6_flow_lbl; /* 20D-20F */ + uint8_t ipv6_gw_addr[16]; /* 210-21F */ + uint16_t ipv6_vlan_tag; /* 220-221 */ + uint8_t ipv6_lnk_lcl_addr_state;/* 222 */ + uint8_t ipv6_addr0_state; /* 223 */ + uint8_t ipv6_addr1_state; /* 224 */ + uint8_t ipv6_gw_state; /* 225 */ + uint8_t ipv6_traffic_class; /* 226 */ + uint8_t ipv6_hop_limit; /* 227 */ + uint8_t ipv6_if_id[8]; /* 228-22F */ + uint8_t ipv6_addr0[16]; /* 230-23F */ + uint8_t ipv6_addr1[16]; /* 240-24F */ + uint32_t ipv6_nd_reach_time; /* 250-253 */ + uint32_t ipv6_nd_rexmit_timer; /* 254-257 */ + uint32_t ipv6_nd_stale_timeout; /* 258-25B */ + uint8_t ipv6_dup_addr_detect_count; /* 25C */ + uint8_t ipv6_cache_id; /* 25D */ + uint8_t res13[18]; /* 25E-26F */ + uint32_t ipv6_gw_advrt_mtu; /* 270-273 */ + uint8_t res14[140]; /* 274-2FF */ +}; + +struct 
init_fw_ctrl_blk { + struct addr_ctrl_blk pri; + struct addr_ctrl_blk sec; }; /*************************************************************************/ struct dev_db_entry { - uint8_t options; /* 00 */ + uint16_t options; /* 00-01 */ #define DDB_OPT_DISC_SESSION 0x10 #define DDB_OPT_TARGET 0x02 /* device is a target */ - uint8_t control; /* 01 */ - - uint16_t exeThrottle; /* 02-03 */ - uint16_t exeCount; /* 04-05 */ - uint8_t retryCount; /* 06 */ - uint8_t retryDelay; /* 07 */ - uint16_t iSCSIOptions; /* 08-09 */ - - uint16_t TCPOptions; /* 0A-0B */ - - uint16_t IPOptions; /* 0C-0D */ - - uint16_t maxPDUSize; /* 0E-0F */ - uint16_t rcvMarkerInt; /* 10-11 */ - uint16_t sndMarkerInt; /* 12-13 */ - uint16_t iSCSIMaxSndDataSegLen; /* 14-15 */ - uint16_t firstBurstSize; /* 16-17 */ - uint16_t minTime2Wait; /* 18-19 : RA :default_time2wait */ - uint16_t maxTime2Retain; /* 1A-1B */ - uint16_t maxOutstndngR2T; /* 1C-1D */ - uint16_t keepAliveTimeout; /* 1E-1F */ - uint8_t ISID[6]; /* 20-25 big-endian, must be converted + uint16_t exec_throttle; /* 02-03 */ + uint16_t exec_count; /* 04-05 */ + uint16_t res0; /* 06-07 */ + uint16_t iscsi_options; /* 08-09 */ + uint16_t tcp_options; /* 0A-0B */ + uint16_t ip_options; /* 0C-0D */ + uint16_t iscsi_max_rcv_data_seg_len; /* 0E-0F */ + uint32_t res1; /* 10-13 */ + uint16_t iscsi_max_snd_data_seg_len; /* 14-15 */ + uint16_t iscsi_first_burst_len; /* 16-17 */ + uint16_t iscsi_def_time2wait; /* 18-19 */ + uint16_t iscsi_def_time2retain; /* 1A-1B */ + uint16_t iscsi_max_outsnd_r2t; /* 1C-1D */ + uint16_t ka_timeout; /* 1E-1F */ + uint8_t isid[6]; /* 20-25 big-endian, must be converted * to little-endian */ - uint16_t TSID; /* 26-27 */ - uint16_t portNumber; /* 28-29 */ - uint16_t maxBurstSize; /* 2A-2B */ - uint16_t taskMngmntTimeout; /* 2C-2D */ - uint16_t reserved1; /* 2E-2F */ - uint8_t ipAddr[0x10]; /* 30-3F */ - uint8_t iSCSIAlias[0x20]; /* 40-5F */ - uint8_t targetAddr[0x20]; /* 60-7F */ - uint8_t userID[0x20]; /* 80-9F */ - 
uint8_t password[0x20]; /* A0-BF */ - uint8_t iscsiName[0x100]; /* C0-1BF : xxzzy Make this a + uint16_t tsid; /* 26-27 */ + uint16_t port; /* 28-29 */ + uint16_t iscsi_max_burst_len; /* 2A-2B */ + uint16_t def_timeout; /* 2C-2D */ + uint16_t res2; /* 2E-2F */ + uint8_t ip_addr[0x10]; /* 30-3F */ + uint8_t iscsi_alias[0x20]; /* 40-5F */ + uint8_t tgt_addr[0x20]; /* 60-7F */ + uint16_t mss; /* 80-81 */ + uint16_t res3; /* 82-83 */ + uint16_t lcl_port; /* 84-85 */ + uint8_t ipv4_tos; /* 86 */ + uint16_t ipv6_flow_lbl; /* 87-89 */ + uint8_t res4[0x36]; /* 8A-BF */ + uint8_t iscsi_name[0xE0]; /* C0-19F : xxzzy Make this a * pointer to a string so we * don't have to reserve soooo * much RAM */ - uint16_t ddbLink; /* 1C0-1C1 */ - uint16_t CHAPTableIndex; /* 1C2-1C3 */ - uint16_t TargetPortalGroup; /* 1C4-1C5 */ - uint16_t reserved2[2]; /* 1C6-1C7 */ - uint32_t statSN; /* 1C8-1CB */ - uint32_t expStatSN; /* 1CC-1CF */ - uint16_t reserved3[0x2C]; /* 1D0-1FB */ - uint16_t ddbValidCookie; /* 1FC-1FD */ - uint16_t ddbValidSize; /* 1FE-1FF */ + uint8_t ipv6_addr[0x10];/* 1A0-1AF */ + uint8_t res5[0x10]; /* 1B0-1BF */ + uint16_t ddb_link; /* 1C0-1C1 */ + uint16_t chap_tbl_idx; /* 1C2-1C3 */ + uint16_t tgt_portal_grp; /* 1C4-1C5 */ + uint8_t tcp_xmt_wsf; /* 1C6 */ + uint8_t tcp_rcv_wsf; /* 1C7 */ + uint32_t stat_sn; /* 1C8-1CB */ + uint32_t exp_stat_sn; /* 1CC-1CF */ + uint8_t res6[0x30]; /* 1D0-1FF */ }; /*************************************************************************/ diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_glbl.h --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_glbl.h 2007-12-21 15:36:12.000000000 -0500 @@ -8,6 +8,9 @@ #ifndef __QLA4x_GBL_H #define __QLA4x_GBL_H +struct iscsi_cls_conn; + +void qla4xxx_hw_reset(struct scsi_qla_host *ha); int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a); int qla4xxx_send_tgts(struct 
scsi_qla_host *ha, char *ip, uint16_t port); int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb); @@ -58,11 +61,13 @@ void qla4xxx_interrupt_service_routine(struct scsi_qla_host * ha, uint32_t intr_status); int qla4xxx_init_rings(struct scsi_qla_host * ha); -struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha, uint32_t index); +struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha, + uint32_t index); void qla4xxx_srb_compl(struct scsi_qla_host *ha, struct srb *srb); int qla4xxx_reinitialize_ddb_list(struct scsi_qla_host * ha); int qla4xxx_process_ddb_changed(struct scsi_qla_host * ha, uint32_t fw_ddb_index, uint32_t state); +void qla4xxx_dump_buffer(void *b, uint32_t size); extern int ql4xextended_error_logging; extern int ql4xdiscoverywait; diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_init.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_init.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,6 +6,9 @@ */ #include "ql4_def.h" +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" static struct ddb_entry * qla4xxx_alloc_ddb(struct scsi_qla_host *ha, uint32_t fw_ddb_index); @@ -300,12 +303,12 @@ if (!qla4xxx_fw_ready(ha)) return status; - set_bit(AF_ONLINE, &ha->flags); return qla4xxx_get_firmware_status(ha); } static struct ddb_entry* qla4xxx_get_ddb_entry(struct scsi_qla_host *ha, - uint32_t fw_ddb_index) + uint32_t fw_ddb_index, + uint32_t *new_tgt) { struct dev_db_entry *fw_ddb_entry = NULL; dma_addr_t fw_ddb_entry_dma; @@ -313,6 +316,7 @@ int found = 0; uint32_t device_state; + *new_tgt = 0; /* Make sure the dma buffer is valid */ fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), @@ -337,7 +341,7 @@ DEBUG2(printk("scsi%ld: %s: Looking for ddb[%d]\n", ha->host_no, __func__, fw_ddb_index)); list_for_each_entry(ddb_entry, &ha->ddb_list, 
list) { - if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsiName, + if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsi_name, ISCSI_NAME_SIZE) == 0) { found++; break; @@ -348,6 +352,7 @@ DEBUG2(printk("scsi%ld: %s: ddb[%d] not found - allocating " "new ddb\n", ha->host_no, __func__, fw_ddb_index)); + *new_tgt = 1; ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index); } @@ -409,26 +414,26 @@ } status = QLA_SUCCESS; - ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->TSID); + ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->tsid); ddb_entry->task_mgmt_timeout = - le16_to_cpu(fw_ddb_entry->taskMngmntTimeout); + le16_to_cpu(fw_ddb_entry->def_timeout); ddb_entry->CmdSn = 0; - ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exeThrottle); + ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exec_throttle); ddb_entry->default_relogin_timeout = - le16_to_cpu(fw_ddb_entry->taskMngmntTimeout); - ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->minTime2Wait); + le16_to_cpu(fw_ddb_entry->def_timeout); + ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->iscsi_def_time2wait); /* Update index in case it changed */ ddb_entry->fw_ddb_index = fw_ddb_index; ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry; - ddb_entry->port = le16_to_cpu(fw_ddb_entry->portNumber); - ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->TargetPortalGroup); - memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsiName[0], + ddb_entry->port = le16_to_cpu(fw_ddb_entry->port); + ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp); + memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsi_name[0], min(sizeof(ddb_entry->iscsi_name), - sizeof(fw_ddb_entry->iscsiName))); - memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ipAddr[0], - min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ipAddr))); + sizeof(fw_ddb_entry->iscsi_name))); + memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ip_addr[0], + min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr))); DEBUG2(printk("scsi%ld: 
%s: ddb[%d] - State= %x status= %d.\n", ha->host_no, __func__, fw_ddb_index, @@ -495,6 +500,7 @@ uint32_t ddb_state; uint32_t conn_err, err_code; struct ddb_entry *ddb_entry; + uint32_t new_tgt; dev_info(&ha->pdev->dev, "Initializing DDBs ...\n"); for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES; @@ -526,8 +532,19 @@ "completed " "or access denied failure\n", ha->host_no, __func__)); - } else + } else { qla4xxx_set_ddb_entry(ha, fw_ddb_index, 0); + if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, + NULL, 0, NULL, &next_fw_ddb_index, + &ddb_state, &conn_err, NULL, NULL) + == QLA_ERROR) { + DEBUG2(printk("scsi%ld: %s:" + "get_ddb_entry %d failed\n", + ha->host_no, + __func__, fw_ddb_index)); + return QLA_ERROR; + } + } } if (ddb_state != DDB_DS_SESSION_ACTIVE) @@ -540,7 +557,7 @@ ha->host_no, __func__, fw_ddb_index)); /* Add DDB to internal our ddb list. */ - ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index); + ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt); if (ddb_entry == NULL) { DEBUG2(printk("scsi%ld: %s: Unable to allocate memory " "for device at fw_ddb_index %d\n", @@ -865,21 +882,20 @@ static void qla4x00_pci_config(struct scsi_qla_host *ha) { - uint16_t w, mwi; + uint16_t w; + int status; dev_info(&ha->pdev->dev, "Configuring PCI space...\n"); pci_set_master(ha->pdev); - mwi = 0; - if (pci_set_mwi(ha->pdev)) - mwi = PCI_COMMAND_INVALIDATE; + status = pci_set_mwi(ha->pdev); /* * We want to respect framework's setting of PCI configuration space * command register and also want to make sure that all bits of * interest to us are properly set in command register. 
*/ pci_read_config_word(ha->pdev, PCI_COMMAND, &w); - w |= mwi | (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + w |= PCI_COMMAND_PARITY | PCI_COMMAND_SERR; w &= ~PCI_COMMAND_INTX_DISABLE; pci_write_config_word(ha->pdev, PCI_COMMAND, w); } @@ -911,6 +927,9 @@ writel(set_rmask(NVR_WRITE_ENABLE), &ha->reg->u1.isp4022.nvram); + writel(2, &ha->reg->mailbox[6]); + readl(&ha->reg->mailbox[6]); + writel(set_rmask(CSR_BOOT_ENABLE), &ha->reg->ctrl_status); readl(&ha->reg->ctrl_status); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -958,25 +977,25 @@ return status; } -int ql4xxx_lock_drvr_wait(struct scsi_qla_host *ha) +int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a) { -#define QL4_LOCK_DRVR_WAIT 30 +#define QL4_LOCK_DRVR_WAIT 60 #define QL4_LOCK_DRVR_SLEEP 1 int drvr_wait = QL4_LOCK_DRVR_WAIT; while (drvr_wait) { - if (ql4xxx_lock_drvr(ha) == 0) { + if (ql4xxx_lock_drvr(a) == 0) { ssleep(QL4_LOCK_DRVR_SLEEP); if (drvr_wait) { DEBUG2(printk("scsi%ld: %s: Waiting for " - "Global Init Semaphore(%d)...n", - ha->host_no, + "Global Init Semaphore(%d)...\n", + a->host_no, __func__, drvr_wait)); } drvr_wait -= QL4_LOCK_DRVR_SLEEP; } else { DEBUG2(printk("scsi%ld: %s: Global Init Semaphore " - "acquired.n", ha->host_no, __func__)); + "acquired\n", a->host_no, __func__)); return QLA_SUCCESS; } } @@ -1125,17 +1144,17 @@ /* Initialize the Host adapter request/response queues and firmware */ if (qla4xxx_start_firmware(ha) == QLA_ERROR) - return status; + goto exit_init_hba; if (qla4xxx_validate_mac_address(ha) == QLA_ERROR) - return status; + goto exit_init_hba; if (qla4xxx_init_local_data(ha) == QLA_ERROR) - return status; + goto exit_init_hba; status = qla4xxx_init_firmware(ha); if (status == QLA_ERROR) - return status; + goto exit_init_hba; /* * FW is waiting to get an IP address from DHCP server: Skip building @@ -1143,12 +1162,12 @@ * followed by 0x8014 aen" to trigger the tgt discovery process. 
*/ if (ha->firmware_state & FW_STATE_DHCP_IN_PROGRESS) - return status; + goto exit_init_online; /* Skip device discovery if ip and subnet is zero */ if (memcmp(ha->ip_address, ip_address, IP_ADDR_LEN) == 0 || memcmp(ha->subnet_mask, ip_address, IP_ADDR_LEN) == 0) - return status; + goto exit_init_online; if (renew_ddb_list == PRESERVE_DDB_LIST) { /* @@ -1177,9 +1196,10 @@ ha->host_no)); } - exit_init_hba: +exit_init_online: + set_bit(AF_ONLINE, &ha->flags); +exit_init_hba: return status; - } /** @@ -1193,9 +1213,10 @@ uint32_t fw_ddb_index) { struct ddb_entry * ddb_entry; + uint32_t new_tgt; /* First allocate a device structure */ - ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index); + ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt); if (ddb_entry == NULL) { DEBUG2(printk(KERN_WARNING "scsi%ld: Unable to allocate memory to add " @@ -1203,6 +1224,18 @@ return; } + if (!new_tgt && (ddb_entry->fw_ddb_index != fw_ddb_index)) { + /* Target has been bound to a new fw_ddb_index */ + qla4xxx_free_ddb(ha, ddb_entry); + ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index); + if (ddb_entry == NULL) { + DEBUG2(printk(KERN_WARNING + "scsi%ld: Unable to allocate memory" + " to add fw_ddb_index %d\n", + ha->host_no, fw_ddb_index)); + return; + } + } if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) == QLA_ERROR) { ha->fw_ddb_index_map[fw_ddb_index] = diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_iocb.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_iocb.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,6 +6,10 @@ */ #include "ql4_def.h" +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" + #include @@ -141,11 +145,13 @@ uint16_t avail_dsds; struct data_seg_a64 *cur_dsd; struct scsi_cmnd *cmd; + struct scatterlist *sg; + int i; cmd = srb->cmd; ha = srb->ha; - if (cmd->request_bufflen == 0 || 
cmd->sc_data_direction == DMA_NONE) { + if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { /* No data being transferred */ cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0); return; @@ -154,14 +160,7 @@ avail_dsds = COMMAND_SEG; cur_dsd = (struct data_seg_a64 *) & (cmd_entry->dataseg[0]); - /* Load data segments */ - if (cmd->use_sg) { - struct scatterlist *cur_seg; - struct scatterlist *end_seg; - - cur_seg = (struct scatterlist *)cmd->request_buffer; - end_seg = cur_seg + tot_dsds; - while (cur_seg < end_seg) { + scsi_for_each_sg(cmd, sg, tot_dsds, i) { dma_addr_t sle_dma; /* Allocate additional continuation packets? */ @@ -175,19 +174,13 @@ avail_dsds = CONTINUE_SEG; } - sle_dma = sg_dma_address(cur_seg); + sle_dma = sg_dma_address(sg); cur_dsd->base.addrLow = cpu_to_le32(LSDW(sle_dma)); cur_dsd->base.addrHigh = cpu_to_le32(MSDW(sle_dma)); - cur_dsd->count = cpu_to_le32(sg_dma_len(cur_seg)); + cur_dsd->count = cpu_to_le32(sg_dma_len(sg)); avail_dsds--; cur_dsd++; - cur_seg++; - } - } else { - cur_dsd->base.addrLow = cpu_to_le32(LSDW(srb->dma_handle)); - cur_dsd->base.addrHigh = cpu_to_le32(MSDW(srb->dma_handle)); - cur_dsd->count = cpu_to_le32(cmd->request_bufflen); } } @@ -204,8 +197,8 @@ struct scsi_cmnd *cmd = srb->cmd; struct ddb_entry *ddb_entry; struct command_t3_entry *cmd_entry; - struct scatterlist *sg = NULL; + int nseg; uint16_t tot_dsds; uint16_t req_cnt; @@ -233,24 +226,11 @@ index = (uint32_t)cmd->request->tag; /* Calculate the number of request entries needed. 
*/ - if (cmd->use_sg) { - sg = (struct scatterlist *)cmd->request_buffer; - tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg, - cmd->sc_data_direction); - if (tot_dsds == 0) + nseg = scsi_dma_map(cmd); + if (nseg < 0) goto queuing_error; - } else if (cmd->request_bufflen) { - dma_addr_t req_dma; + tot_dsds = nseg; - req_dma = pci_map_single(ha->pdev, cmd->request_buffer, - cmd->request_bufflen, - cmd->sc_data_direction); - if (dma_mapping_error(req_dma)) - goto queuing_error; - - srb->dma_handle = req_dma; - tot_dsds = 1; - } req_cnt = qla4xxx_calc_request_entries(tot_dsds); if (ha->req_q_count < (req_cnt + 2)) { @@ -279,7 +259,7 @@ int_to_scsilun(cmd->device->lun, &cmd_entry->lun); cmd_entry->cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn); - cmd_entry->ttlByteCnt = cpu_to_le32(cmd->request_bufflen); + cmd_entry->ttlByteCnt = cpu_to_le32(scsi_bufflen(cmd)); memcpy(cmd_entry->cdb, cmd->cmnd, cmd->cmd_len); cmd_entry->dataSegCnt = cpu_to_le16(tot_dsds); cmd_entry->hdr.entryCount = req_cnt; @@ -289,13 +269,13 @@ * transferred, as the data direction bit is sometimed filled * in when there is no data to be transferred */ cmd_entry->control_flags = CF_NO_DATA; - if (cmd->request_bufflen) { + if (scsi_bufflen(cmd)) { if (cmd->sc_data_direction == DMA_TO_DEVICE) cmd_entry->control_flags = CF_WRITE; else if (cmd->sc_data_direction == DMA_FROM_DEVICE) cmd_entry->control_flags = CF_READ; - ha->bytes_xfered += cmd->request_bufflen; + ha->bytes_xfered += scsi_bufflen(cmd); if (ha->bytes_xfered & ~0xFFFFF){ ha->total_mbytes_xferred += ha->bytes_xfered >> 20; ha->bytes_xfered &= 0xFFFFF; @@ -359,14 +339,9 @@ return QLA_SUCCESS; queuing_error: + if (tot_dsds) + scsi_dma_unmap(cmd); - if (cmd->use_sg && tot_dsds) { - sg = (struct scatterlist *) cmd->request_buffer; - pci_unmap_sg(ha->pdev, sg, cmd->use_sg, - cmd->sc_data_direction); - } else if (tot_dsds) - pci_unmap_single(ha->pdev, srb->dma_handle, - cmd->request_bufflen, cmd->sc_data_direction); 
spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_ERROR; diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_isr.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_isr.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,6 +6,9 @@ */ #include "ql4_def.h" +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" /** * qla2x00_process_completed_request() - Process a Fast Post response. @@ -92,7 +95,7 @@ if (sts_entry->iscsiFlags & (ISCSI_FLAG_RESIDUAL_OVER|ISCSI_FLAG_RESIDUAL_UNDER)) - cmd->resid = residual; + scsi_set_resid(cmd, residual); cmd->result = DID_OK << 16 | scsi_status; @@ -176,14 +179,14 @@ * Firmware detected a SCSI transport underrun * condition */ - cmd->resid = residual; + scsi_set_resid(cmd, residual); DEBUG2(printk("scsi%ld:%d:%d:%d: %s: UNDERRUN status " "detected, xferlen = 0x%x, residual = " "0x%x\n", ha->host_no, cmd->device->channel, cmd->device->id, cmd->device->lun, __func__, - cmd->request_bufflen, + scsi_bufflen(cmd), residual)); } @@ -227,7 +230,7 @@ if ((sts_entry->iscsiFlags & ISCSI_FLAG_RESIDUAL_UNDER) == 0) { cmd->result = DID_BUS_BUSY << 16; - } else if ((cmd->request_bufflen - residual) < + } else if ((scsi_bufflen(cmd) - residual) < cmd->underflow) { /* * Handle mid-layer underflow??? @@ -248,7 +251,7 @@ cmd->device->channel, cmd->device->id, cmd->device->lun, __func__, - cmd->request_bufflen, residual)); + scsi_bufflen(cmd), residual)); cmd->result = DID_ERROR << 16; } else { @@ -417,6 +420,7 @@ uint32_t mbox_status) { int i; + uint32_t mbox_stat2, mbox_stat3; if ((mbox_status == MBOX_STS_BUSY) || (mbox_status == MBOX_STS_INTERMEDIATE_COMPLETION) || @@ -437,6 +441,12 @@ } else if (mbox_status >> 12 == MBOX_ASYNC_EVENT_STATUS) { /* Immediately process the AENs that don't require much work. 
* Only queue the database_changed AENs */ + if (ha->aen_log.count < MAX_AEN_ENTRIES) { + for (i = 0; i < MBOX_AEN_REG_COUNT; i++) + ha->aen_log.entry[ha->aen_log.count].mbox_sts[i] = + readl(&ha->reg->mailbox[i]); + ha->aen_log.count++; + } switch (mbox_status) { case MBOX_ASTS_SYSTEM_ERROR: /* Log Mailbox registers */ @@ -493,6 +503,16 @@ mbox_status)); break; + case MBOX_ASTS_IP_ADDR_STATE_CHANGED: + mbox_stat2 = readl(&ha->reg->mailbox[2]); + mbox_stat3 = readl(&ha->reg->mailbox[3]); + + if ((mbox_stat3 == 5) && (mbox_stat2 == 3)) + set_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags); + else if ((mbox_stat3 == 2) && (mbox_stat2 == 5)) + set_bit(DPC_RESET_HA, &ha->dpc_flags); + break; + case MBOX_ASTS_MAC_ADDRESS_CHANGED: case MBOX_ASTS_DNS: /* No action */ @@ -518,11 +538,6 @@ /* Queue AEN information and process it in the DPC * routine */ if (ha->aen_q_count > 0) { - /* advance pointer */ - if (ha->aen_in == (MAX_AEN_ENTRIES - 1)) - ha->aen_in = 0; - else - ha->aen_in++; /* decrement available counter */ ha->aen_q_count--; @@ -542,6 +557,10 @@ ha->aen_q[ha->aen_in].mbox_sts[2], ha->aen_q[ha->aen_in].mbox_sts[3], ha->aen_q[ha->aen_in]. 
mbox_sts[4])); + /* advance pointer */ + ha->aen_in++; + if (ha->aen_in == MAX_AEN_ENTRIES) + ha->aen_in = 0; /* The DPC routine will process the aen */ set_bit(DPC_AEN, &ha->dpc_flags); @@ -724,25 +743,24 @@ spin_lock_irqsave(&ha->hardware_lock, flags); while (ha->aen_out != ha->aen_in) { - /* Advance pointers for next entry */ - if (ha->aen_out == (MAX_AEN_ENTRIES - 1)) - ha->aen_out = 0; - else - ha->aen_out++; - - ha->aen_q_count++; aen = &ha->aen_q[ha->aen_out]; - /* copy aen information to local structure */ for (i = 0; i < MBOX_AEN_REG_COUNT; i++) mbox_sts[i] = aen->mbox_sts[i]; + ha->aen_q_count++; + ha->aen_out++; + + if (ha->aen_out == MAX_AEN_ENTRIES) + ha->aen_out = 0; + spin_unlock_irqrestore(&ha->hardware_lock, flags); - DEBUG(printk("scsi%ld: AEN[%d] %04x, index [%d] state=%04x " - "mod=%x conerr=%08x \n", ha->host_no, ha->aen_out, - mbox_sts[0], mbox_sts[2], mbox_sts[3], - mbox_sts[1], mbox_sts[4])); + DEBUG2(printk("qla4xxx(%ld): AEN[%d]=0x%08x, mbx1=0x%08x mbx2=0x%08x" + " mbx3=0x%08x mbx4=0x%08x\n", ha->host_no, + (ha->aen_out ? (ha->aen_out-1): (MAX_AEN_ENTRIES-1)), + mbox_sts[0], mbox_sts[1], mbox_sts[2], + mbox_sts[3], mbox_sts[4])); switch (mbox_sts[0]) { case MBOX_ASTS_DATABASE_CHANGED: @@ -792,6 +810,5 @@ spin_lock_irqsave(&ha->hardware_lock, flags); } spin_unlock_irqrestore(&ha->hardware_lock, flags); - } diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_mbx.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_mbx.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,6 +6,9 @@ */ #include "ql4_def.h" +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" /** @@ -169,84 +172,6 @@ return status; } - -#if 0 - -/** - * qla4xxx_issue_iocb - issue mailbox iocb command - * @ha: adapter state pointer. - * @buffer: buffer pointer. - * @phys_addr: physical address of buffer. - * @size: size of buffer. 
- * - * Issues iocbs via mailbox commands. - * TARGET_QUEUE_LOCK must be released. - * ADAPTER_STATE_LOCK must be released. - **/ -int -qla4xxx_issue_iocb(struct scsi_qla_host * ha, void *buffer, - dma_addr_t phys_addr, size_t size) -{ - uint32_t mbox_cmd[MBOX_REG_COUNT]; - uint32_t mbox_sts[MBOX_REG_COUNT]; - int status; - - memset(&mbox_cmd, 0, sizeof(mbox_cmd)); - memset(&mbox_sts, 0, sizeof(mbox_sts)); - mbox_cmd[0] = MBOX_CMD_EXECUTE_IOCB_A64; - mbox_cmd[1] = 0; - mbox_cmd[2] = LSDW(phys_addr); - mbox_cmd[3] = MSDW(phys_addr); - status = qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]); - return status; -} - -int qla4xxx_conn_close_sess_logout(struct scsi_qla_host * ha, - uint16_t fw_ddb_index, - uint16_t connection_id, - uint16_t option) -{ - uint32_t mbox_cmd[MBOX_REG_COUNT]; - uint32_t mbox_sts[MBOX_REG_COUNT]; - - memset(&mbox_cmd, 0, sizeof(mbox_cmd)); - memset(&mbox_sts, 0, sizeof(mbox_sts)); - mbox_cmd[0] = MBOX_CMD_CONN_CLOSE_SESS_LOGOUT; - mbox_cmd[1] = fw_ddb_index; - mbox_cmd[2] = connection_id; - mbox_cmd[3] = LOGOUT_OPTION_RELOGIN; - if (qla4xxx_mailbox_command(ha, 4, 2, &mbox_cmd[0], &mbox_sts[0]) != - QLA_SUCCESS) { - DEBUG2(printk("scsi%ld: %s: MBOX_CMD_CONN_CLOSE_SESS_LOGOUT " - "option %04x failed sts %04X %04X", - ha->host_no, __func__, - option, mbox_sts[0], mbox_sts[1])); - if (mbox_sts[0] == 0x4005) - DEBUG2(printk("%s reason %04X\n", __func__, - mbox_sts[1])); - } - return QLA_SUCCESS; -} - -int qla4xxx_clear_database_entry(struct scsi_qla_host * ha, - uint16_t fw_ddb_index) -{ - uint32_t mbox_cmd[MBOX_REG_COUNT]; - uint32_t mbox_sts[MBOX_REG_COUNT]; - - memset(&mbox_cmd, 0, sizeof(mbox_cmd)); - memset(&mbox_sts, 0, sizeof(mbox_sts)); - mbox_cmd[0] = MBOX_CMD_CLEAR_DATABASE_ENTRY; - mbox_cmd[1] = fw_ddb_index; - if (qla4xxx_mailbox_command(ha, 2, 5, &mbox_cmd[0], &mbox_sts[0]) != - QLA_SUCCESS) - return QLA_ERROR; - - return QLA_SUCCESS; -} - -#endif /* 0 */ - /** * qla4xxx_initialize_fw_cb - initializes firmware control 
block. * @ha: Pointer to host adapter structure. @@ -272,10 +197,13 @@ /* Get Initialize Firmware Control Block. */ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK; mbox_cmd[2] = LSDW(init_fw_cb_dma); mbox_cmd[3] = MSDW(init_fw_cb_dma); - if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) != + mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk); + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { dma_free_coherent(&ha->pdev->dev, sizeof(struct init_fw_ctrl_blk), @@ -287,51 +215,56 @@ qla4xxx_init_rings(ha); /* Fill in the request and response queue information. */ - init_fw_cb->ReqQConsumerIndex = cpu_to_le16(ha->request_out); - init_fw_cb->ComplQProducerIndex = cpu_to_le16(ha->response_in); - init_fw_cb->ReqQLen = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH); - init_fw_cb->ComplQLen = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH); - init_fw_cb->ReqQAddrLo = cpu_to_le32(LSDW(ha->request_dma)); - init_fw_cb->ReqQAddrHi = cpu_to_le32(MSDW(ha->request_dma)); - init_fw_cb->ComplQAddrLo = cpu_to_le32(LSDW(ha->response_dma)); - init_fw_cb->ComplQAddrHi = cpu_to_le32(MSDW(ha->response_dma)); - init_fw_cb->ShadowRegBufAddrLo = + init_fw_cb->pri.rqq_consumer_idx = cpu_to_le16(ha->request_out); + init_fw_cb->pri.compq_producer_idx = cpu_to_le16(ha->response_in); + init_fw_cb->pri.rqq_len = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH); + init_fw_cb->pri.compq_len = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH); + init_fw_cb->pri.rqq_addr_lo = cpu_to_le32(LSDW(ha->request_dma)); + init_fw_cb->pri.rqq_addr_hi = cpu_to_le32(MSDW(ha->request_dma)); + init_fw_cb->pri.compq_addr_lo = cpu_to_le32(LSDW(ha->response_dma)); + init_fw_cb->pri.compq_addr_hi = cpu_to_le32(MSDW(ha->response_dma)); + init_fw_cb->pri.shdwreg_addr_lo = cpu_to_le32(LSDW(ha->shadow_regs_dma)); - init_fw_cb->ShadowRegBufAddrHi = + init_fw_cb->pri.shdwreg_addr_hi = 
cpu_to_le32(MSDW(ha->shadow_regs_dma)); /* Set up required options. */ - init_fw_cb->FwOptions |= + init_fw_cb->pri.fw_options |= __constant_cpu_to_le16(FWOPT_SESSION_MODE | FWOPT_INITIATOR_MODE); - init_fw_cb->FwOptions &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE); + init_fw_cb->pri.fw_options &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE); /* Save some info in adapter structure. */ - ha->firmware_options = le16_to_cpu(init_fw_cb->FwOptions); - ha->tcp_options = le16_to_cpu(init_fw_cb->TCPOptions); - ha->heartbeat_interval = init_fw_cb->HeartbeatInterval; - memcpy(ha->ip_address, init_fw_cb->IPAddr, - min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr))); - memcpy(ha->subnet_mask, init_fw_cb->SubnetMask, - min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask))); - memcpy(ha->gateway, init_fw_cb->GatewayIPAddr, - min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr))); - memcpy(ha->name_string, init_fw_cb->iSCSINameString, + ha->firmware_options = le16_to_cpu(init_fw_cb->pri.fw_options); + ha->tcp_options = le16_to_cpu(init_fw_cb->pri.ipv4_tcp_opts); + ha->heartbeat_interval = init_fw_cb->pri.hb_interval; + memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr, + min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr))); + memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet, + min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet))); + memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr, + min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr))); + memcpy(ha->name_string, init_fw_cb->pri.iscsi_name, min(sizeof(ha->name_string), - sizeof(init_fw_cb->iSCSINameString))); - memcpy(ha->alias, init_fw_cb->Alias, - min(sizeof(ha->alias), sizeof(init_fw_cb->Alias))); + sizeof(init_fw_cb->pri.iscsi_name))); + /*memcpy(ha->alias, init_fw_cb->Alias, + min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/ /* Save Command Line Paramater info */ - ha->port_down_retry_count = le16_to_cpu(init_fw_cb->KeepAliveTimeout); + ha->port_down_retry_count = 
le16_to_cpu(init_fw_cb->pri.conn_ka_timeout); ha->discovery_wait = ql4xdiscoverywait; /* Send Initialize Firmware Control Block. */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE; mbox_cmd[1] = 0; mbox_cmd[2] = LSDW(init_fw_cb_dma); mbox_cmd[3] = MSDW(init_fw_cb_dma); - if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) == + mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk); + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) == QLA_SUCCESS) status = QLA_SUCCESS; else { @@ -368,12 +301,14 @@ /* Get Initialize Firmware Control Block. */ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + memset(init_fw_cb, 0, sizeof(struct init_fw_ctrl_blk)); mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK; mbox_cmd[2] = LSDW(init_fw_cb_dma); mbox_cmd[3] = MSDW(init_fw_cb_dma); + mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk); - if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) != + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: Failed to get init_fw_ctrl_blk\n", ha->host_no, __func__)); @@ -384,12 +319,12 @@ } /* Save IP Address. 
*/ - memcpy(ha->ip_address, init_fw_cb->IPAddr, - min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr))); - memcpy(ha->subnet_mask, init_fw_cb->SubnetMask, - min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask))); - memcpy(ha->gateway, init_fw_cb->GatewayIPAddr, - min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr))); + memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr, + min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr))); + memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet, + min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet))); + memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr, + min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr))); dma_free_coherent(&ha->pdev->dev, sizeof(struct init_fw_ctrl_blk), init_fw_cb, init_fw_cb_dma); @@ -409,8 +344,10 @@ /* Get firmware version */ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_GET_FW_STATE; - if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 4, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATE failed w/ " "status %04X\n", ha->host_no, __func__, @@ -438,8 +375,10 @@ /* Get firmware version */ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_GET_FW_STATUS; - if (qla4xxx_mailbox_command(ha, 1, 3, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATUS failed w/ " "status %04X\n", ha->host_no, __func__, @@ -491,11 +430,14 @@ } memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY; mbox_cmd[1] = (uint32_t) fw_ddb_index; mbox_cmd[2] = LSDW(fw_ddb_entry_dma); mbox_cmd[3] = MSDW(fw_ddb_entry_dma); - if (qla4xxx_mailbox_command(ha, 4, 7, &mbox_cmd[0], 
&mbox_sts[0]) == + mbox_cmd[4] = sizeof(struct dev_db_entry); + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 7, &mbox_cmd[0], &mbox_sts[0]) == QLA_ERROR) { DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_DATABASE_ENTRY failed" " with status 0x%04X\n", ha->host_no, __func__, @@ -512,11 +454,11 @@ dev_info(&ha->pdev->dev, "DDB[%d] MB0 %04x Tot %d Next %d " "State %04x ConnErr %08x %d.%d.%d.%d:%04d \"%s\"\n", fw_ddb_index, mbox_sts[0], mbox_sts[2], mbox_sts[3], - mbox_sts[4], mbox_sts[5], fw_ddb_entry->ipAddr[0], - fw_ddb_entry->ipAddr[1], fw_ddb_entry->ipAddr[2], - fw_ddb_entry->ipAddr[3], - le16_to_cpu(fw_ddb_entry->portNumber), - fw_ddb_entry->iscsiName); + mbox_sts[4], mbox_sts[5], fw_ddb_entry->ip_addr[0], + fw_ddb_entry->ip_addr[1], fw_ddb_entry->ip_addr[2], + fw_ddb_entry->ip_addr[3], + le16_to_cpu(fw_ddb_entry->port), + fw_ddb_entry->iscsi_name); } if (num_valid_ddb_entries) *num_valid_ddb_entries = mbox_sts[2]; @@ -571,35 +513,10 @@ mbox_cmd[1] = (uint32_t) fw_ddb_index; mbox_cmd[2] = LSDW(fw_ddb_entry_dma); mbox_cmd[3] = MSDW(fw_ddb_entry_dma); - return qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]); -} - -#if 0 -int qla4xxx_conn_open_session_login(struct scsi_qla_host * ha, - uint16_t fw_ddb_index) -{ - int status = QLA_ERROR; - uint32_t mbox_cmd[MBOX_REG_COUNT]; - uint32_t mbox_sts[MBOX_REG_COUNT]; - - /* Do not wait for completion. The firmware will send us an - * ASTS_DATABASE_CHANGED (0x8014) to notify us of the login status. 
- */ - memset(&mbox_cmd, 0, sizeof(mbox_cmd)); - memset(&mbox_sts, 0, sizeof(mbox_sts)); - mbox_cmd[0] = MBOX_CMD_CONN_OPEN_SESS_LOGIN; - mbox_cmd[1] = (uint32_t) fw_ddb_index; - mbox_cmd[2] = 0; - mbox_cmd[3] = 0; - mbox_cmd[4] = 0; - status = qla4xxx_mailbox_command(ha, 4, 0, &mbox_cmd[0], &mbox_sts[0]); - DEBUG2(printk("%s fw_ddb_index=%d status=%d mbx0_1=0x%x :0x%x\n", - __func__, fw_ddb_index, status, mbox_sts[0], - mbox_sts[1]);) + mbox_cmd[4] = sizeof(struct dev_db_entry); - return status; + return qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]); } -#endif /* 0 */ /** * qla4xxx_get_crash_record - retrieves crash record. @@ -614,12 +531,14 @@ struct crash_record *crash_record = NULL; dma_addr_t crash_record_dma = 0; uint32_t crash_record_size = 0; + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_cmd)); /* Get size of crash record. */ mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD; - if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve size!\n", ha->host_no, __func__)); @@ -639,11 +558,15 @@ goto exit_get_crash_record; /* Get Crash Record. */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_cmd)); + mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD; mbox_cmd[2] = LSDW(crash_record_dma); mbox_cmd[3] = MSDW(crash_record_dma); mbox_cmd[4] = crash_record_size; - if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) goto exit_get_crash_record; @@ -655,7 +578,6 @@ crash_record, crash_record_dma); } -#if 0 /** * qla4xxx_get_conn_event_log - retrieves connection event log * @ha: Pointer to host adapter structure. @@ -678,7 +600,8 @@ /* Get size of crash record. 
*/ mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG; - if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) goto exit_get_event_log; @@ -693,10 +616,14 @@ goto exit_get_event_log; /* Get Crash Record. */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_cmd)); + mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG; mbox_cmd[2] = LSDW(event_log_dma); mbox_cmd[3] = MSDW(event_log_dma); - if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve event " "log!\n", ha->host_no, __func__)); @@ -745,7 +672,6 @@ dma_free_coherent(&ha->pdev->dev, event_log_size, event_log, event_log_dma); } -#endif /* 0 */ /** * qla4xxx_reset_lun - issues LUN Reset @@ -773,11 +699,13 @@ */ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_LUN_RESET; mbox_cmd[1] = ddb_entry->fw_ddb_index; mbox_cmd[2] = lun << 8; mbox_cmd[5] = 0x01; /* Immediate Command Enable */ - qla4xxx_mailbox_command(ha, 6, 1, &mbox_cmd[0], &mbox_sts[0]); + + qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]); if (mbox_sts[0] != MBOX_STS_COMMAND_COMPLETE && mbox_sts[0] != MBOX_STS_COMMAND_ERROR) status = QLA_ERROR; @@ -794,12 +722,14 @@ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_READ_FLASH; mbox_cmd[1] = LSDW(dma_addr); mbox_cmd[2] = MSDW(dma_addr); mbox_cmd[3] = offset; mbox_cmd[4] = len; - if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 2, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: MBOX_CMD_READ_FLASH, failed w/ " "status %04X %04X, offset %08x, len %08x\n", ha->host_no, @@ -825,8 +755,10 @@ /* 
Get firmware version. */ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); memset(&mbox_sts, 0, sizeof(mbox_sts)); + mbox_cmd[0] = MBOX_CMD_ABOUT_FW; - if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) != + + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: MBOX_CMD_ABOUT_FW failed w/ " "status %04X\n", ha->host_no, __func__, mbox_sts[0])); @@ -855,7 +787,7 @@ mbox_cmd[2] = LSDW(dma_addr); mbox_cmd[3] = MSDW(dma_addr); - if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) != + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { DEBUG2(printk("scsi%ld: %s: failed status %04X\n", ha->host_no, __func__, mbox_sts[0])); @@ -875,7 +807,7 @@ mbox_cmd[0] = MBOX_CMD_REQUEST_DATABASE_ENTRY; mbox_cmd[1] = MAX_PRST_DEV_DB_ENTRIES; - if (qla4xxx_mailbox_command(ha, 2, 3, &mbox_cmd[0], &mbox_sts[0]) != + if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) != QLA_SUCCESS) { if (mbox_sts[0] == MBOX_STS_COMMAND_ERROR) { *ddb_index = mbox_sts[2]; @@ -918,23 +850,23 @@ if (ret_val != QLA_SUCCESS) goto qla4xxx_send_tgts_exit; - memset((void *)fw_ddb_entry->iSCSIAlias, 0, - sizeof(fw_ddb_entry->iSCSIAlias)); + memset(fw_ddb_entry->iscsi_alias, 0, + sizeof(fw_ddb_entry->iscsi_alias)); - memset((void *)fw_ddb_entry->iscsiName, 0, - sizeof(fw_ddb_entry->iscsiName)); + memset(fw_ddb_entry->iscsi_name, 0, + sizeof(fw_ddb_entry->iscsi_name)); - memset((void *)fw_ddb_entry->ipAddr, 0, sizeof(fw_ddb_entry->ipAddr)); - memset((void *)fw_ddb_entry->targetAddr, 0, - sizeof(fw_ddb_entry->targetAddr)); + memset(fw_ddb_entry->ip_addr, 0, sizeof(fw_ddb_entry->ip_addr)); + memset(fw_ddb_entry->tgt_addr, 0, + sizeof(fw_ddb_entry->tgt_addr)); fw_ddb_entry->options = (DDB_OPT_DISC_SESSION | DDB_OPT_TARGET); - fw_ddb_entry->portNumber = cpu_to_le16(ntohs(port)); + fw_ddb_entry->port = cpu_to_le16(ntohs(port)); - fw_ddb_entry->ipAddr[0] = 
*ip; - fw_ddb_entry->ipAddr[1] = *(ip + 1); - fw_ddb_entry->ipAddr[2] = *(ip + 2); - fw_ddb_entry->ipAddr[3] = *(ip + 3); + fw_ddb_entry->ip_addr[0] = *ip; + fw_ddb_entry->ip_addr[1] = *(ip + 1); + fw_ddb_entry->ip_addr[2] = *(ip + 2); + fw_ddb_entry->ip_addr[3] = *(ip + 3); ret_val = qla4xxx_set_ddb_entry(ha, ddb_index, fw_ddb_entry_dma); diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_nvram.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_nvram.c 2007-12-21 15:36:12.000000000 -0500 @@ -6,6 +6,9 @@ */ #include "ql4_def.h" +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" static inline void eeprom_cmd(uint32_t cmd, struct scsi_qla_host *ha) { diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_os.c --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_os.c 2007-12-21 15:36:12.000000000 -0500 @@ -10,6 +10,10 @@ #include #include "ql4_def.h" +#include "ql4_version.h" +#include "ql4_glbl.h" +#include "ql4_dbg.h" +#include "ql4_inline.h" /* * Driver version @@ -50,12 +54,15 @@ /* * iSCSI template entry points */ -static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no, - uint32_t enable, struct sockaddr *dst_addr); +static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost, + enum iscsi_tgt_dscvr type, uint32_t enable, + struct sockaddr *dst_addr); static int qla4xxx_conn_get_param(struct iscsi_cls_conn *conn, enum iscsi_param param, char *buf); static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, enum iscsi_param param, char *buf); +static int qla4xxx_host_get_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf); static void qla4xxx_conn_stop(struct iscsi_cls_conn *conn, int flag); static int qla4xxx_conn_start(struct iscsi_cls_conn 
*conn); static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); @@ -95,16 +102,20 @@ static struct iscsi_transport qla4xxx_iscsi_transport = { .owner = THIS_MODULE, .name = DRIVER_NAME, - .param_mask = ISCSI_CONN_PORT | - ISCSI_CONN_ADDRESS | - ISCSI_TARGET_NAME | - ISCSI_TPGT, + .caps = CAP_FW_DB | CAP_SENDTARGETS_OFFLOAD | + CAP_DATA_PATH_OFFLOAD, + .param_mask = ISCSI_CONN_PORT | ISCSI_CONN_ADDRESS | + ISCSI_TARGET_NAME | ISCSI_TPGT, + .host_param_mask = ISCSI_HOST_HWADDRESS | + ISCSI_HOST_IPADDRESS | + ISCSI_HOST_INITIATOR_NAME, .sessiondata_size = sizeof(struct ddb_entry), .host_template = &qla4xxx_driver_template, .tgt_dscvr = qla4xxx_tgt_dscvr, .get_conn_param = qla4xxx_conn_get_param, .get_session_param = qla4xxx_sess_get_param, + .get_host_param = qla4xxx_host_get_param, .start_conn = qla4xxx_conn_start, .stop_conn = qla4xxx_conn_stop, .session_recovery_timedout = qla4xxx_recovery_timedout, @@ -161,6 +172,43 @@ printk(KERN_ERR "iscsi: invalid stop flag %d\n", flag); } +static ssize_t format_addr(char *buf, const unsigned char *addr, int len) +{ + int i; + char *cp = buf; + + for (i = 0; i < len; i++) + cp += sprintf(cp, "%02x%c", addr[i], + i == (len - 1) ? 
'\n' : ':'); + return cp - buf; +} + + +static int qla4xxx_host_get_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf) +{ + struct scsi_qla_host *ha = to_qla_host(shost); + int len; + + switch (param) { + case ISCSI_HOST_PARAM_HWADDRESS: + len = format_addr(buf, ha->my_mac, MAC_ADDR_LEN); + break; + case ISCSI_HOST_PARAM_IPADDRESS: + len = sprintf(buf, "%d.%d.%d.%d\n", ha->ip_address[0], + ha->ip_address[1], ha->ip_address[2], + ha->ip_address[3]); + break; + case ISCSI_HOST_PARAM_INITIATOR_NAME: + len = sprintf(buf, "%s\n", ha->name_string); + break; + default: + return -ENOSYS; + } + + return len; +} + static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, enum iscsi_param param, char *buf) { @@ -208,21 +256,15 @@ return len; } -static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no, - uint32_t enable, struct sockaddr *dst_addr) +static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost, + enum iscsi_tgt_dscvr type, uint32_t enable, + struct sockaddr *dst_addr) { struct scsi_qla_host *ha; - struct Scsi_Host *shost; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; int ret = 0; - shost = scsi_host_lookup(host_no); - if (IS_ERR(shost)) { - printk(KERN_ERR "Could not find host no %u\n", host_no); - return -ENODEV; - } - ha = (struct scsi_qla_host *) shost->hostdata; switch (type) { @@ -246,8 +288,6 @@ default: ret = -ENOSYS; } - - scsi_host_put(shost); return ret; } @@ -369,14 +409,7 @@ struct scsi_cmnd *cmd = srb->cmd; if (srb->flags & SRB_DMA_VALID) { - if (cmd->use_sg) { - pci_unmap_sg(ha->pdev, cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); - } else if (cmd->request_bufflen) { - pci_unmap_single(ha->pdev, srb->dma_handle, - cmd->request_bufflen, - cmd->sc_data_direction); - } + scsi_dma_unmap(cmd); srb->flags &= ~SRB_DMA_VALID; } cmd->SCp.ptr = NULL; @@ -711,7 +744,7 @@ return stat; } -static void qla4xxx_hw_reset(struct scsi_qla_host *ha) +void qla4xxx_hw_reset(struct scsi_qla_host *ha) { uint32_t 
ctrl_status; unsigned long flags = 0; @@ -1081,13 +1114,13 @@ if (ha->timer_active) qla4xxx_stop_timer(ha); - /* free extra memory */ - qla4xxx_mem_free(ha); - /* Detach interrupts */ if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) free_irq(ha->pdev->irq, ha); + /* free extra memory */ + qla4xxx_mem_free(ha); + pci_disable_device(ha->pdev); } @@ -1332,6 +1365,11 @@ ha = pci_get_drvdata(pdev); + qla4xxx_disable_intrs(ha); + + while (test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags)) + ssleep(1); + /* remove devs from iscsi_sessions to scsi_devices */ qla4xxx_free_ddb_list(ha); diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_version.h --- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_version.h 2007-12-21 15:36:12.000000000 -0500 @@ -5,4 +5,5 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.00.07-k1" +#define QLA4XXX_DRIVER_VERSION "5.01.00-k7" + diff -Nurb linux-2.6.22-570/drivers/scsi/qlogicfas408.c linux-2.6.22-591/drivers/scsi/qlogicfas408.c --- linux-2.6.22-570/drivers/scsi/qlogicfas408.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/qlogicfas408.c 2007-12-21 15:36:12.000000000 -0500 @@ -265,8 +265,6 @@ unsigned int message; /* scsi returned message */ unsigned int phase; /* recorded scsi phase */ unsigned int reqlen; /* total length of transfer */ - struct scatterlist *sglist; /* scatter-gather list pointer */ - unsigned int sgcount; /* sg counter */ char *buf; struct qlogicfas408_priv *priv = get_priv_by_cmd(cmd); int qbase = priv->qbase; @@ -301,9 +299,10 @@ if (inb(qbase + 7) & 0x1f) /* if some bytes in fifo */ outb(1, qbase + 3); /* clear fifo */ /* note that request_bufflen is the total xfer size when sg is used */ - reqlen = cmd->request_bufflen; + reqlen = scsi_bufflen(cmd); /* note that it won't work if transfers > 16M are requested */ if 
(reqlen && !((phase = inb(qbase + 4)) & 6)) { /* data phase */ + struct scatterlist *sg; rtrc(2) outb(reqlen, qbase); /* low-mid xfer cnt */ outb(reqlen >> 8, qbase + 1); /* low-mid xfer cnt */ @@ -311,23 +310,16 @@ outb(0x90, qbase + 3); /* command do xfer */ /* PIO pseudo DMA to buffer or sglist */ REG1; - if (!cmd->use_sg) - ql_pdma(priv, phase, cmd->request_buffer, - cmd->request_bufflen); - else { - sgcount = cmd->use_sg; - sglist = cmd->request_buffer; - while (sgcount--) { + + scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) { if (priv->qabort) { REG0; return ((priv->qabort == 1 ? DID_ABORT : DID_RESET) << 16); } - buf = page_address(sglist->page) + sglist->offset; - if (ql_pdma(priv, phase, buf, sglist->length)) + buf = page_address(sg->page) + sg->offset; + if (ql_pdma(priv, phase, buf, sg->length)) break; - sglist++; - } } REG0; rtrc(2) diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_debug.c linux-2.6.22-591/drivers/scsi/scsi_debug.c --- linux-2.6.22-570/drivers/scsi/scsi_debug.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_debug.c 2007-12-21 15:36:12.000000000 -0500 @@ -2405,7 +2405,7 @@ MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)"); MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)"); MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)"); -MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)"); +MODULE_PARM_DESC(every_nth, "timeout every nth command(def=0)"); MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)"); MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)"); diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_error.c linux-2.6.22-591/drivers/scsi/scsi_error.c --- linux-2.6.22-570/drivers/scsi/scsi_error.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_error.c 2007-12-21 15:36:12.000000000 -0500 @@ -18,12 +18,13 @@ 
#include #include #include -#include #include +#include #include #include #include #include +#include #include #include @@ -640,16 +641,8 @@ memcpy(scmd->cmnd, cmnd, cmnd_size); if (copy_sense) { - gfp_t gfp_mask = GFP_ATOMIC; - - if (shost->hostt->unchecked_isa_dma) - gfp_mask |= __GFP_DMA; - - sgl.page = alloc_page(gfp_mask); - if (!sgl.page) - return FAILED; - sgl.offset = 0; - sgl.length = 252; + sg_init_one(&sgl, scmd->sense_buffer, + sizeof(scmd->sense_buffer)); scmd->sc_data_direction = DMA_FROM_DEVICE; scmd->request_bufflen = sgl.length; @@ -720,18 +713,6 @@ /* - * Last chance to have valid sense data. - */ - if (copy_sense) { - if (!SCSI_SENSE_VALID(scmd)) { - memcpy(scmd->sense_buffer, page_address(sgl.page), - sizeof(scmd->sense_buffer)); - } - __free_page(sgl.page); - } - - - /* * Restore original data */ scmd->request_buffer = old_buffer; @@ -1536,8 +1517,6 @@ { struct Scsi_Host *shost = data; - current->flags |= PF_NOFREEZE; - /* * We use TASK_INTERRUPTIBLE so that the thread is not * counted against the load average as a running process. diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_lib.c linux-2.6.22-591/drivers/scsi/scsi_lib.c --- linux-2.6.22-570/drivers/scsi/scsi_lib.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_lib.c 2007-12-21 15:36:12.000000000 -0500 @@ -2290,3 +2290,41 @@ kunmap_atomic(virt, KM_BIO_SRC_IRQ); } EXPORT_SYMBOL(scsi_kunmap_atomic_sg); + +/** + * scsi_dma_map - perform DMA mapping against command's sg lists + * @cmd: scsi command + * + * Returns the number of sg lists actually used, zero if the sg lists + * is NULL, or -ENOMEM if the mapping failed. 
+ */ +int scsi_dma_map(struct scsi_cmnd *cmd) +{ + int nseg = 0; + + if (scsi_sg_count(cmd)) { + struct device *dev = cmd->device->host->shost_gendev.parent; + + nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), + cmd->sc_data_direction); + if (unlikely(!nseg)) + return -ENOMEM; + } + return nseg; +} +EXPORT_SYMBOL(scsi_dma_map); + +/** + * scsi_dma_unmap - unmap command's sg lists mapped by scsi_dma_map + * @cmd: scsi command + */ +void scsi_dma_unmap(struct scsi_cmnd *cmd) +{ + if (scsi_sg_count(cmd)) { + struct device *dev = cmd->device->host->shost_gendev.parent; + + dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), + cmd->sc_data_direction); + } +} +EXPORT_SYMBOL(scsi_dma_unmap); diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_netlink.c linux-2.6.22-591/drivers/scsi/scsi_netlink.c --- linux-2.6.22-570/drivers/scsi/scsi_netlink.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_netlink.c 2007-12-21 15:36:14.000000000 -0500 @@ -167,7 +167,7 @@ return; } - scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, + scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, THIS_MODULE); if (!scsi_nl_sock) { diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_scan.c linux-2.6.22-591/drivers/scsi/scsi_scan.c --- linux-2.6.22-570/drivers/scsi/scsi_scan.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_scan.c 2007-12-21 15:36:12.000000000 -0500 @@ -1213,7 +1213,7 @@ * Given a struct scsi_lun of: 0a 04 0b 03 00 00 00 00, this function returns * the integer: 0x0b030a04 **/ -static int scsilun_to_int(struct scsi_lun *scsilun) +int scsilun_to_int(struct scsi_lun *scsilun) { int i; unsigned int lun; @@ -1224,6 +1224,7 @@ scsilun->scsi_lun[i + 1]) << (i * 8)); return lun; } +EXPORT_SYMBOL(scsilun_to_int); /** * int_to_scsilun: reverts an int into a scsi_lun diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_sysfs.c linux-2.6.22-591/drivers/scsi/scsi_sysfs.c --- 
linux-2.6.22-570/drivers/scsi/scsi_sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_sysfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -293,30 +293,18 @@ { struct device_driver *drv = dev->driver; struct scsi_device *sdev = to_scsi_device(dev); - struct scsi_host_template *sht = sdev->host->hostt; int err; err = scsi_device_quiesce(sdev); if (err) return err; - /* call HLD suspend first */ if (drv && drv->suspend) { err = drv->suspend(dev, state); if (err) return err; } - /* then, call host suspend */ - if (sht->suspend) { - err = sht->suspend(sdev, state); - if (err) { - if (drv && drv->resume) - drv->resume(dev); - return err; - } - } - return 0; } @@ -324,21 +312,14 @@ { struct device_driver *drv = dev->driver; struct scsi_device *sdev = to_scsi_device(dev); - struct scsi_host_template *sht = sdev->host->hostt; - int err = 0, err2 = 0; + int err = 0; - /* call host resume first */ - if (sht->resume) - err = sht->resume(sdev); - - /* then, call HLD resume */ if (drv && drv->resume) - err2 = drv->resume(dev); + err = drv->resume(dev); scsi_device_resume(sdev); - /* favor LLD failure */ - return err ? err : err2;; + return err; } struct bus_type scsi_bus_type = { diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c linux-2.6.22-591/drivers/scsi/scsi_transport_fc.c --- linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_transport_fc.c 2007-12-21 15:36:12.000000000 -0500 @@ -19,9 +19,10 @@ * * ======== * - * Copyright (C) 2004-2005 James Smart, Emulex Corporation + * Copyright (C) 2004-2007 James Smart, Emulex Corporation * Rewrite for host, target, device, and remote port attributes, * statistics, and service functions... 
+ * Add vports, etc * */ #include @@ -37,6 +38,34 @@ #include "scsi_priv.h" static int fc_queue_work(struct Scsi_Host *, struct work_struct *); +static void fc_vport_sched_delete(struct work_struct *work); + +/* + * This is a temporary carrier for creating a vport. It will eventually + * be replaced by a real message definition for sgio or netlink. + * + * fc_vport_identifiers: This set of data contains all elements + * to uniquely identify and instantiate a FC virtual port. + * + * Notes: + * symbolic_name: The driver is to append the symbolic_name string data + * to the symbolic_node_name data that it generates by default. + * the resulting combination should then be registered with the switch. + * It is expected that things like Xen may stuff a VM title into + * this field. + */ +struct fc_vport_identifiers { + u64 node_name; + u64 port_name; + u32 roles; + bool disable; + enum fc_port_type vport_type; /* only FC_PORTTYPE_NPIV allowed */ + char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN]; +}; + +static int fc_vport_create(struct Scsi_Host *shost, int channel, + struct device *pdev, struct fc_vport_identifiers *ids, + struct fc_vport **vport); /* * Redefine so that we can have same named attributes in the @@ -90,10 +119,14 @@ { FC_PORTTYPE_NLPORT, "NLPort (fabric via loop)" }, { FC_PORTTYPE_LPORT, "LPort (private loop)" }, { FC_PORTTYPE_PTP, "Point-To-Point (direct nport connection" }, + { FC_PORTTYPE_NPIV, "NPIV VPORT" }, }; fc_enum_name_search(port_type, fc_port_type, fc_port_type_names) #define FC_PORTTYPE_MAX_NAMELEN 50 +/* Reuse fc_port_type enum function for vport_type */ +#define get_fc_vport_type_name get_fc_port_type_name + /* Convert fc_host_event_code values to ascii string name */ static const struct { @@ -139,6 +172,29 @@ #define FC_PORTSTATE_MAX_NAMELEN 20 +/* Convert fc_vport_state values to ascii string name */ +static struct { + enum fc_vport_state value; + char *name; +} fc_vport_state_names[] = { + { FC_VPORT_UNKNOWN, "Unknown" }, + { 
FC_VPORT_ACTIVE, "Active" }, + { FC_VPORT_DISABLED, "Disabled" }, + { FC_VPORT_LINKDOWN, "Linkdown" }, + { FC_VPORT_INITIALIZING, "Initializing" }, + { FC_VPORT_NO_FABRIC_SUPP, "No Fabric Support" }, + { FC_VPORT_NO_FABRIC_RSCS, "No Fabric Resources" }, + { FC_VPORT_FABRIC_LOGOUT, "Fabric Logout" }, + { FC_VPORT_FABRIC_REJ_WWN, "Fabric Rejected WWN" }, + { FC_VPORT_FAILED, "VPort Failed" }, +}; +fc_enum_name_search(vport_state, fc_vport_state, fc_vport_state_names) +#define FC_VPORTSTATE_MAX_NAMELEN 24 + +/* Reuse fc_vport_state enum function for vport_last_state */ +#define get_fc_vport_last_state_name get_fc_vport_state_name + + /* Convert fc_tgtid_binding_type values to ascii string name */ static const struct { enum fc_tgtid_binding_type value; @@ -219,16 +275,16 @@ } -/* Convert FC_RPORT_ROLE bit values to ascii string name */ +/* Convert FC_PORT_ROLE bit values to ascii string name */ static const struct { u32 value; char *name; -} fc_remote_port_role_names[] = { - { FC_RPORT_ROLE_FCP_TARGET, "FCP Target" }, - { FC_RPORT_ROLE_FCP_INITIATOR, "FCP Initiator" }, - { FC_RPORT_ROLE_IP_PORT, "IP Port" }, +} fc_port_role_names[] = { + { FC_PORT_ROLE_FCP_TARGET, "FCP Target" }, + { FC_PORT_ROLE_FCP_INITIATOR, "FCP Initiator" }, + { FC_PORT_ROLE_IP_PORT, "IP Port" }, }; -fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names) +fc_bitfield_name_search(port_roles, fc_port_role_names) /* * Define roles that are specific to port_id. Values are relative to ROLE_MASK. 
@@ -252,7 +308,8 @@ */ #define FC_STARGET_NUM_ATTRS 3 #define FC_RPORT_NUM_ATTRS 10 -#define FC_HOST_NUM_ATTRS 17 +#define FC_VPORT_NUM_ATTRS 9 +#define FC_HOST_NUM_ATTRS 21 struct fc_internal { struct scsi_transport_template t; @@ -278,6 +335,10 @@ struct transport_container rport_attr_cont; struct class_device_attribute private_rport_attrs[FC_RPORT_NUM_ATTRS]; struct class_device_attribute *rport_attrs[FC_RPORT_NUM_ATTRS + 1]; + + struct transport_container vport_attr_cont; + struct class_device_attribute private_vport_attrs[FC_VPORT_NUM_ATTRS]; + struct class_device_attribute *vport_attrs[FC_VPORT_NUM_ATTRS + 1]; }; #define to_fc_internal(tmpl) container_of(tmpl, struct fc_internal, t) @@ -331,6 +392,7 @@ sizeof(fc_host->supported_fc4s)); fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN; fc_host->maxframe_size = -1; + fc_host->max_npiv_vports = 0; memset(fc_host->serial_number, 0, sizeof(fc_host->serial_number)); @@ -348,8 +410,11 @@ INIT_LIST_HEAD(&fc_host->rports); INIT_LIST_HEAD(&fc_host->rport_bindings); + INIT_LIST_HEAD(&fc_host->vports); fc_host->next_rport_number = 0; fc_host->next_target_id = 0; + fc_host->next_vport_number = 0; + fc_host->npiv_vports_inuse = 0; snprintf(fc_host->work_q_name, KOBJ_NAME_LEN, "fc_wq_%d", shost->host_no); @@ -388,6 +453,16 @@ NULL); /* + * Setup and Remove actions for virtual ports are handled + * in the service functions below. 
+ */ +static DECLARE_TRANSPORT_CLASS(fc_vport_class, + "fc_vports", + NULL, + NULL, + NULL); + +/* * Module Parameters */ @@ -585,6 +660,9 @@ error = transport_class_register(&fc_host_class); if (error) return error; + error = transport_class_register(&fc_vport_class); + if (error) + return error; error = transport_class_register(&fc_rport_class); if (error) return error; @@ -596,6 +674,7 @@ transport_class_unregister(&fc_transport_class); transport_class_unregister(&fc_rport_class); transport_class_unregister(&fc_host_class); + transport_class_unregister(&fc_vport_class); } /* @@ -800,9 +879,9 @@ return snprintf(buf, 30, "Unknown Fabric Entity\n"); } } else { - if (rport->roles == FC_RPORT_ROLE_UNKNOWN) + if (rport->roles == FC_PORT_ROLE_UNKNOWN) return snprintf(buf, 20, "unknown\n"); - return get_fc_remote_port_roles_names(rport->roles, buf); + return get_fc_port_roles_names(rport->roles, buf); } } static FC_CLASS_DEVICE_ATTR(rport, roles, S_IRUGO, @@ -857,7 +936,7 @@ /* * Note: in the target show function we recognize when the remote - * port is in the hierarchy and do not allow the driver to get + * port is in the heirarchy and do not allow the driver to get * involved in sysfs functions. The driver only gets involved if * it's the "old" style that doesn't use rports. 
*/ @@ -912,6 +991,257 @@ /* + * FC Virtual Port Attribute Management + */ + +#define fc_vport_show_function(field, format_string, sz, cast) \ +static ssize_t \ +show_fc_vport_##field (struct class_device *cdev, char *buf) \ +{ \ + struct fc_vport *vport = transport_class_to_vport(cdev); \ + struct Scsi_Host *shost = vport_to_shost(vport); \ + struct fc_internal *i = to_fc_internal(shost->transportt); \ + if ((i->f->get_vport_##field) && \ + !(vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))) \ + i->f->get_vport_##field(vport); \ + return snprintf(buf, sz, format_string, cast vport->field); \ +} + +#define fc_vport_store_function(field) \ +static ssize_t \ +store_fc_vport_##field(struct class_device *cdev, const char *buf, \ + size_t count) \ +{ \ + int val; \ + struct fc_vport *vport = transport_class_to_vport(cdev); \ + struct Scsi_Host *shost = vport_to_shost(vport); \ + struct fc_internal *i = to_fc_internal(shost->transportt); \ + char *cp; \ + if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) \ + return -EBUSY; \ + val = simple_strtoul(buf, &cp, 0); \ + if (*cp && (*cp != '\n')) \ + return -EINVAL; \ + i->f->set_vport_##field(vport, val); \ + return count; \ +} + +#define fc_vport_store_str_function(field, slen) \ +static ssize_t \ +store_fc_vport_##field(struct class_device *cdev, const char *buf, \ + size_t count) \ +{ \ + struct fc_vport *vport = transport_class_to_vport(cdev); \ + struct Scsi_Host *shost = vport_to_shost(vport); \ + struct fc_internal *i = to_fc_internal(shost->transportt); \ + unsigned int cnt=count; \ + \ + /* count may include a LF at end of string */ \ + if (buf[cnt-1] == '\n') \ + cnt--; \ + if (cnt > ((slen) - 1)) \ + return -EINVAL; \ + memcpy(vport->field, buf, cnt); \ + i->f->set_vport_##field(vport); \ + return count; \ +} + +#define fc_vport_rd_attr(field, format_string, sz) \ + fc_vport_show_function(field, format_string, sz, ) \ +static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ + show_fc_vport_##field, NULL) + 
+#define fc_vport_rd_attr_cast(field, format_string, sz, cast) \ + fc_vport_show_function(field, format_string, sz, (cast)) \ +static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ + show_fc_vport_##field, NULL) + +#define fc_vport_rw_attr(field, format_string, sz) \ + fc_vport_show_function(field, format_string, sz, ) \ + fc_vport_store_function(field) \ +static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR, \ + show_fc_vport_##field, \ + store_fc_vport_##field) + +#define fc_private_vport_show_function(field, format_string, sz, cast) \ +static ssize_t \ +show_fc_vport_##field (struct class_device *cdev, char *buf) \ +{ \ + struct fc_vport *vport = transport_class_to_vport(cdev); \ + return snprintf(buf, sz, format_string, cast vport->field); \ +} + +#define fc_private_vport_store_u32_function(field) \ +static ssize_t \ +store_fc_vport_##field(struct class_device *cdev, const char *buf, \ + size_t count) \ +{ \ + u32 val; \ + struct fc_vport *vport = transport_class_to_vport(cdev); \ + char *cp; \ + if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) \ + return -EBUSY; \ + val = simple_strtoul(buf, &cp, 0); \ + if (*cp && (*cp != '\n')) \ + return -EINVAL; \ + vport->field = val; \ + return count; \ +} + + +#define fc_private_vport_rd_attr(field, format_string, sz) \ + fc_private_vport_show_function(field, format_string, sz, ) \ +static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ + show_fc_vport_##field, NULL) + +#define fc_private_vport_rd_attr_cast(field, format_string, sz, cast) \ + fc_private_vport_show_function(field, format_string, sz, (cast)) \ +static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ + show_fc_vport_##field, NULL) + +#define fc_private_vport_rw_u32_attr(field, format_string, sz) \ + fc_private_vport_show_function(field, format_string, sz, ) \ + fc_private_vport_store_u32_function(field) \ +static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR, \ + show_fc_vport_##field, \ + store_fc_vport_##field) + + +#define 
fc_private_vport_rd_enum_attr(title, maxlen) \ +static ssize_t \ +show_fc_vport_##title (struct class_device *cdev, char *buf) \ +{ \ + struct fc_vport *vport = transport_class_to_vport(cdev); \ + const char *name; \ + name = get_fc_##title##_name(vport->title); \ + if (!name) \ + return -EINVAL; \ + return snprintf(buf, maxlen, "%s\n", name); \ +} \ +static FC_CLASS_DEVICE_ATTR(vport, title, S_IRUGO, \ + show_fc_vport_##title, NULL) + + +#define SETUP_VPORT_ATTRIBUTE_RD(field) \ + i->private_vport_attrs[count] = class_device_attr_vport_##field; \ + i->private_vport_attrs[count].attr.mode = S_IRUGO; \ + i->private_vport_attrs[count].store = NULL; \ + i->vport_attrs[count] = &i->private_vport_attrs[count]; \ + if (i->f->get_##field) \ + count++ + /* NOTE: Above MACRO differs: checks function not show bit */ + +#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(field) \ + i->private_vport_attrs[count] = class_device_attr_vport_##field; \ + i->private_vport_attrs[count].attr.mode = S_IRUGO; \ + i->private_vport_attrs[count].store = NULL; \ + i->vport_attrs[count] = &i->private_vport_attrs[count]; \ + count++ + +#define SETUP_VPORT_ATTRIBUTE_WR(field) \ + i->private_vport_attrs[count] = class_device_attr_vport_##field; \ + i->vport_attrs[count] = &i->private_vport_attrs[count]; \ + if (i->f->field) \ + count++ + /* NOTE: Above MACRO differs: checks function */ + +#define SETUP_VPORT_ATTRIBUTE_RW(field) \ + i->private_vport_attrs[count] = class_device_attr_vport_##field; \ + if (!i->f->set_vport_##field) { \ + i->private_vport_attrs[count].attr.mode = S_IRUGO; \ + i->private_vport_attrs[count].store = NULL; \ + } \ + i->vport_attrs[count] = &i->private_vport_attrs[count]; \ + count++ + /* NOTE: Above MACRO differs: does not check show bit */ + +#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RW(field) \ +{ \ + i->private_vport_attrs[count] = class_device_attr_vport_##field; \ + i->vport_attrs[count] = &i->private_vport_attrs[count]; \ + count++; \ +} + + +/* The FC Transport Virtual Port 
Attributes: */ + +/* Fixed Virtual Port Attributes */ + +/* Dynamic Virtual Port Attributes */ + +/* Private Virtual Port Attributes */ + +fc_private_vport_rd_enum_attr(vport_state, FC_VPORTSTATE_MAX_NAMELEN); +fc_private_vport_rd_enum_attr(vport_last_state, FC_VPORTSTATE_MAX_NAMELEN); +fc_private_vport_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long); +fc_private_vport_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long); + +static ssize_t +show_fc_vport_roles (struct class_device *cdev, char *buf) +{ + struct fc_vport *vport = transport_class_to_vport(cdev); + + if (vport->roles == FC_PORT_ROLE_UNKNOWN) + return snprintf(buf, 20, "unknown\n"); + return get_fc_port_roles_names(vport->roles, buf); +} +static FC_CLASS_DEVICE_ATTR(vport, roles, S_IRUGO, show_fc_vport_roles, NULL); + +fc_private_vport_rd_enum_attr(vport_type, FC_PORTTYPE_MAX_NAMELEN); + +fc_private_vport_show_function(symbolic_name, "%s\n", + FC_VPORT_SYMBOLIC_NAMELEN + 1, ) +fc_vport_store_str_function(symbolic_name, FC_VPORT_SYMBOLIC_NAMELEN) +static FC_CLASS_DEVICE_ATTR(vport, symbolic_name, S_IRUGO | S_IWUSR, + show_fc_vport_symbolic_name, store_fc_vport_symbolic_name); + +static ssize_t +store_fc_vport_delete(struct class_device *cdev, const char *buf, + size_t count) +{ + struct fc_vport *vport = transport_class_to_vport(cdev); + struct Scsi_Host *shost = vport_to_shost(vport); + + fc_queue_work(shost, &vport->vport_delete_work); + return count; +} +static FC_CLASS_DEVICE_ATTR(vport, vport_delete, S_IWUSR, + NULL, store_fc_vport_delete); + + +/* + * Enable/Disable vport + * Write "1" to disable, write "0" to enable + */ +static ssize_t +store_fc_vport_disable(struct class_device *cdev, const char *buf, + size_t count) +{ + struct fc_vport *vport = transport_class_to_vport(cdev); + struct Scsi_Host *shost = vport_to_shost(vport); + struct fc_internal *i = to_fc_internal(shost->transportt); + int stat; + + if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) + return -EBUSY; + 
+ if (*buf == '0') { + if (vport->vport_state != FC_VPORT_DISABLED) + return -EALREADY; + } else if (*buf == '1') { + if (vport->vport_state == FC_VPORT_DISABLED) + return -EALREADY; + } else + return -EINVAL; + + stat = i->f->vport_disable(vport, ((*buf == '0') ? false : true)); + return stat ? stat : count; +} +static FC_CLASS_DEVICE_ATTR(vport, vport_disable, S_IWUSR, + NULL, store_fc_vport_disable); + + +/* * Host Attribute Management */ @@ -1003,6 +1333,13 @@ if (i->f->show_host_##field) \ count++ +#define SETUP_HOST_ATTRIBUTE_RD_NS(field) \ + i->private_host_attrs[count] = class_device_attr_host_##field; \ + i->private_host_attrs[count].attr.mode = S_IRUGO; \ + i->private_host_attrs[count].store = NULL; \ + i->host_attrs[count] = &i->private_host_attrs[count]; \ + count++ + #define SETUP_HOST_ATTRIBUTE_RW(field) \ i->private_host_attrs[count] = class_device_attr_host_##field; \ if (!i->f->set_host_##field) { \ @@ -1090,6 +1427,7 @@ fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20, unsigned long long); fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20); +fc_private_host_rd_attr(max_npiv_vports, "%u\n", 20); fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1)); @@ -1210,6 +1548,9 @@ static FC_CLASS_DEVICE_ATTR(host, issue_lip, S_IWUSR, NULL, store_fc_private_host_issue_lip); +fc_private_host_rd_attr(npiv_vports_inuse, "%u\n", 20); + + /* * Host Statistics Management */ @@ -1285,7 +1626,6 @@ static FC_CLASS_DEVICE_ATTR(host, reset_statistics, S_IWUSR, NULL, fc_reset_statistics); - static struct attribute *fc_statistics_attrs[] = { &class_device_attr_host_seconds_since_last_reset.attr, &class_device_attr_host_tx_frames.attr, @@ -1316,6 +1656,142 @@ .attrs = fc_statistics_attrs, }; + +/* Host Vport Attributes */ + +static int +fc_parse_wwn(const char *ns, u64 *nm) +{ + unsigned int i, j; + u8 wwn[8]; + + memset(wwn, 0, sizeof(wwn)); + + /* Validate and store the new name */ + for (i=0, j=0; i < 16; i++) { + if ((*ns >= 
'a') && (*ns <= 'f')) + j = ((j << 4) | ((*ns++ -'a') + 10)); + else if ((*ns >= 'A') && (*ns <= 'F')) + j = ((j << 4) | ((*ns++ -'A') + 10)); + else if ((*ns >= '0') && (*ns <= '9')) + j = ((j << 4) | (*ns++ -'0')); + else + return -EINVAL; + if (i % 2) { + wwn[i/2] = j & 0xff; + j = 0; + } + } + + *nm = wwn_to_u64(wwn); + + return 0; +} + + +/* + * "Short-cut" sysfs variable to create a new vport on a FC Host. + * Input is a string of the form "<WWPN>:<WWNN>". Other attributes + * will default to a NPIV-based FCP_Initiator; The WWNs are specified + * as hex characters, and may *not* contain any prefixes (e.g. 0x, x, etc) + */ +static ssize_t +store_fc_host_vport_create(struct class_device *cdev, const char *buf, + size_t count) +{ + struct Scsi_Host *shost = transport_class_to_shost(cdev); + struct fc_vport_identifiers vid; + struct fc_vport *vport; + unsigned int cnt=count; + int stat; + + memset(&vid, 0, sizeof(vid)); + + /* count may include a LF at end of string */ + if (buf[cnt-1] == '\n') + cnt--; + + /* validate we have enough characters for WWPN */ + if ((cnt != (16+1+16)) || (buf[16] != ':')) + return -EINVAL; + + stat = fc_parse_wwn(&buf[0], &vid.port_name); + if (stat) + return stat; + + stat = fc_parse_wwn(&buf[17], &vid.node_name); + if (stat) + return stat; + + vid.roles = FC_PORT_ROLE_FCP_INITIATOR; + vid.vport_type = FC_PORTTYPE_NPIV; + /* vid.symbolic_name is already zero/NULL's */ + vid.disable = false; /* always enabled */ + + /* we only allow support on Channel 0 !!! */ + stat = fc_vport_create(shost, 0, &shost->shost_gendev, &vid, &vport); + return stat ? stat : count; +} +static FC_CLASS_DEVICE_ATTR(host, vport_create, S_IWUSR, NULL, + store_fc_host_vport_create); + + +/* + * "Short-cut" sysfs variable to delete a vport on a FC Host. + * Vport is identified by a string containing "<WWPN>:<WWNN>". + * The WWNs are specified as hex characters, and may *not* contain + * any prefixes (e.g. 
0x, x, etc) + */ +static ssize_t +store_fc_host_vport_delete(struct class_device *cdev, const char *buf, + size_t count) +{ + struct Scsi_Host *shost = transport_class_to_shost(cdev); + struct fc_host_attrs *fc_host = shost_to_fc_host(shost); + struct fc_vport *vport; + u64 wwpn, wwnn; + unsigned long flags; + unsigned int cnt=count; + int stat, match; + + /* count may include a LF at end of string */ + if (buf[cnt-1] == '\n') + cnt--; + + /* validate we have enough characters for WWPN */ + if ((cnt != (16+1+16)) || (buf[16] != ':')) + return -EINVAL; + + stat = fc_parse_wwn(&buf[0], &wwpn); + if (stat) + return stat; + + stat = fc_parse_wwn(&buf[17], &wwnn); + if (stat) + return stat; + + spin_lock_irqsave(shost->host_lock, flags); + match = 0; + /* we only allow support on Channel 0 !!! */ + list_for_each_entry(vport, &fc_host->vports, peers) { + if ((vport->channel == 0) && + (vport->port_name == wwpn) && (vport->node_name == wwnn)) { + match = 1; + break; + } + } + spin_unlock_irqrestore(shost->host_lock, flags); + + if (!match) + return -ENODEV; + + stat = fc_vport_terminate(vport); + return stat ? 
stat : count; +} +static FC_CLASS_DEVICE_ATTR(host, vport_delete, S_IWUSR, NULL, + store_fc_host_vport_delete); + + static int fc_host_match(struct attribute_container *cont, struct device *dev) { @@ -1387,6 +1863,40 @@ } +static void fc_vport_dev_release(struct device *dev) +{ + struct fc_vport *vport = dev_to_vport(dev); + put_device(dev->parent); /* release kobj parent */ + kfree(vport); +} + +int scsi_is_fc_vport(const struct device *dev) +{ + return dev->release == fc_vport_dev_release; +} +EXPORT_SYMBOL(scsi_is_fc_vport); + +static int fc_vport_match(struct attribute_container *cont, + struct device *dev) +{ + struct fc_vport *vport; + struct Scsi_Host *shost; + struct fc_internal *i; + + if (!scsi_is_fc_vport(dev)) + return 0; + vport = dev_to_vport(dev); + + shost = vport_to_shost(vport); + if (!shost->transportt || shost->transportt->host_attrs.ac.class + != &fc_host_class.class) + return 0; + + i = to_fc_internal(shost->transportt); + return &i->vport_attr_cont.ac == cont; +} + + /** * fc_timed_out - FC Transport I/O timeout intercept handler * @@ -1433,6 +1943,9 @@ if (rport->scsi_target_id == -1) continue; + if (rport->port_state != FC_PORTSTATE_ONLINE) + continue; + if ((channel == SCAN_WILD_CARD || channel == rport->channel) && (id == SCAN_WILD_CARD || id == rport->scsi_target_id)) { scsi_scan_target(&rport->dev, rport->channel, @@ -1472,6 +1985,11 @@ i->rport_attr_cont.ac.match = fc_rport_match; transport_container_register(&i->rport_attr_cont); + i->vport_attr_cont.ac.attrs = &i->vport_attrs[0]; + i->vport_attr_cont.ac.class = &fc_vport_class.class; + i->vport_attr_cont.ac.match = fc_vport_match; + transport_container_register(&i->vport_attr_cont); + i->f = ft; /* Transport uses the shost workq for scsi scanning */ @@ -1505,6 +2023,10 @@ SETUP_HOST_ATTRIBUTE_RD(supported_fc4s); SETUP_HOST_ATTRIBUTE_RD(supported_speeds); SETUP_HOST_ATTRIBUTE_RD(maxframe_size); + if (ft->vport_create) { + SETUP_HOST_ATTRIBUTE_RD_NS(max_npiv_vports); + 
SETUP_HOST_ATTRIBUTE_RD_NS(npiv_vports_inuse); + } SETUP_HOST_ATTRIBUTE_RD(serial_number); SETUP_HOST_ATTRIBUTE_RD(port_id); @@ -1520,6 +2042,10 @@ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type); if (ft->issue_fc_host_lip) SETUP_PRIVATE_HOST_ATTRIBUTE_RW(issue_lip); + if (ft->vport_create) + SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_create); + if (ft->vport_delete) + SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_delete); BUG_ON(count > FC_HOST_NUM_ATTRS); @@ -1545,6 +2071,24 @@ i->rport_attrs[count] = NULL; + /* + * Setup Virtual Port Attributes. + */ + count=0; + SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_state); + SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_last_state); + SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(node_name); + SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(port_name); + SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(roles); + SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_type); + SETUP_VPORT_ATTRIBUTE_RW(symbolic_name); + SETUP_VPORT_ATTRIBUTE_WR(vport_delete); + SETUP_VPORT_ATTRIBUTE_WR(vport_disable); + + BUG_ON(count > FC_VPORT_NUM_ATTRS); + + i->vport_attrs[count] = NULL; + return &i->t; } EXPORT_SYMBOL(fc_attach_transport); @@ -1556,6 +2100,7 @@ transport_container_unregister(&i->t.target_attrs); transport_container_unregister(&i->t.host_attrs); transport_container_unregister(&i->rport_attr_cont); + transport_container_unregister(&i->vport_attr_cont); kfree(i); } @@ -1667,9 +2212,17 @@ void fc_remove_host(struct Scsi_Host *shost) { - struct fc_rport *rport, *next_rport; + struct fc_vport *vport = NULL, *next_vport = NULL; + struct fc_rport *rport = NULL, *next_rport = NULL; struct workqueue_struct *work_q; struct fc_host_attrs *fc_host = shost_to_fc_host(shost); + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + + /* Remove any vports */ + list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers) + fc_queue_work(shost, &vport->vport_delete_work); /* Remove any remote ports */ list_for_each_entry_safe(rport, next_rport, @@ -1686,6 +2239,8 @@ fc_queue_work(shost, 
&rport->rport_delete_work); } + spin_unlock_irqrestore(shost->host_lock, flags); + /* flush all scan work items */ scsi_flush_work(shost); @@ -1844,7 +2399,7 @@ spin_lock_irqsave(shost->host_lock, flags); rport->number = fc_host->next_rport_number++; - if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) + if (rport->roles & FC_PORT_ROLE_FCP_TARGET) rport->scsi_target_id = fc_host->next_target_id++; else rport->scsi_target_id = -1; @@ -1869,7 +2424,7 @@ transport_add_device(dev); transport_configure_device(dev); - if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) { + if (rport->roles & FC_PORT_ROLE_FCP_TARGET) { /* initiate a scan of the target */ rport->flags |= FC_RPORT_SCAN_PENDING; scsi_queue_work(shost, &rport->scan_work); @@ -2003,7 +2558,7 @@ /* was a target, not in roles */ if ((rport->scsi_target_id != -1) && - (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET))) + (!(ids->roles & FC_PORT_ROLE_FCP_TARGET))) return rport; /* @@ -2086,7 +2641,7 @@ memset(rport->dd_data, 0, fci->f->dd_fcrport_size); - if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) { + if (rport->roles & FC_PORT_ROLE_FCP_TARGET) { /* initiate a scan of the target */ rport->flags |= FC_RPORT_SCAN_PENDING; scsi_queue_work(shost, &rport->scan_work); @@ -2243,11 +2798,11 @@ int create = 0; spin_lock_irqsave(shost->host_lock, flags); - if (roles & FC_RPORT_ROLE_FCP_TARGET) { + if (roles & FC_PORT_ROLE_FCP_TARGET) { if (rport->scsi_target_id == -1) { rport->scsi_target_id = fc_host->next_target_id++; create = 1; - } else if (!(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) + } else if (!(rport->roles & FC_PORT_ROLE_FCP_TARGET)) create = 1; } @@ -2317,7 +2872,7 @@ */ if ((rport->port_state == FC_PORTSTATE_ONLINE) && (rport->scsi_target_id != -1) && - !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) { + !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) { dev_printk(KERN_ERR, &rport->dev, "blocked FC remote port time out: no longer" " a FCP target, removing starget\n"); @@ -2367,7 +2922,7 @@ */ rport->maxframe_size = -1; 
rport->supported_classes = FC_COS_UNSPECIFIED; - rport->roles = FC_RPORT_ROLE_UNKNOWN; + rport->roles = FC_PORT_ROLE_UNKNOWN; rport->port_state = FC_PORTSTATE_NOTPRESENT; /* remove the identifiers that aren't used in the consisting binding */ @@ -2436,7 +2991,7 @@ unsigned long flags; if ((rport->port_state == FC_PORTSTATE_ONLINE) && - (rport->roles & FC_RPORT_ROLE_FCP_TARGET)) { + (rport->roles & FC_PORT_ROLE_FCP_TARGET)) { scsi_scan_target(&rport->dev, rport->channel, rport->scsi_target_id, SCAN_WILD_CARD, 1); } @@ -2447,7 +3002,227 @@ } -MODULE_AUTHOR("Martin Hicks"); +/** + * fc_vport_create - allocates and creates a FC virtual port. + * @shost: scsi host the virtual port is connected to. + * @channel: Channel on shost port connected to. + * @pdev: parent device for vport + * @ids: The world wide names, FC4 port roles, etc for + * the virtual port. + * @ret_vport: The pointer to the created vport. + * + * Allocates and creates the vport structure, calls the parent host + * to instantiate the vport, then completes w/ class and sysfs creation. + * + * Notes: + * This routine assumes no locks are held on entry. + **/ +static int +fc_vport_create(struct Scsi_Host *shost, int channel, struct device *pdev, + struct fc_vport_identifiers *ids, struct fc_vport **ret_vport) +{ + struct fc_host_attrs *fc_host = shost_to_fc_host(shost); + struct fc_internal *fci = to_fc_internal(shost->transportt); + struct fc_vport *vport; + struct device *dev; + unsigned long flags; + size_t size; + int error; + + *ret_vport = NULL; + + if ( ! 
fci->f->vport_create) + return -ENOENT; + + size = (sizeof(struct fc_vport) + fci->f->dd_fcvport_size); + vport = kzalloc(size, GFP_KERNEL); + if (unlikely(!vport)) { + printk(KERN_ERR "%s: allocation failure\n", __FUNCTION__); + return -ENOMEM; + } + + vport->vport_state = FC_VPORT_UNKNOWN; + vport->vport_last_state = FC_VPORT_UNKNOWN; + vport->node_name = ids->node_name; + vport->port_name = ids->port_name; + vport->roles = ids->roles; + vport->vport_type = ids->vport_type; + if (fci->f->dd_fcvport_size) + vport->dd_data = &vport[1]; + vport->shost = shost; + vport->channel = channel; + vport->flags = FC_VPORT_CREATING; + INIT_WORK(&vport->vport_delete_work, fc_vport_sched_delete); + + spin_lock_irqsave(shost->host_lock, flags); + + if (fc_host->npiv_vports_inuse >= fc_host->max_npiv_vports) { + spin_unlock_irqrestore(shost->host_lock, flags); + kfree(vport); + return -ENOSPC; + } + fc_host->npiv_vports_inuse++; + vport->number = fc_host->next_vport_number++; + list_add_tail(&vport->peers, &fc_host->vports); + get_device(&shost->shost_gendev); /* for fc_host->vport list */ + + spin_unlock_irqrestore(shost->host_lock, flags); + + dev = &vport->dev; + device_initialize(dev); /* takes self reference */ + dev->parent = get_device(pdev); /* takes parent reference */ + dev->release = fc_vport_dev_release; + sprintf(dev->bus_id, "vport-%d:%d-%d", + shost->host_no, channel, vport->number); + transport_setup_device(dev); + + error = device_add(dev); + if (error) { + printk(KERN_ERR "FC Virtual Port device_add failed\n"); + goto delete_vport; + } + transport_add_device(dev); + transport_configure_device(dev); + + error = fci->f->vport_create(vport, ids->disable); + if (error) { + printk(KERN_ERR "FC Virtual Port LLDD Create failed\n"); + goto delete_vport_all; + } + + /* + * if the parent isn't the physical adapter's Scsi_Host, ensure + * the Scsi_Host at least contains a symlink to the vport. 
+ */ + if (pdev != &shost->shost_gendev) { + error = sysfs_create_link(&shost->shost_gendev.kobj, + &dev->kobj, dev->bus_id); + if (error) + printk(KERN_ERR + "%s: Cannot create vport symlinks for " + "%s, err=%d\n", + __FUNCTION__, dev->bus_id, error); + } + spin_lock_irqsave(shost->host_lock, flags); + vport->flags &= ~FC_VPORT_CREATING; + spin_unlock_irqrestore(shost->host_lock, flags); + + dev_printk(KERN_NOTICE, pdev, + "%s created via shost%d channel %d\n", dev->bus_id, + shost->host_no, channel); + + *ret_vport = vport; + + return 0; + +delete_vport_all: + transport_remove_device(dev); + device_del(dev); +delete_vport: + transport_destroy_device(dev); + spin_lock_irqsave(shost->host_lock, flags); + list_del(&vport->peers); + put_device(&shost->shost_gendev); /* for fc_host->vport list */ + fc_host->npiv_vports_inuse--; + spin_unlock_irqrestore(shost->host_lock, flags); + put_device(dev->parent); + kfree(vport); + + return error; +} + + +/** + * fc_vport_terminate - Admin App or LLDD requests termination of a vport + * @vport: fc_vport to be terminated + * + * Calls the LLDD vport_delete() function, then deallocates and removes + * the vport from the shost and object tree. + * + * Notes: + * This routine assumes no locks are held on entry. 
+ **/ +int +fc_vport_terminate(struct fc_vport *vport) +{ + struct Scsi_Host *shost = vport_to_shost(vport); + struct fc_host_attrs *fc_host = shost_to_fc_host(shost); + struct fc_internal *i = to_fc_internal(shost->transportt); + struct device *dev = &vport->dev; + unsigned long flags; + int stat; + + spin_lock_irqsave(shost->host_lock, flags); + if (vport->flags & FC_VPORT_CREATING) { + spin_unlock_irqrestore(shost->host_lock, flags); + return -EBUSY; + } + if (vport->flags & (FC_VPORT_DEL)) { + spin_unlock_irqrestore(shost->host_lock, flags); + return -EALREADY; + } + vport->flags |= FC_VPORT_DELETING; + spin_unlock_irqrestore(shost->host_lock, flags); + + if (i->f->vport_delete) + stat = i->f->vport_delete(vport); + else + stat = -ENOENT; + + spin_lock_irqsave(shost->host_lock, flags); + vport->flags &= ~FC_VPORT_DELETING; + if (!stat) { + vport->flags |= FC_VPORT_DELETED; + list_del(&vport->peers); + fc_host->npiv_vports_inuse--; + put_device(&shost->shost_gendev); /* for fc_host->vport list */ + } + spin_unlock_irqrestore(shost->host_lock, flags); + + if (stat) + return stat; + + if (dev->parent != &shost->shost_gendev) + sysfs_remove_link(&shost->shost_gendev.kobj, dev->bus_id); + transport_remove_device(dev); + device_del(dev); + transport_destroy_device(dev); + + /* + * Removing our self-reference should mean our + * release function gets called, which will drop the remaining + * parent reference and free the data structure. + */ + put_device(dev); /* for self-reference */ + + return 0; /* SUCCESS */ +} +EXPORT_SYMBOL(fc_vport_terminate); + +/** + * fc_vport_sched_delete - workq-based delete request for a vport + * + * @work: vport to be deleted. 
+ **/ +static void +fc_vport_sched_delete(struct work_struct *work) +{ + struct fc_vport *vport = + container_of(work, struct fc_vport, vport_delete_work); + int stat; + + stat = fc_vport_terminate(vport); + if (stat) + dev_printk(KERN_ERR, vport->dev.parent, + "%s: %s could not be deleted created via " + "shost%d channel %d - error %d\n", __FUNCTION__, + vport->dev.bus_id, vport->shost->host_no, + vport->channel, stat); +} + + +/* Original Author: Martin Hicks */ +MODULE_AUTHOR("James Smart"); MODULE_DESCRIPTION("FC Transport Attributes"); MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c linux-2.6.22-591/drivers/scsi/scsi_transport_iscsi.c --- linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/scsi_transport_iscsi.c 2007-12-21 15:36:14.000000000 -0500 @@ -30,9 +30,9 @@ #include #include -#define ISCSI_SESSION_ATTRS 11 +#define ISCSI_SESSION_ATTRS 15 #define ISCSI_CONN_ATTRS 11 -#define ISCSI_HOST_ATTRS 0 +#define ISCSI_HOST_ATTRS 4 #define ISCSI_TRANSPORT_VERSION "2.0-724" struct iscsi_internal { @@ -609,12 +609,10 @@ int t = done ? NLMSG_DONE : type; skb = alloc_skb(len, GFP_ATOMIC); - /* - * FIXME: - * user is supposed to react on iferror == -ENOMEM; - * see iscsi_if_rx(). 
- */ - BUG_ON(!skb); + if (!skb) { + printk(KERN_ERR "Could not allocate skb to send reply.\n"); + return -ENOMEM; + } nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0); nlh->nlmsg_flags = flags; @@ -816,6 +814,8 @@ uint32_t hostno; session = transport->create_session(transport, &priv->t, + ev->u.c_session.cmds_max, + ev->u.c_session.queue_depth, ev->u.c_session.initial_cmdsn, &hostno); if (!session) @@ -947,15 +947,50 @@ iscsi_tgt_dscvr(struct iscsi_transport *transport, struct iscsi_uevent *ev) { + struct Scsi_Host *shost; struct sockaddr *dst_addr; + int err; if (!transport->tgt_dscvr) return -EINVAL; + shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no); + if (IS_ERR(shost)) { + printk(KERN_ERR "target discovery could not find host no %u\n", + ev->u.tgt_dscvr.host_no); + return -ENODEV; + } + + dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); - return transport->tgt_dscvr(ev->u.tgt_dscvr.type, - ev->u.tgt_dscvr.host_no, + err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type, ev->u.tgt_dscvr.enable, dst_addr); + scsi_host_put(shost); + return err; +} + +static int +iscsi_set_host_param(struct iscsi_transport *transport, + struct iscsi_uevent *ev) +{ + char *data = (char*)ev + sizeof(*ev); + struct Scsi_Host *shost; + int err; + + if (!transport->set_host_param) + return -ENOSYS; + + shost = scsi_host_lookup(ev->u.set_host_param.host_no); + if (IS_ERR(shost)) { + printk(KERN_ERR "set_host_param could not find host no %u\n", + ev->u.set_host_param.host_no); + return -ENODEV; + } + + err = transport->set_host_param(shost, ev->u.set_host_param.param, + data, ev->u.set_host_param.len); + scsi_host_put(shost); + return err; } static int @@ -1049,8 +1084,11 @@ case ISCSI_UEVENT_TGT_DSCVR: err = iscsi_tgt_dscvr(transport, ev); break; + case ISCSI_UEVENT_SET_HOST_PARAM: + err = iscsi_set_host_param(transport, ev); + break; default: - err = -EINVAL; + err = -ENOSYS; break; } @@ -1160,30 +1198,37 @@ /* * iSCSI session attrs */ -#define 
iscsi_session_attr_show(param) \ +#define iscsi_session_attr_show(param, perm) \ static ssize_t \ show_session_param_##param(struct class_device *cdev, char *buf) \ { \ struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev); \ struct iscsi_transport *t = session->transport; \ + \ + if (perm && !capable(CAP_SYS_ADMIN)) \ + return -EACCES; \ return t->get_session_param(session, param, buf); \ } -#define iscsi_session_attr(field, param) \ - iscsi_session_attr_show(param) \ +#define iscsi_session_attr(field, param, perm) \ + iscsi_session_attr_show(param, perm) \ static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \ NULL); -iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME); -iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN); -iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T); -iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN); -iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST); -iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST); -iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN); -iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN); -iscsi_session_attr(erl, ISCSI_PARAM_ERL); -iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT); +iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME, 0); +iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, 0); +iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, 0); +iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, 0); +iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, 0); +iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, 0); +iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, 0); +iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, 0); +iscsi_session_attr(erl, ISCSI_PARAM_ERL, 0); +iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT, 0); +iscsi_session_attr(username, ISCSI_PARAM_USERNAME, 1); +iscsi_session_attr(username_in, ISCSI_PARAM_USERNAME_IN, 1); 
+iscsi_session_attr(password, ISCSI_PARAM_PASSWORD, 1); +iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ @@ -1199,6 +1244,28 @@ NULL) iscsi_priv_session_attr(recovery_tmo, "%d"); +/* + * iSCSI host attrs + */ +#define iscsi_host_attr_show(param) \ +static ssize_t \ +show_host_param_##param(struct class_device *cdev, char *buf) \ +{ \ + struct Scsi_Host *shost = transport_class_to_shost(cdev); \ + struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); \ + return priv->iscsi_transport->get_host_param(shost, param, buf); \ +} + +#define iscsi_host_attr(field, param) \ + iscsi_host_attr_show(param) \ +static ISCSI_CLASS_ATTR(host, field, S_IRUGO, show_host_param_##param, \ + NULL); + +iscsi_host_attr(netdev, ISCSI_HOST_PARAM_NETDEV_NAME); +iscsi_host_attr(hwaddress, ISCSI_HOST_PARAM_HWADDRESS); +iscsi_host_attr(ipaddress, ISCSI_HOST_PARAM_IPADDRESS); +iscsi_host_attr(initiatorname, ISCSI_HOST_PARAM_INITIATOR_NAME); + #define SETUP_PRIV_SESSION_RD_ATTR(field) \ do { \ priv->session_attrs[count] = &class_device_attr_priv_sess_##field; \ @@ -1222,6 +1289,14 @@ } \ } while (0) +#define SETUP_HOST_RD_ATTR(field, param_flag) \ +do { \ + if (tt->host_param_mask & param_flag) { \ + priv->host_attrs[count] = &class_device_attr_host_##field; \ + count++; \ + } \ +} while (0) + static int iscsi_session_match(struct attribute_container *cont, struct device *dev) { @@ -1323,9 +1398,16 @@ priv->t.host_attrs.ac.class = &iscsi_host_class.class; priv->t.host_attrs.ac.match = iscsi_host_match; priv->t.host_size = sizeof(struct iscsi_host); - priv->host_attrs[0] = NULL; transport_container_register(&priv->t.host_attrs); + SETUP_HOST_RD_ATTR(netdev, ISCSI_HOST_NETDEV_NAME); + SETUP_HOST_RD_ATTR(ipaddress, ISCSI_HOST_IPADDRESS); + SETUP_HOST_RD_ATTR(hwaddress, ISCSI_HOST_HWADDRESS); + SETUP_HOST_RD_ATTR(initiatorname, ISCSI_HOST_INITIATOR_NAME); + BUG_ON(count > ISCSI_HOST_ATTRS); + 
priv->host_attrs[count] = NULL; + count = 0; + /* connection parameters */ priv->conn_cont.ac.attrs = &priv->conn_attrs[0]; priv->conn_cont.ac.class = &iscsi_connection_class.class; @@ -1364,6 +1446,10 @@ SETUP_SESSION_RD_ATTR(erl, ISCSI_ERL); SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME); SETUP_SESSION_RD_ATTR(tpgt, ISCSI_TPGT); + SETUP_SESSION_RD_ATTR(password, ISCSI_USERNAME); + SETUP_SESSION_RD_ATTR(password_in, ISCSI_USERNAME_IN); + SETUP_SESSION_RD_ATTR(username, ISCSI_PASSWORD); + SETUP_SESSION_RD_ATTR(username_in, ISCSI_PASSWORD_IN); SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo); BUG_ON(count > ISCSI_SESSION_ATTRS); @@ -1437,7 +1523,7 @@ if (err) goto unregister_conn_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL, + nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, 1, iscsi_if_rx, NULL, THIS_MODULE); if (!nls) { err = -ENOBUFS; diff -Nurb linux-2.6.22-570/drivers/scsi/sd.c linux-2.6.22-591/drivers/scsi/sd.c --- linux-2.6.22-570/drivers/scsi/sd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/sd.c 2007-12-21 15:36:12.000000000 -0500 @@ -1515,7 +1515,7 @@ if (!scsi_device_online(sdp)) goto out; - buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA); + buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); if (!buffer) { sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " "allocation failure.\n"); diff -Nurb linux-2.6.22-570/drivers/scsi/sg.c linux-2.6.22-591/drivers/scsi/sg.c --- linux-2.6.22-570/drivers/scsi/sg.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/sg.c 2007-12-21 15:36:12.000000000 -0500 @@ -1842,7 +1842,7 @@ int blk_size = buff_size; struct page *p = NULL; - if ((blk_size < 0) || (!sfp)) + if (blk_size < 0) return -EFAULT; if (0 == blk_size) ++blk_size; /* don't know why */ diff -Nurb linux-2.6.22-570/drivers/scsi/stex.c linux-2.6.22-591/drivers/scsi/stex.c --- linux-2.6.22-570/drivers/scsi/stex.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/drivers/scsi/stex.c 2007-12-21 15:36:12.000000000 -0500 @@ -395,53 +395,34 @@ static int stex_map_sg(struct st_hba *hba, struct req_msg *req, struct st_ccb *ccb) { - struct pci_dev *pdev = hba->pdev; struct scsi_cmnd *cmd; - dma_addr_t dma_handle; - struct scatterlist *src; + struct scatterlist *sg; struct st_sgtable *dst; - int i; + int i, nseg; cmd = ccb->cmd; dst = (struct st_sgtable *)req->variable; dst->max_sg_count = cpu_to_le16(ST_MAX_SG); - dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen); - - if (cmd->use_sg) { - int n_elem; + dst->sz_in_byte = cpu_to_le32(scsi_bufflen(cmd)); - src = (struct scatterlist *) cmd->request_buffer; - n_elem = pci_map_sg(pdev, src, - cmd->use_sg, cmd->sc_data_direction); - if (n_elem <= 0) + nseg = scsi_dma_map(cmd); + if (nseg < 0) return -EIO; + if (nseg) { + ccb->sg_count = nseg; + dst->sg_count = cpu_to_le16((u16)nseg); - ccb->sg_count = n_elem; - dst->sg_count = cpu_to_le16((u16)n_elem); - - for (i = 0; i < n_elem; i++, src++) { - dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src)); + scsi_for_each_sg(cmd, sg, nseg, i) { + dst->table[i].count = cpu_to_le32((u32)sg_dma_len(sg)); dst->table[i].addr = - cpu_to_le32(sg_dma_address(src) & 0xffffffff); + cpu_to_le32(sg_dma_address(sg) & 0xffffffff); dst->table[i].addr_hi = - cpu_to_le32((sg_dma_address(src) >> 16) >> 16); + cpu_to_le32((sg_dma_address(sg) >> 16) >> 16); dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST; } dst->table[--i].ctrl |= SG_CF_EOT; - return 0; } - dma_handle = pci_map_single(pdev, cmd->request_buffer, - cmd->request_bufflen, cmd->sc_data_direction); - cmd->SCp.dma_handle = dma_handle; - - ccb->sg_count = 1; - dst->sg_count = cpu_to_le16(1); - dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff); - dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16); - dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen); - dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST; - return 0; } @@ -451,24 +432,24 @@ size_t lcount; 
size_t len; void *s, *d, *base = NULL; - if (*count > cmd->request_bufflen) - *count = cmd->request_bufflen; + size_t offset; + + if (*count > scsi_bufflen(cmd)) + *count = scsi_bufflen(cmd); lcount = *count; while (lcount) { len = lcount; s = (void *)src; - if (cmd->use_sg) { - size_t offset = *count - lcount; + + offset = *count - lcount; s += offset; - base = scsi_kmap_atomic_sg(cmd->request_buffer, + base = scsi_kmap_atomic_sg(scsi_sglist(cmd), sg_count, &offset, &len); - if (base == NULL) { + if (!base) { *count -= lcount; return; } d = base + offset; - } else - d = cmd->request_buffer; if (direction == ST_TO_CMD) memcpy(d, s, len); @@ -476,7 +457,6 @@ memcpy(s, d, len); lcount -= len; - if (cmd->use_sg) scsi_kunmap_atomic_sg(base); } } @@ -484,22 +464,17 @@ static int stex_direct_copy(struct scsi_cmnd *cmd, const void *src, size_t count) { - struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0]; size_t cp_len = count; int n_elem = 0; - if (cmd->use_sg) { - n_elem = pci_map_sg(hba->pdev, cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); - if (n_elem <= 0) + n_elem = scsi_dma_map(cmd); + if (n_elem < 0) return 0; - } stex_internal_copy(cmd, src, &cp_len, n_elem, ST_TO_CMD); - if (cmd->use_sg) - pci_unmap_sg(hba->pdev, cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); + scsi_dma_unmap(cmd); + return cp_len == count; } @@ -678,18 +653,6 @@ return 0; } -static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd) -{ - if (cmd->sc_data_direction != DMA_NONE) { - if (cmd->use_sg) - pci_unmap_sg(hba->pdev, cmd->request_buffer, - cmd->use_sg, cmd->sc_data_direction); - else - pci_unmap_single(hba->pdev, cmd->SCp.dma_handle, - cmd->request_bufflen, cmd->sc_data_direction); - } -} - static void stex_scsi_done(struct st_ccb *ccb) { struct scsi_cmnd *cmd = ccb->cmd; @@ -756,7 +719,7 @@ if (ccb->cmd->cmnd[0] == MGT_CMD && resp->scsi_status != SAM_STAT_CHECK_CONDITION) { - ccb->cmd->request_bufflen = + scsi_bufflen(ccb->cmd) = 
le32_to_cpu(*(__le32 *)&resp->variable[0]); return; } @@ -855,7 +818,7 @@ ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER)) stex_controller_info(hba, ccb); - stex_unmap_sg(hba, ccb->cmd); + scsi_dma_unmap(ccb->cmd); stex_scsi_done(ccb); hba->out_req_cnt--; } else if (ccb->req_type & PASSTHRU_REQ_TYPE) { @@ -1028,7 +991,7 @@ } fail_out: - stex_unmap_sg(hba, cmd); + scsi_dma_unmap(cmd); hba->wait_ccb->req = NULL; /* nullify the req's future return */ hba->wait_ccb = NULL; result = FAILED; diff -Nurb linux-2.6.22-570/drivers/scsi/sun_esp.c linux-2.6.22-591/drivers/scsi/sun_esp.c --- linux-2.6.22-570/drivers/scsi/sun_esp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/sun_esp.c 2007-12-21 15:36:12.000000000 -0500 @@ -493,7 +493,7 @@ goto fail; host->max_id = (hme ? 16 : 8); - esp = host_to_esp(host); + esp = shost_priv(host); esp->host = host; esp->dev = esp_dev; diff -Nurb linux-2.6.22-570/drivers/scsi/sym53c416.c linux-2.6.22-591/drivers/scsi/sym53c416.c --- linux-2.6.22-570/drivers/scsi/sym53c416.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/sym53c416.c 2007-12-21 15:36:12.000000000 -0500 @@ -332,8 +332,7 @@ int i; unsigned long flags = 0; unsigned char status_reg, pio_int_reg, int_reg; - struct scatterlist *sglist; - unsigned int sgcount; + struct scatterlist *sg; unsigned int tot_trans = 0; /* We search the base address of the host adapter which caused the interrupt */ @@ -429,19 +428,15 @@ { current_command->SCp.phase = data_out; outb(FLUSH_FIFO, base + COMMAND_REG); - sym53c416_set_transfer_counter(base, current_command->request_bufflen); + sym53c416_set_transfer_counter(base, + scsi_bufflen(current_command)); outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG); - if(!current_command->use_sg) - tot_trans = sym53c416_write(base, current_command->request_buffer, current_command->request_bufflen); - else - { - sgcount = current_command->use_sg; - sglist = current_command->request_buffer; - while(sgcount--) - { - 
tot_trans += sym53c416_write(base, SG_ADDRESS(sglist), sglist->length); - sglist++; - } + + scsi_for_each_sg(current_command, + sg, scsi_sg_count(current_command), i) { + tot_trans += sym53c416_write(base, + SG_ADDRESS(sg), + sg->length); } if(tot_trans < current_command->underflow) printk(KERN_WARNING "sym53c416: Underflow, wrote %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow); @@ -455,19 +450,16 @@ { current_command->SCp.phase = data_in; outb(FLUSH_FIFO, base + COMMAND_REG); - sym53c416_set_transfer_counter(base, current_command->request_bufflen); + sym53c416_set_transfer_counter(base, + scsi_bufflen(current_command)); + outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG); - if(!current_command->use_sg) - tot_trans = sym53c416_read(base, current_command->request_buffer, current_command->request_bufflen); - else - { - sgcount = current_command->use_sg; - sglist = current_command->request_buffer; - while(sgcount--) - { - tot_trans += sym53c416_read(base, SG_ADDRESS(sglist), sglist->length); - sglist++; - } + + scsi_for_each_sg(current_command, + sg, scsi_sg_count(current_command), i) { + tot_trans += sym53c416_read(base, + SG_ADDRESS(sg), + sg->length); } if(tot_trans < current_command->underflow) printk(KERN_WARNING "sym53c416: Underflow, read %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow); diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.c linux-2.6.22-591/drivers/scsi/tmscsim.c --- linux-2.6.22-570/drivers/scsi/tmscsim.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/tmscsim.c 2007-12-21 15:36:12.000000000 -0500 @@ -457,27 +457,20 @@ error = 1; DEBUG1(printk("%s(): Mapped sense buffer %p at %x\n", __FUNCTION__, pcmd->sense_buffer, cmdp->saved_dma_handle)); /* Map SG list */ - } else if (pcmd->use_sg) { - pSRB->pSegmentList = (struct scatterlist *) pcmd->request_buffer; - pSRB->SGcount = pci_map_sg(pdev, pSRB->pSegmentList, pcmd->use_sg, - pcmd->sc_data_direction); + } else if 
(scsi_sg_count(pcmd)) { + int nseg; + + nseg = scsi_dma_map(pcmd); + + pSRB->pSegmentList = scsi_sglist(pcmd); + pSRB->SGcount = nseg; + /* TODO: error handling */ - if (!pSRB->SGcount) + if (nseg < 0) error = 1; DEBUG1(printk("%s(): Mapped SG %p with %d (%d) elements\n",\ - __FUNCTION__, pcmd->request_buffer, pSRB->SGcount, pcmd->use_sg)); + __FUNCTION__, scsi_sglist(pcmd), nseg, scsi_sg_count(pcmd))); /* Map single segment */ - } else if (pcmd->request_buffer && pcmd->request_bufflen) { - pSRB->pSegmentList = dc390_sg_build_single(&pSRB->Segmentx, pcmd->request_buffer, pcmd->request_bufflen); - pSRB->SGcount = pci_map_sg(pdev, pSRB->pSegmentList, 1, - pcmd->sc_data_direction); - cmdp->saved_dma_handle = sg_dma_address(pSRB->pSegmentList); - - /* TODO: error handling */ - if (pSRB->SGcount != 1) - error = 1; - DEBUG1(printk("%s(): Mapped request buffer %p at %x\n", __FUNCTION__, pcmd->request_buffer, cmdp->saved_dma_handle)); - /* No mapping !? */ } else pSRB->SGcount = 0; @@ -494,12 +487,10 @@ if (pSRB->SRBFlag) { pci_unmap_sg(pdev, &pSRB->Segmentx, 1, DMA_FROM_DEVICE); DEBUG1(printk("%s(): Unmapped sense buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle)); - } else if (pcmd->use_sg) { - pci_unmap_sg(pdev, pcmd->request_buffer, pcmd->use_sg, pcmd->sc_data_direction); - DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n", __FUNCTION__, pcmd->request_buffer, pcmd->use_sg)); - } else if (pcmd->request_buffer && pcmd->request_bufflen) { - pci_unmap_sg(pdev, &pSRB->Segmentx, 1, pcmd->sc_data_direction); - DEBUG1(printk("%s(): Unmapped request buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle)); + } else { + scsi_dma_unmap(pcmd); + DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n", + __FUNCTION__, scsi_sglist(pcmd), scsi_sg_count(pcmd))); } } @@ -1153,9 +1144,9 @@ struct scatterlist *psgl; pSRB->TotalXferredLen = 0; pSRB->SGIndex = 0; - if (pcmd->use_sg) { + if (scsi_sg_count(pcmd)) { size_t saved; - pSRB->pSegmentList = (struct scatterlist 
*)pcmd->request_buffer; + pSRB->pSegmentList = scsi_sglist(pcmd); psgl = pSRB->pSegmentList; //dc390_pci_sync(pSRB); @@ -1179,12 +1170,6 @@ printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n", pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr); - } else if(pcmd->request_buffer) { - //dc390_pci_sync(pSRB); - - sg_dma_len(&pSRB->Segmentx) = pcmd->request_bufflen - pSRB->Saved_Ptr; - pSRB->SGcount = 1; - pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx; } else { pSRB->SGcount = 0; printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n"); @@ -1579,7 +1564,8 @@ if( (pSRB->SRBState & (SRB_START_+SRB_MSGOUT)) || !(pSRB->SRBState & (SRB_DISCONNECT+SRB_COMPLETED)) ) { /* Selection time out */ - pSRB->TargetStatus = SCSI_STAT_SEL_TIMEOUT; + pSRB->AdaptStatus = H_SEL_TIMEOUT; + pSRB->TargetStatus = 0; goto disc1; } else if (!(pSRB->SRBState & SRB_DISCONNECT) && (pSRB->SRBState & SRB_COMPLETED)) @@ -1612,7 +1598,7 @@ if( !( pACB->scan_devices ) ) { struct scsi_cmnd *pcmd = pSRB->pcmd; - pcmd->resid = pcmd->request_bufflen; + scsi_set_resid(pcmd, scsi_bufflen(pcmd)); SET_RES_DID(pcmd->result, DID_SOFT_ERROR); dc390_Going_remove(pDCB, pSRB); dc390_Free_insert(pACB, pSRB); @@ -1695,7 +1681,7 @@ pcmd->cmnd[0], pDCB->TargetID, pDCB->TargetLUN)); pSRB->SRBFlag |= AUTO_REQSENSE; - pSRB->SavedSGCount = pcmd->use_sg; + pSRB->SavedSGCount = scsi_sg_count(pcmd); pSRB->SavedTotXLen = pSRB->TotalXferredLen; pSRB->AdaptStatus = 0; pSRB->TargetStatus = 0; /* CHECK_CONDITION<<1; */ @@ -1728,22 +1714,22 @@ { /* Last command was a Request Sense */ pSRB->SRBFlag &= ~AUTO_REQSENSE; pSRB->AdaptStatus = 0; - pSRB->TargetStatus = CHECK_CONDITION << 1; + pSRB->TargetStatus = SAM_STAT_CHECK_CONDITION; //pcmd->result = MK_RES(DRIVER_SENSE,DID_OK,0,status); - if (status == (CHECK_CONDITION << 1)) + if (status == SAM_STAT_CHECK_CONDITION) pcmd->result = MK_RES_LNX(0, DID_BAD_TARGET, 0, /*CHECK_CONDITION*/0); else /* Retry */ { if( 
pSRB->pcmd->cmnd[0] == TEST_UNIT_READY /* || pSRB->pcmd->cmnd[0] == START_STOP */) { /* Don't retry on TEST_UNIT_READY */ - pcmd->result = MK_RES_LNX(DRIVER_SENSE,DID_OK,0,CHECK_CONDITION); + pcmd->result = MK_RES_LNX(DRIVER_SENSE, DID_OK, 0, SAM_STAT_CHECK_CONDITION); REMOVABLEDEBUG(printk(KERN_INFO "Cmd=%02x, Result=%08x, XferL=%08x\n",pSRB->pcmd->cmnd[0],\ (u32) pcmd->result, (u32) pSRB->TotalXferredLen)); } else { SET_RES_DRV(pcmd->result, DRIVER_SENSE); - pcmd->use_sg = pSRB->SavedSGCount; + scsi_sg_count(pcmd) = pSRB->SavedSGCount; //pSRB->ScsiCmdLen = (u8) (pSRB->Segment1[0] >> 8); DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun)); pSRB->TotalXferredLen = 0; @@ -1754,7 +1740,7 @@ } if( status ) { - if( status_byte(status) == CHECK_CONDITION ) + if (status == SAM_STAT_CHECK_CONDITION) { if (dc390_RequestSense(pACB, pDCB, pSRB)) { SET_RES_DID(pcmd->result, DID_ERROR); @@ -1762,22 +1748,15 @@ } return; } - else if( status_byte(status) == QUEUE_FULL ) + else if (status == SAM_STAT_TASK_SET_FULL) { scsi_track_queue_full(pcmd->device, pDCB->GoingSRBCnt - 1); - pcmd->use_sg = pSRB->SavedSGCount; + scsi_sg_count(pcmd) = pSRB->SavedSGCount; DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun)); pSRB->TotalXferredLen = 0; SET_RES_DID(pcmd->result, DID_SOFT_ERROR); } - else if(status == SCSI_STAT_SEL_TIMEOUT) - { - pSRB->AdaptStatus = H_SEL_TIMEOUT; - pSRB->TargetStatus = 0; - pcmd->result = MK_RES(0,DID_NO_CONNECT,0,0); - /* Devices are removed below ... 
*/ - } - else if (status_byte(status) == BUSY && + else if (status == SAM_STAT_BUSY && (pcmd->cmnd[0] == TEST_UNIT_READY || pcmd->cmnd[0] == INQUIRY) && pACB->scan_devices) { @@ -1795,12 +1774,17 @@ else { /* Target status == 0 */ status = pSRB->AdaptStatus; - if(status & H_OVER_UNDER_RUN) + if (status == H_OVER_UNDER_RUN) { pSRB->TargetStatus = 0; SET_RES_DID(pcmd->result,DID_OK); SET_RES_MSG(pcmd->result,pSRB->EndMessage); } + else if (status == H_SEL_TIMEOUT) + { + pcmd->result = MK_RES(0, DID_NO_CONNECT, 0, 0); + /* Devices are removed below ... */ + } else if( pSRB->SRBStatus & PARITY_ERROR) { //pcmd->result = MK_RES(0,DID_PARITY,pSRB->EndMessage,0); @@ -1816,7 +1800,7 @@ } cmd_done: - pcmd->resid = pcmd->request_bufflen - pSRB->TotalXferredLen; + scsi_set_resid(pcmd, scsi_bufflen(pcmd) - pSRB->TotalXferredLen); dc390_Going_remove (pDCB, pSRB); /* Add to free list */ diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.h linux-2.6.22-591/drivers/scsi/tmscsim.h --- linux-2.6.22-570/drivers/scsi/tmscsim.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/tmscsim.h 2007-12-21 15:36:12.000000000 -0500 @@ -258,13 +258,6 @@ #define H_BAD_CCB_OR_SG 0x1A #define H_ABORT 0x0FF -/*; SCSI Status byte codes*/ -/* The values defined in include/scsi/scsi.h, to be shifted << 1 */ - -#define SCSI_STAT_UNEXP_BUS_F 0xFD /*; Unexpect Bus Free */ -#define SCSI_STAT_BUS_RST_DETECT 0xFE /*; Scsi Bus Reset detected */ -#define SCSI_STAT_SEL_TIMEOUT 0xFF /*; Selection Time out */ - /* cmd->result */ #define RES_TARGET 0x000000FF /* Target State */ #define RES_TARGET_LNX STATUS_MASK /* Only official ... 
*/ @@ -273,7 +266,7 @@ #define RES_DRV 0xFF000000 /* DRIVER_ codes */ #define MK_RES(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)) -#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)<<1) +#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)) #define SET_RES_TARGET(who, tgt) do { who &= ~RES_TARGET; who |= (int)(tgt); } while (0) #define SET_RES_TARGET_LNX(who, tgt) do { who &= ~RES_TARGET_LNX; who |= (int)(tgt) << 1; } while (0) diff -Nurb linux-2.6.22-570/drivers/scsi/u14-34f.c linux-2.6.22-591/drivers/scsi/u14-34f.c --- linux-2.6.22-570/drivers/scsi/u14-34f.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/u14-34f.c 2007-12-21 15:36:12.000000000 -0500 @@ -1111,7 +1111,7 @@ static void map_dma(unsigned int i, unsigned int j) { unsigned int data_len = 0; unsigned int k, count, pci_dir; - struct scatterlist *sgpnt; + struct scatterlist *sg; struct mscp *cpp; struct scsi_cmnd *SCpnt; @@ -1124,33 +1124,28 @@ cpp->sense_len = sizeof SCpnt->sense_buffer; - if (!SCpnt->use_sg) { - - /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */ - if (!SCpnt->request_bufflen) pci_dir = PCI_DMA_BIDIRECTIONAL; - - if (SCpnt->request_buffer) - cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev, - SCpnt->request_buffer, SCpnt->request_bufflen, pci_dir)); - - cpp->data_len = H2DEV(SCpnt->request_bufflen); - return; - } - - sgpnt = (struct scatterlist *) SCpnt->request_buffer; - count = pci_map_sg(HD(j)->pdev, sgpnt, SCpnt->use_sg, pci_dir); - - for (k = 0; k < count; k++) { - cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k])); - cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k])); - data_len += sgpnt[k].length; + if (scsi_bufflen(SCpnt)) { + count = scsi_dma_map(SCpnt); + BUG_ON(count < 0); + + scsi_for_each_sg(SCpnt, sg, count, k) { + cpp->sglist[k].address = H2DEV(sg_dma_address(sg)); + 
cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg)); + data_len += sg->length; } cpp->sg = TRUE; - cpp->use_sg = SCpnt->use_sg; - cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist, - SCpnt->use_sg * sizeof(struct sg_list), pci_dir)); + cpp->use_sg = scsi_sg_count(SCpnt); + cpp->data_address = + H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist, + cpp->use_sg * sizeof(struct sg_list), + pci_dir)); cpp->data_len = H2DEV(data_len); + + } else { + pci_dir = PCI_DMA_BIDIRECTIONAL; + cpp->data_len = H2DEV(scsi_bufflen(SCpnt)); + } } static void unmap_dma(unsigned int i, unsigned int j) { @@ -1165,8 +1160,7 @@ pci_unmap_single(HD(j)->pdev, DEV2H(cpp->sense_addr), DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); - if (SCpnt->use_sg) - pci_unmap_sg(HD(j)->pdev, SCpnt->request_buffer, SCpnt->use_sg, pci_dir); + scsi_dma_unmap(SCpnt); if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL; @@ -1187,9 +1181,9 @@ pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->sense_addr), DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); - if (SCpnt->use_sg) - pci_dma_sync_sg_for_cpu(HD(j)->pdev, SCpnt->request_buffer, - SCpnt->use_sg, pci_dir); + if (scsi_sg_count(SCpnt)) + pci_dma_sync_sg_for_cpu(HD(j)->pdev, scsi_sglist(SCpnt), + scsi_sg_count(SCpnt), pci_dir); if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL; diff -Nurb linux-2.6.22-570/drivers/scsi/ultrastor.c linux-2.6.22-591/drivers/scsi/ultrastor.c --- linux-2.6.22-570/drivers/scsi/ultrastor.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/ultrastor.c 2007-12-21 15:36:12.000000000 -0500 @@ -675,16 +675,15 @@ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt) { - struct scatterlist *sl; + struct scatterlist *sg; long transfer_length = 0; int i, max; - sl = (struct scatterlist *) SCpnt->request_buffer; - max = SCpnt->use_sg; - for (i = 0; i < max; i++) { - mscp->sglist[i].address = isa_page_to_bus(sl[i].page) + sl[i].offset; - mscp->sglist[i].num_bytes = sl[i].length; - 
transfer_length += sl[i].length; + max = scsi_sg_count(SCpnt); + scsi_for_each_sg(SCpnt, sg, max, i) { + mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset; + mscp->sglist[i].num_bytes = sg->length; + transfer_length += sg->length; } mscp->number_of_sg_list = max; mscp->transfer_data = isa_virt_to_bus(mscp->sglist); @@ -730,15 +729,15 @@ my_mscp->target_id = SCpnt->device->id; my_mscp->ch_no = 0; my_mscp->lun = SCpnt->device->lun; - if (SCpnt->use_sg) { + if (scsi_sg_count(SCpnt)) { /* Set scatter/gather flag in SCSI command packet */ my_mscp->sg = TRUE; build_sg_list(my_mscp, SCpnt); } else { /* Unset scatter/gather flag in SCSI command packet */ my_mscp->sg = FALSE; - my_mscp->transfer_data = isa_virt_to_bus(SCpnt->request_buffer); - my_mscp->transfer_data_length = SCpnt->request_bufflen; + my_mscp->transfer_data = isa_virt_to_bus(scsi_sglist(SCpnt)); + my_mscp->transfer_data_length = scsi_bufflen(SCpnt); } my_mscp->command_link = 0; /*???*/ my_mscp->scsi_command_link_id = 0; /*???*/ diff -Nurb linux-2.6.22-570/drivers/scsi/wd7000.c linux-2.6.22-591/drivers/scsi/wd7000.c --- linux-2.6.22-570/drivers/scsi/wd7000.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/scsi/wd7000.c 2007-12-21 15:36:12.000000000 -0500 @@ -1091,6 +1091,7 @@ unchar *cdb = (unchar *) SCpnt->cmnd; unchar idlun; short cdblen; + int nseg; Adapter *host = (Adapter *) SCpnt->device->host->hostdata; cdblen = SCpnt->cmd_len; @@ -1106,28 +1107,29 @@ SCpnt->host_scribble = (unchar *) scb; scb->host = host; - if (SCpnt->use_sg) { - struct scatterlist *sg = (struct scatterlist *) SCpnt->request_buffer; + nseg = scsi_sg_count(SCpnt); + if (nseg) { + struct scatterlist *sg; unsigned i; if (SCpnt->device->host->sg_tablesize == SG_NONE) { panic("wd7000_queuecommand: scatter/gather not supported.\n"); } - dprintk("Using scatter/gather with %d elements.\n", SCpnt->use_sg); + dprintk("Using scatter/gather with %d elements.\n", nseg); sgb = scb->sgb; scb->op = 1; 
any2scsi(scb->dataptr, (int) sgb); - any2scsi(scb->maxlen, SCpnt->use_sg * sizeof(Sgb)); + any2scsi(scb->maxlen, nseg * sizeof(Sgb)); - for (i = 0; i < SCpnt->use_sg; i++) { - any2scsi(sgb[i].ptr, isa_page_to_bus(sg[i].page) + sg[i].offset); - any2scsi(sgb[i].len, sg[i].length); + scsi_for_each_sg(SCpnt, sg, nseg, i) { + any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset); + any2scsi(sgb[i].len, sg->length); } } else { scb->op = 0; - any2scsi(scb->dataptr, isa_virt_to_bus(SCpnt->request_buffer)); - any2scsi(scb->maxlen, SCpnt->request_bufflen); + any2scsi(scb->dataptr, isa_virt_to_bus(scsi_sglist(SCpnt))); + any2scsi(scb->maxlen, scsi_bufflen(SCpnt)); } /* FIXME: drop lock and yield here ? */ diff -Nurb linux-2.6.22-570/drivers/scsi/zorro7xx.c linux-2.6.22-591/drivers/scsi/zorro7xx.c --- linux-2.6.22-570/drivers/scsi/zorro7xx.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/scsi/zorro7xx.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,180 @@ +/* + * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux. + * Amiga MacroSystemUS WarpEngine SCSI controller. + * Amiga Technologies/DKB A4091 SCSI controller. + * + * Written 1997 by Alan Hourihane + * plus modifications of the 53c7xx.c driver to support the Amiga. 
+ * + * Rewritten to use 53c700.c by Kars de Jong + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "53c700.h" + +MODULE_AUTHOR("Alan Hourihane / Kars de Jong "); +MODULE_DESCRIPTION("Amiga Zorro NCR53C710 driver"); +MODULE_LICENSE("GPL"); + + +static struct scsi_host_template zorro7xx_scsi_driver_template = { + .proc_name = "zorro7xx", + .this_id = 7, + .module = THIS_MODULE, +}; + +static struct zorro_driver_data { + const char *name; + unsigned long offset; + int absolute; /* offset is absolute address */ +} zorro7xx_driver_data[] __devinitdata = { + { .name = "PowerUP 603e+", .offset = 0xf40000, .absolute = 1 }, + { .name = "WarpEngine 40xx", .offset = 0x40000 }, + { .name = "A4091", .offset = 0x800000 }, + { .name = "GForce 040/060", .offset = 0x40000 }, + { 0 } +}; + +static struct zorro_device_id zorro7xx_zorro_tbl[] __devinitdata = { + { + .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS, + .driver_data = (unsigned long)&zorro7xx_driver_data[0], + }, + { + .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx, + .driver_data = (unsigned long)&zorro7xx_driver_data[1], + }, + { + .id = ZORRO_PROD_CBM_A4091_1, + .driver_data = (unsigned long)&zorro7xx_driver_data[2], + }, + { + .id = ZORRO_PROD_CBM_A4091_2, + .driver_data = (unsigned long)&zorro7xx_driver_data[2], + }, + { + .id = ZORRO_PROD_GVP_GFORCE_040_060, + .driver_data = (unsigned long)&zorro7xx_driver_data[3], + }, + { 0 } +}; + +static int __devinit zorro7xx_init_one(struct zorro_dev *z, + const struct zorro_device_id *ent) +{ + struct Scsi_Host * host = NULL; + struct NCR_700_Host_Parameters *hostdata; + struct zorro_driver_data *zdd; + unsigned long board, ioaddr; + + board = zorro_resource_start(z); + zdd = (struct zorro_driver_data *)ent->driver_data; + + if (zdd->absolute) { + ioaddr = zdd->offset; + } else { + ioaddr = board + zdd->offset; + } + + if (!zorro_request_device(z, zdd->name)) { + printk(KERN_ERR "zorro7xx: cannot reserve region 0x%lx, abort\n", + board); + 
return -EBUSY; + } + + hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); + if (hostdata == NULL) { + printk(KERN_ERR "zorro7xx: Failed to allocate host data\n"); + goto out_release; + } + + memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); + + /* Fill in the required pieces of hostdata */ + if (ioaddr > 0x01000000) + hostdata->base = ioremap(ioaddr, zorro_resource_len(z)); + else + hostdata->base = (void __iomem *)ZTWO_VADDR(ioaddr); + + hostdata->clock = 50; + hostdata->chip710 = 1; + + /* Settings for at least WarpEngine 40xx */ + hostdata->ctest7_extra = CTEST7_TT1; + + zorro7xx_scsi_driver_template.name = zdd->name; + + /* and register the chip */ + host = NCR_700_detect(&zorro7xx_scsi_driver_template, hostdata, + &z->dev); + if (!host) { + printk(KERN_ERR "zorro7xx: No host detected; " + "board configuration problem?\n"); + goto out_free; + } + + host->this_id = 7; + host->base = ioaddr; + host->irq = IRQ_AMIGA_PORTS; + + if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "zorro7xx-scsi", + host)) { + printk(KERN_ERR "zorro7xx: request_irq failed\n"); + goto out_put_host; + } + + scsi_scan_host(host); + + return 0; + + out_put_host: + scsi_host_put(host); + out_free: + if (ioaddr > 0x01000000) + iounmap(hostdata->base); + kfree(hostdata); + out_release: + zorro_release_device(z); + + return -ENODEV; +} + +static __devexit void zorro7xx_remove_one(struct zorro_dev *z) +{ + struct Scsi_Host *host = dev_to_shost(&z->dev); + struct NCR_700_Host_Parameters *hostdata = shost_priv(host); + + scsi_remove_host(host); + + NCR_700_release(host); + kfree(hostdata); + free_irq(host->irq, host); + zorro_release_device(z); +} + +static struct zorro_driver zorro7xx_driver = { + .name = "zorro7xx-scsi", + .id_table = zorro7xx_zorro_tbl, + .probe = zorro7xx_init_one, + .remove = __devexit_p(zorro7xx_remove_one), +}; + +static int __init zorro7xx_scsi_init(void) +{ + return zorro_register_driver(&zorro7xx_driver); +} + +static void __exit 
zorro7xx_scsi_exit(void) +{ + zorro_unregister_driver(&zorro7xx_driver); +} + +module_init(zorro7xx_scsi_init); +module_exit(zorro7xx_scsi_exit); diff -Nurb linux-2.6.22-570/drivers/serial/8250.c linux-2.6.22-591/drivers/serial/8250.c --- linux-2.6.22-570/drivers/serial/8250.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/8250.c 2007-12-21 15:36:12.000000000 -0500 @@ -2845,6 +2845,25 @@ } EXPORT_SYMBOL(serial8250_unregister_port); +/** + * serial8250_unregister_by_port - remove a 16x50 serial port + * at runtime. + * @port: A &struct uart_port that describes the port to remove. + * + * Remove one serial port. This may not be called from interrupt + * context. We hand the port back to the our control. + */ +void serial8250_unregister_by_port(struct uart_port *port) +{ + struct uart_8250_port *uart; + + uart = serial8250_find_match_or_unused(port); + + if (uart) + serial8250_unregister_port(uart->port.line); +} +EXPORT_SYMBOL(serial8250_unregister_by_port); + static int __init serial8250_init(void) { int ret, i; diff -Nurb linux-2.6.22-570/drivers/serial/8250_kgdb.c linux-2.6.22-591/drivers/serial/8250_kgdb.c --- linux-2.6.22-570/drivers/serial/8250_kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/serial/8250_kgdb.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,515 @@ +/* + * 8250 interface for kgdb. + * + * This is a merging of many different drivers, and all of the people have + * had an impact in some form or another: + * + * 2004-2005 (c) MontaVista Software, Inc. + * 2005-2006 (c) Wind River Systems, Inc. 
+ * + * Amit Kale , David Grothe , + * Scott Foehner , George Anzinger , + * Robert Walsh , wangdi , + * San Mehat, Tom Rini , + * Jason Wessel + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include /* For BASE_BAUD and SERIAL_PORT_DFNS */ + +#include "8250.h" + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +MODULE_DESCRIPTION("KGDB driver for the 8250"); +MODULE_LICENSE("GPL"); +/* These will conflict with early_param otherwise. */ +#ifdef CONFIG_KGDB_8250_MODULE +static char config[256]; +module_param_string(kgdb8250, config, 256, 0); +MODULE_PARM_DESC(kgdb8250, + " kgdb8250=,
,,\n"); +static struct kgdb_io local_kgdb_io_ops; +#endif /* CONFIG_KGDB_8250_MODULE */ + +/* Speed of the UART. */ +static int kgdb8250_baud; + +/* Flag for if we need to call request_mem_region */ +static int kgdb8250_needs_request_mem_region; + +static char kgdb8250_buf[GDB_BUF_SIZE]; +static atomic_t kgdb8250_buf_in_cnt; +static int kgdb8250_buf_out_inx; + +/* Old-style serial definitions, if existant, and a counter. */ +#ifdef CONFIG_KGDB_SIMPLE_SERIAL +static int __initdata should_copy_rs_table = 1; +static struct serial_state old_rs_table[] __initdata = { +#ifdef SERIAL_PORT_DFNS + SERIAL_PORT_DFNS +#endif +}; +#endif + +/* Our internal table of UARTS. */ +#define UART_NR CONFIG_SERIAL_8250_NR_UARTS +static struct uart_port kgdb8250_ports[UART_NR]; + +static struct uart_port *current_port; + +/* Base of the UART. */ +static void *kgdb8250_addr; + +/* Forward declarations. */ +static int kgdb8250_uart_init(void); +static int __init kgdb_init_io(void); +static int __init kgdb8250_opt(char *str); + +/* These are much shorter calls to ioread8/iowrite8 that take into + * account our shifts, etc. */ +static inline unsigned int kgdb_ioread(u8 mask) +{ + return ioread8(kgdb8250_addr + (mask << current_port->regshift)); +} + +static inline void kgdb_iowrite(u8 val, u8 mask) +{ + iowrite8(val, kgdb8250_addr + (mask << current_port->regshift)); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void kgdb_put_debug_char(u8 chr) +{ + while (!(kgdb_ioread(UART_LSR) & UART_LSR_THRE)) ; + + kgdb_iowrite(chr, UART_TX); +} + +/* + * Get a byte from the hardware data buffer and return it + */ +static int read_data_bfr(void) +{ + char it = kgdb_ioread(UART_LSR); + + if (it & UART_LSR_DR) + return kgdb_ioread(UART_RX); + + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... 
+ */ + if (it & 0xc) { + kgdb8250_uart_init(); + kgdb_put_debug_char('-'); + return '-'; + } + + return -1; +} + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + */ +static int kgdb_get_debug_char(void) +{ + int retchr; + + /* intr routine has q'd chars */ + if (atomic_read(&kgdb8250_buf_in_cnt) != 0) { + retchr = kgdb8250_buf[kgdb8250_buf_out_inx++]; + kgdb8250_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&kgdb8250_buf_in_cnt); + return retchr; + } + + do { + retchr = read_data_bfr(); + } while (retchr < 0); + + return retchr; +} + +/* + * This is the receiver interrupt routine for the GDB stub. + * All that we need to do is verify that the interrupt happened on the + * line we're in charge of. If this is true, schedule a breakpoint and + * return. + */ +static irqreturn_t +kgdb8250_interrupt(int irq, void *dev_id) +{ + if (kgdb_ioread(UART_IIR) & UART_IIR_RDI) { + /* Throw away the data if another I/O routine is active. */ + if (kgdb_io_ops.read_char != kgdb_get_debug_char && + (kgdb_ioread(UART_LSR) & UART_LSR_DR)) + kgdb_ioread(UART_RX); + else + breakpoint(); + } + + return IRQ_HANDLED; +} + +/* + * Initializes the UART. + * Returns: + * 0 on success, 1 on failure. + */ +static int +kgdb8250_uart_init (void) +{ + unsigned int ier, base_baud = current_port->uartclk ? 
+ current_port->uartclk / 16 : BASE_BAUD; + + /* test uart existance */ + if(kgdb_ioread(UART_LSR) == 0xff) + return -1; + + /* disable interrupts */ + kgdb_iowrite(0, UART_IER); + +#if defined(CONFIG_ARCH_OMAP1510) + /* Workaround to enable 115200 baud on OMAP1510 internal ports */ + if (cpu_is_omap1510() && is_omap_port((void *)kgdb8250_addr)) { + if (kgdb8250_baud == 115200) { + base_baud = 1; + kgdb8250_baud = 1; + kgdb_iowrite(1, UART_OMAP_OSC_12M_SEL); + } else + kgdb_iowrite(0, UART_OMAP_OSC_12M_SEL); + } +#endif + /* set DLAB */ + kgdb_iowrite(UART_LCR_DLAB, UART_LCR); + + /* set baud */ + kgdb_iowrite((base_baud / kgdb8250_baud) & 0xff, UART_DLL); + kgdb_iowrite((base_baud / kgdb8250_baud) >> 8, UART_DLM); + + /* reset DLAB, set LCR */ + kgdb_iowrite(UART_LCR_WLEN8, UART_LCR); + + /* set DTR and RTS */ + kgdb_iowrite(UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS, UART_MCR); + + /* setup fifo */ + kgdb_iowrite(UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR + | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_8, + UART_FCR); + + /* clear pending interrupts */ + kgdb_ioread(UART_IIR); + kgdb_ioread(UART_RX); + kgdb_ioread(UART_LSR); + kgdb_ioread(UART_MSR); + + /* turn on RX interrupt only */ + kgdb_iowrite(UART_IER_RDI, UART_IER); + + /* + * Borrowed from the main 8250 driver. + * Try writing and reading the UART_IER_UUE bit (b6). + * If it works, this is probably one of the Xscale platform's + * internal UARTs. + * We're going to explicitly set the UUE bit to 0 before + * trying to write and read a 1 just to make sure it's not + * already a 1 and maybe locked there before we even start start. + */ + ier = kgdb_ioread(UART_IER); + kgdb_iowrite(ier & ~UART_IER_UUE, UART_IER); + if (!(kgdb_ioread(UART_IER) & UART_IER_UUE)) { + /* + * OK it's in a known zero state, try writing and reading + * without disturbing the current state of the other bits. + */ + kgdb_iowrite(ier | UART_IER_UUE, UART_IER); + if (kgdb_ioread(UART_IER) & UART_IER_UUE) + /* + * It's an Xscale. 
+ */ + ier |= UART_IER_UUE | UART_IER_RTOIE; + } + kgdb_iowrite(ier, UART_IER); + return 0; +} + +/* + * Copy the old serial_state table to our uart_port table if we haven't + * had values specifically configured in. We need to make sure this only + * happens once. + */ +static void __init kgdb8250_copy_rs_table(void) +{ +#ifdef CONFIG_KGDB_SIMPLE_SERIAL + int i; + + if (!should_copy_rs_table) + return; + + for (i = 0; i < ARRAY_SIZE(old_rs_table); i++) { + kgdb8250_ports[i].iobase = old_rs_table[i].port; + kgdb8250_ports[i].irq = irq_canonicalize(old_rs_table[i].irq); + kgdb8250_ports[i].uartclk = old_rs_table[i].baud_base * 16; + kgdb8250_ports[i].membase = old_rs_table[i].iomem_base; + kgdb8250_ports[i].iotype = old_rs_table[i].io_type; + kgdb8250_ports[i].regshift = old_rs_table[i].iomem_reg_shift; + kgdb8250_ports[i].line = i; + } + + should_copy_rs_table = 0; +#endif +} + +/* + * Hookup our IRQ line now that it is safe to do so, after we grab any + * memory regions we might need to. If we haven't been initialized yet, + * go ahead and copy the old_rs_table in. + */ +static void __init kgdb8250_late_init(void) +{ + /* Try and copy the old_rs_table. */ + kgdb8250_copy_rs_table(); + +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE) + /* Take the port away from the main driver. */ + serial8250_unregister_by_port(current_port); + + /* Now reinit the port as the above has disabled things. */ + kgdb8250_uart_init(); +#endif + /* We may need to call request_mem_region() first. */ + if (kgdb8250_needs_request_mem_region) + request_mem_region(current_port->mapbase, + 8 << current_port->regshift, "kgdb"); + if (request_irq(current_port->irq, kgdb8250_interrupt, SA_SHIRQ, + "GDB-stub", current_port) < 0) + printk(KERN_ERR "KGDB failed to request the serial IRQ (%d)\n", + current_port->irq); +} + +static __init int kgdb_init_io(void) +{ + /* Give us the basic table of uarts. 
*/ + kgdb8250_copy_rs_table(); + + /* We're either a module and parse a config string, or we have a + * semi-static config. */ +#ifdef CONFIG_KGDB_8250_MODULE + if (strlen(config)) { + if (kgdb8250_opt(config)) + return -EINVAL; + } else { + printk(KERN_ERR "kgdb8250: argument error, usage: " + "kgdb8250=,
,,\n"); + return -EINVAL; + } +#elif defined(CONFIG_KGDB_SIMPLE_SERIAL) + kgdb8250_baud = CONFIG_KGDB_BAUDRATE; + + /* Setup our pointer to the serial port now. */ + current_port = &kgdb8250_ports[CONFIG_KGDB_PORT_NUM]; +#else + if (kgdb8250_opt(CONFIG_KGDB_8250_CONF_STRING)) + return -EINVAL; +#endif + + + /* Internal driver setup. */ + switch (current_port->iotype) { + case UPIO_MEM: + if (current_port->mapbase) + kgdb8250_needs_request_mem_region = 1; + if (current_port->flags & UPF_IOREMAP) { + current_port->membase = ioremap(current_port->mapbase, + 8 << current_port->regshift); + if (!current_port->membase) + return -EIO; /* Failed. */ + } + kgdb8250_addr = current_port->membase; + break; + case UPIO_PORT: + default: + kgdb8250_addr = ioport_map(current_port->iobase, + 8 << current_port->regshift); + if (!kgdb8250_addr) + return -EIO; /* Failed. */ + } + + if (kgdb8250_uart_init() == -1) { + printk(KERN_ERR "kgdb8250: init failed\n"); + return -EIO; + } +#ifdef CONFIG_KGDB_8250_MODULE + /* Attach the kgdb irq. When this is built into the kernel, it + * is called as a part of late_init sequence. + */ + kgdb8250_late_init(); + if (kgdb_register_io_module(&local_kgdb_io_ops)) + return -EINVAL; + + printk(KERN_INFO "kgdb8250: debugging enabled\n"); +#endif /* CONFIG_KGD_8250_MODULE */ + + return 0; +} + +#ifdef CONFIG_KGDB_8250_MODULE +/* If it is a module the kgdb_io_ops should be a static which + * is passed to the KGDB I/O initialization + */ +static struct kgdb_io local_kgdb_io_ops = { +#else /* ! CONFIG_KGDB_8250_MODULE */ +struct kgdb_io kgdb_io_ops = { +#endif /* ! 
CONFIG_KGD_8250_MODULE */ + .read_char = kgdb_get_debug_char, + .write_char = kgdb_put_debug_char, + .init = kgdb_init_io, + .late_init = kgdb8250_late_init, +}; + +/** + * kgdb8250_add_port - Define a serial port for use with KGDB + * @i: The index of the port being added + * @serial_req: The &struct uart_port describing the port + * + * On platforms where we must register the serial device + * dynamically, this is the best option if a platform also normally + * calls early_serial_setup(). + */ +void __init kgdb8250_add_port(int i, struct uart_port *serial_req) +{ + /* Make sure we've got the built-in data before we override. */ + kgdb8250_copy_rs_table(); + + /* Copy the whole thing over. */ + if (current_port != &kgdb8250_ports[i]) + memcpy(&kgdb8250_ports[i], serial_req, sizeof(struct uart_port)); +} + +/** + * kgdb8250_add_platform_port - Define a serial port for use with KGDB + * @i: The index of the port being added + * @p: The &struct plat_serial8250_port describing the port + * + * On platforms where we must register the serial device + * dynamically, this is the best option if a platform normally + * handles uart setup with an array of &struct plat_serial8250_port. + */ +void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *p) +{ + /* Make sure we've got the built-in data before we override. */ + kgdb8250_copy_rs_table(); + + kgdb8250_ports[i].iobase = p->iobase; + kgdb8250_ports[i].membase = p->membase; + kgdb8250_ports[i].irq = p->irq; + kgdb8250_ports[i].uartclk = p->uartclk; + kgdb8250_ports[i].regshift = p->regshift; + kgdb8250_ports[i].iotype = p->iotype; + kgdb8250_ports[i].flags = p->flags; + kgdb8250_ports[i].mapbase = p->mapbase; +} + +/* + * Syntax for this cmdline option is: + * kgdb8250=,
,," + */ +static int __init kgdb8250_opt(char *str) +{ + /* We'll fill out and use the first slot. */ + current_port = &kgdb8250_ports[0]; + + if (!strncmp(str, "io", 2)) { + current_port->iotype = UPIO_PORT; + str += 2; + } else if (!strncmp(str, "mmap", 4)) { + current_port->iotype = UPIO_MEM; + current_port->flags |= UPF_IOREMAP; + str += 4; + } else if (!strncmp(str, "mmio", 4)) { + current_port->iotype = UPIO_MEM; + current_port->flags &= ~UPF_IOREMAP; + str += 4; + } else + goto errout; + + if (*str != ',') + goto errout; + str++; + + if (current_port->iotype == UPIO_PORT) + current_port->iobase = simple_strtoul(str, &str, 16); + else { + if (current_port->flags & UPF_IOREMAP) + current_port->mapbase = + (unsigned long) simple_strtoul(str, &str, 16); + else + current_port->membase = + (void *) simple_strtoul(str, &str, 16); + } + + if (*str != ',') + goto errout; + str++; + + kgdb8250_baud = simple_strtoul(str, &str, 10); + if (!kgdb8250_baud) + goto errout; + + if (*str != ',') + goto errout; + str++; + + current_port->irq = simple_strtoul(str, &str, 10); + +#ifdef CONFIG_KGDB_SIMPLE_SERIAL + should_copy_rs_table = 0; +#endif + + return 0; + + errout: + printk(KERN_ERR "Invalid syntax for option kgdb8250=\n"); + return 1; +} + +#ifdef CONFIG_KGDB_8250_MODULE +static void cleanup_kgdb8250(void) +{ + kgdb_unregister_io_module(&local_kgdb_io_ops); + + /* Clean up the irq and memory */ + free_irq(current_port->irq, current_port); + + if (kgdb8250_needs_request_mem_region) + release_mem_region(current_port->mapbase, + 8 << current_port->regshift); + /* Hook up the serial port back to what it was previously + * hooked up to. + */ +#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE) + /* Give the port back to the 8250 driver. */ + serial8250_register_port(current_port); +#endif +} + +module_init(kgdb_init_io); +module_exit(cleanup_kgdb8250); +#else /* ! CONFIG_KGDB_8250_MODULE */ +early_param("kgdb8250", kgdb8250_opt); +#endif /* ! 
CONFIG_KGDB_8250_MODULE */ diff -Nurb linux-2.6.22-570/drivers/serial/Kconfig linux-2.6.22-591/drivers/serial/Kconfig --- linux-2.6.22-570/drivers/serial/Kconfig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/serial/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -107,7 +107,7 @@ config SERIAL_8250_NR_UARTS int "Maximum number of 8250/16550 serial ports" - depends on SERIAL_8250 + depends on SERIAL_8250 || KGDB_8250 default "4" help Set this to the number of serial ports you want the driver diff -Nurb linux-2.6.22-570/drivers/serial/Makefile linux-2.6.22-591/drivers/serial/Makefile --- linux-2.6.22-570/drivers/serial/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -23,6 +23,7 @@ obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o +obj-$(CONFIG_KGDB_AMBA_PL011) += pl011_kgdb.o obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o obj-$(CONFIG_SERIAL_PXA) += pxa.o obj-$(CONFIG_SERIAL_PNX8XXX) += pnx8xxx_uart.o @@ -50,10 +51,12 @@ obj-$(CONFIG_SERIAL_MPC52xx) += mpc52xx_uart.o obj-$(CONFIG_SERIAL_ICOM) += icom.o obj-$(CONFIG_SERIAL_M32R_SIO) += m32r_sio.o +obj-$(CONFIG_KGDB_MPSC) += mpsc_kgdb.o obj-$(CONFIG_SERIAL_MPSC) += mpsc.o obj-$(CONFIG_ETRAX_SERIAL) += crisv10.o obj-$(CONFIG_SERIAL_JSM) += jsm/ obj-$(CONFIG_SERIAL_TXX9) += serial_txx9.o +obj-$(CONFIG_KGDB_TXX9) += serial_txx9_kgdb.o obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o @@ -62,3 +65,4 @@ obj-$(CONFIG_SERIAL_NETX) += netx-serial.o obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o +obj-$(CONFIG_KGDB_8250) += 8250_kgdb.o diff -Nurb linux-2.6.22-570/drivers/serial/amba-pl011.c linux-2.6.22-591/drivers/serial/amba-pl011.c --- linux-2.6.22-570/drivers/serial/amba-pl011.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/amba-pl011.c 2007-12-21 15:36:12.000000000 -0500 @@ -332,7 +332,7 @@ /* * Allocate the IRQ */ - retval = request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap); + retval = request_irq(uap->port.irq, pl011_int, SA_SHIRQ, "uart-pl011", uap); if (retval) goto clk_dis; diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/Makefile linux-2.6.22-591/drivers/serial/cpm_uart/Makefile --- linux-2.6.22-570/drivers/serial/cpm_uart/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/cpm_uart/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -7,5 +7,6 @@ # Select the correct platform objects. cpm_uart-objs-$(CONFIG_CPM2) += cpm_uart_cpm2.o cpm_uart-objs-$(CONFIG_8xx) += cpm_uart_cpm1.o +cpm_uart-objs-$(CONFIG_KGDB_CPM_UART) += cpm_uart_kgdb.o cpm_uart-objs := cpm_uart_core.o $(cpm_uart-objs-y) diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart.h --- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart.h 2007-12-21 15:36:12.000000000 -0500 @@ -50,6 +50,41 @@ #define SCC_WAIT_CLOSING 100 +#ifdef CONFIG_KGDB_CPM_UART + +/* Speed of the debug UART. */ +#if CONFIG_KGDB_BAUDRATE == 9600 +#define KGDB_BAUD B9600 +#elif CONFIG_KGDB_BAUDRATE == 19200 +#define KGDB_BAUD B19200 +#elif CONFIG_KGDB_BAUDRATE == 38400 +#define KGDB_BAUD B38400 +#elif CONFIG_KGDB_BAUDRATE == 57600 +#define KGDB_BAUD B57600 +#elif CONFIG_KGDB_BAUDRATE == 115200 +#define KGDB_BAUD B115200 /* Start with this if not given */ +#else +#error Unsupported baud rate! 
+#endif + +#if defined(CONFIG_KGDB_CPM_UART_SCC1) +#define KGDB_PINFO_INDEX UART_SCC1 +#elif defined(CONFIG_KGDB_CPM_UART_SCC2) +#define KGDB_PINFO_INDEX UART_SCC2 +#elif defined(CONFIG_KGDB_CPM_UART_SCC3) +#define KGDB_PINFO_INDEX UART_SCC3 +#elif defined(CONFIG_KGDB_CPM_UART_SCC4) +#define KGDB_PINFO_INDEX UART_SCC4 +#elif defined(CONFIG_KGDB_CPM_UART_SMC1) +#define KGDB_PINFO_INDEX UART_SMC1 +#elif defined(CONFIG_KGDB_CPM_UART_SMC2) +#define KGDB_PINFO_INDEX UART_SMC2 +#else +#error The port for KGDB is undefined! +#endif + +#endif /* CONFIG_KGDB_CPM_UART */ + struct uart_cpm_port { struct uart_port port; u16 rx_nrfifos; @@ -86,6 +121,9 @@ extern int cpm_uart_nr; extern struct uart_cpm_port cpm_uart_ports[UART_NR]; +void cpm_uart_early_write(int index, const char *s, u_int count); +int cpm_uart_early_setup(int index,int early); + /* these are located in their respective files */ void cpm_line_cr_cmd(int line, int cmd); int cpm_uart_init_portdesc(void); @@ -132,5 +170,4 @@ return 0; } - #endif /* CPM_UART_H */ diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_core.c --- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_core.c 2007-12-21 15:36:12.000000000 -0500 @@ -1073,22 +1073,17 @@ return 0; } -#ifdef CONFIG_SERIAL_CPM_CONSOLE -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... 
- * - * Note that this is called with interrupts already disabled - */ -static void cpm_uart_console_write(struct console *co, const char *s, +void cpm_uart_early_write(int index, const char *s, u_int count) { - struct uart_cpm_port *pinfo = - &cpm_uart_ports[cpm_uart_port_map[co->index]]; + struct uart_cpm_port *pinfo; unsigned int i; volatile cbd_t *bdp, *bdbase; volatile unsigned char *cp; + BUG_ON(index>UART_NR); + pinfo = &cpm_uart_ports[index]; + /* Get the address of the host memory buffer. */ bdp = pinfo->tx_cur; @@ -1152,19 +1147,14 @@ pinfo->tx_cur = (volatile cbd_t *) bdp; } - -static int __init cpm_uart_console_setup(struct console *co, char *options) +int cpm_uart_early_setup(int index, int early) { + int ret; struct uart_port *port; struct uart_cpm_port *pinfo; - int baud = 38400; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - int ret; struct fs_uart_platform_info *pdata; - struct platform_device* pdev = early_uart_get_pdev(co->index); + struct platform_device* pdev = early_uart_get_pdev(index); if (!pdev) { pr_info("cpm_uart: console: compat mode\n"); @@ -1172,8 +1162,9 @@ cpm_uart_init_portdesc(); } + BUG_ON(index>UART_NR); port = - (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]]; + (struct uart_port *)&cpm_uart_ports[index]; pinfo = (struct uart_cpm_port *)port; if (!pdev) { if (pinfo->set_lineif) @@ -1187,15 +1178,6 @@ cpm_uart_drv_get_platform_data(pdev, 1); } - pinfo->flags |= FLAG_CONSOLE; - - if (options) { - uart_parse_options(options, &baud, &parity, &bits, &flow); - } else { - if ((baud = uart_baudrate()) == -1) - baud = 9600; - } - if (IS_SMC(pinfo)) { pinfo->smcp->smc_smcm &= ~(SMCM_RX | SMCM_TX); pinfo->smcp->smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); @@ -1203,8 +1185,7 @@ pinfo->sccp->scc_sccm &= ~(UART_SCCM_TX | UART_SCCM_RX); pinfo->sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT); } - - ret = cpm_uart_allocbuf(pinfo, 1); + ret = cpm_uart_allocbuf(pinfo, early); if (ret) return ret; @@ -1216,6 +1197,62 @@ 
else cpm_uart_init_scc(pinfo); + return 0; +} + +#ifdef CONFIG_SERIAL_CPM_CONSOLE +/* + * Print a string to the serial port trying not to disturb + * any possible real use of the port... + * + * Note that this is called with interrupts already disabled + */ + +static void cpm_uart_console_write(struct console *co, const char *s, + u_int count) +{ + cpm_uart_early_write(cpm_uart_port_map[co->index],s,count); +} + +/* + * Setup console. Be careful is called early ! + */ +static int __init cpm_uart_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + struct uart_cpm_port *pinfo; + int baud = 115200; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + int ret; + +#ifdef CONFIG_KGDB_CPM_UART + /* We are not interested in ports yet utilized by kgdb */ + if (co->index == KGDB_PINFO_INDEX) + return 0; +#endif + + port = + (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]]; + pinfo = (struct uart_cpm_port *)port; + + pinfo->flags |= FLAG_CONSOLE; + + if (options) { + uart_parse_options(options, &baud, &parity, &bits, &flow); + } else { + bd_t *bd = (bd_t *) __res; + + if (bd->bi_baudrate) + baud = bd->bi_baudrate; + else + baud = 9600; + } + + ret = cpm_uart_early_setup(cpm_uart_port_map[co->index], 1); + if(ret) + return ret; uart_set_options(port, co, baud, parity, bits, flow); return 0; @@ -1266,6 +1303,12 @@ pdata = pdev->dev.platform_data; +#ifdef CONFIG_KGDB_CPM_UART + /* We are not interested in ports yet utilized by kgdb */ + if (cpm_uart_id2nr(fs_uart_get_id(pdata)) == KGDB_PINFO_INDEX) + return ret; +#endif + if ((ret = cpm_uart_drv_get_platform_data(pdev, 0))) return ret; @@ -1363,6 +1406,12 @@ for (i = 0; i < cpm_uart_nr; i++) { int con = cpm_uart_port_map[i]; + +#ifdef CONFIG_KGDB_CPM_UART + /* We are not interested in ports yet utilized by kgdb */ + if (con == KGDB_PINFO_INDEX) + continue; +#endif cpm_uart_ports[con].port.line = i; cpm_uart_ports[con].port.flags = UPF_BOOT_AUTOCONF; if 
(cpm_uart_ports[con].set_lineif) diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm1.c --- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm1.c 2007-12-21 15:36:12.000000000 -0500 @@ -53,6 +53,7 @@ { ushort val; volatile cpm8xx_t *cp = cpmp; + unsigned *bcsr_io; switch (line) { case UART_SMC1: @@ -95,12 +96,35 @@ { /* XXX SCC1: insert port configuration here */ pinfo->brg = 1; + +#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS) + bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); + + if (bcsr_io == NULL) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_1); + iounmap(bcsr_io); +#endif } void scc2_lineif(struct uart_cpm_port *pinfo) { /* XXX SCC2: insert port configuration here */ pinfo->brg = 2; + unsigned *bcsr_io; + +#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS) + bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); + + if (bcsr_io == NULL) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_2); + iounmap(bcsr_io); +#endif } void scc3_lineif(struct uart_cpm_port *pinfo) @@ -189,6 +213,10 @@ { pr_debug("CPM uart[-]:init portdesc\n"); + /* Check if we have called this yet. 
This may happen if early kgdb + breakpoint is on */ + if(cpm_uart_nr) + return 0; cpm_uart_nr = 0; #ifdef CONFIG_SERIAL_CPM_SMC1 cpm_uart_ports[UART_SMC1].smcp = &cpmp->cp_smc[0]; diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm2.c --- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm2.c 2007-12-21 15:36:12.000000000 -0500 @@ -289,6 +289,10 @@ #endif pr_debug("CPM uart[-]:init portdesc\n"); + /* Check if we have called this yet. This may happen if early kgdb + breakpoint is on */ + if(cpm_uart_nr) + return 0; cpm_uart_nr = 0; #ifdef CONFIG_SERIAL_CPM_SMC1 cpm_uart_ports[UART_SMC1].smcp = (smc_t *) cpm2_map(im_smc[0]); diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_kgdb.c --- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_kgdb.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,185 @@ +/* + * drivers/serial/cpm_uart/cpm_uart_kgdb.c + * + * CPM UART interface for kgdb. + * + * Author: Vitaly Bordug + * + * Used some bits from drivers/serial/kgdb_8250.c as a template + * + * 2005-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include /* For BASE_BAUD and SERIAL_PORT_DFNS */ + +#include "cpm_uart.h" + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + + +static char kgdb_buf[GDB_BUF_SIZE], *kgdbp; +static int kgdb_chars; + +/* Forward declarations. */ + +/* + * Receive character from the serial port. 
This only works well + * before the port is initialize for real use. + */ +static int kgdb_wait_key(char *obuf) +{ + struct uart_cpm_port *pinfo; + u_char c, *cp; + volatile cbd_t *bdp; + int i; + + pinfo = &cpm_uart_ports[KGDB_PINFO_INDEX]; + + /* Get the address of the host memory buffer. + */ + bdp = pinfo->rx_cur; + while (bdp->cbd_sc & BD_SC_EMPTY); + + /* If the buffer address is in the CPM DPRAM, don't + * convert it. + */ + cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo); + + if (obuf) { + i = c = bdp->cbd_datlen; + while (i-- > 0) + *obuf++ = *cp++; + } else + c = *cp; + bdp->cbd_sc |= BD_SC_EMPTY; + + if (bdp->cbd_sc & BD_SC_WRAP) + bdp = pinfo->rx_bd_base; + else + bdp++; + pinfo->rx_cur = (cbd_t *)bdp; + + return (int)c; +} + + +/* + * Wait until the interface can accept a char, then write it. + */ +static void kgdb_put_debug_char(u8 chr) +{ + static char ch[2]; + + ch[0] = (char)chr; + cpm_uart_early_write(KGDB_PINFO_INDEX, ch, 1); +} + + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + */ +static int kgdb_get_debug_char(void) +{ + if (kgdb_chars <= 0) { + kgdb_chars = kgdb_wait_key(kgdb_buf); + kgdbp = kgdb_buf; + } + kgdb_chars--; + + return (*kgdbp++); +} + +static void termios_set_options(int index, + int baud, int parity, int bits, int flow) +{ + struct ktermios termios; + struct uart_port *port; + struct uart_cpm_port *pinfo; + + BUG_ON(index>UART_NR); + + port = (struct uart_port *)&cpm_uart_ports[index]; + pinfo = (struct uart_cpm_port *)port; + + /* + * Ensure that the serial console lock is initialised + * early. 
+ */ + spin_lock_init(&port->lock); + + memset(&termios, 0, sizeof(struct termios)); + + termios.c_cflag = CREAD | HUPCL | CLOCAL; + + termios.c_cflag |= baud; + + if (bits == 7) + termios.c_cflag |= CS7; + else + termios.c_cflag |= CS8; + + switch (parity) { + case 'o': case 'O': + termios.c_cflag |= PARODD; + /*fall through*/ + case 'e': case 'E': + termios.c_cflag |= PARENB; + break; + } + + if (flow == 'r') + termios.c_cflag |= CRTSCTS; + + port->ops->set_termios(port, &termios, NULL); +} + +/* + * Returns: + * 0 on success, 1 on failure. + */ +static int kgdb_init(void) +{ + struct uart_port *port; + struct uart_cpm_port *pinfo; + int use_bootmem = 0; /* use dma by default */ + + if (!cpm_uart_nr) { + use_bootmem = 1; + cpm_uart_init_portdesc(); + } + port = (struct uart_port *)&cpm_uart_ports[KGDB_PINFO_INDEX]; + pinfo = (struct uart_cpm_port *)port; + + if (cpm_uart_early_setup(KGDB_PINFO_INDEX, use_bootmem)) + return 1; + + termios_set_options(KGDB_PINFO_INDEX, KGDB_BAUD,'n',8,'n'); + if (IS_SMC(pinfo)) + pinfo->smcp->smc_smcm |= SMCM_TX; + else + pinfo->sccp->scc_sccm |= UART_SCCM_TX; + + return 0; +} + + +struct kgdb_io kgdb_io_ops = { + .read_char = kgdb_get_debug_char, + .write_char = kgdb_put_debug_char, + .init = kgdb_init, +}; + diff -Nurb linux-2.6.22-570/drivers/serial/mpsc_kgdb.c linux-2.6.22-591/drivers/serial/mpsc_kgdb.c --- linux-2.6.22-570/drivers/serial/mpsc_kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/serial/mpsc_kgdb.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,345 @@ +/* + * drivers/serial/mpsc_kgdb.c + * + * KGDB driver for the Marvell MultiProtocol Serial Controller (MPCS) + * + * Based on the polled boot loader driver by Ajit Prem (ajit.prem@motorola.com) + * + * Author: Randy Vinson + * + * Copyright (C) 2005-2006 MontaVista Software, Inc. 
+ * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Main MPSC Configuration Register Offsets */ +#define MPSC_MMCRL 0x0000 +#define MPSC_MMCRH 0x0004 +#define MPSC_MPCR 0x0008 +#define MPSC_CHR_1 0x000c +#define MPSC_CHR_2 0x0010 +#define MPSC_CHR_3 0x0014 +#define MPSC_CHR_4 0x0018 +#define MPSC_CHR_5 0x001c +#define MPSC_CHR_6 0x0020 +#define MPSC_CHR_7 0x0024 +#define MPSC_CHR_8 0x0028 +#define MPSC_CHR_9 0x002c +#define MPSC_CHR_10 0x0030 +#define MPSC_CHR_11 0x0034 + +#define MPSC_MPCR_FRZ (1 << 9) +#define MPSC_MPCR_CL_5 0 +#define MPSC_MPCR_CL_6 1 +#define MPSC_MPCR_CL_7 2 +#define MPSC_MPCR_CL_8 3 +#define MPSC_MPCR_SBL_1 0 +#define MPSC_MPCR_SBL_2 1 + +#define MPSC_CHR_2_TEV (1<<1) +#define MPSC_CHR_2_TA (1<<7) +#define MPSC_CHR_2_TTCS (1<<9) +#define MPSC_CHR_2_REV (1<<17) +#define MPSC_CHR_2_RA (1<<23) +#define MPSC_CHR_2_CRD (1<<25) +#define MPSC_CHR_2_EH (1<<31) +#define MPSC_CHR_2_PAR_ODD 0 +#define MPSC_CHR_2_PAR_SPACE 1 +#define MPSC_CHR_2_PAR_EVEN 2 +#define MPSC_CHR_2_PAR_MARK 3 + +/* MPSC Signal Routing */ +#define MPSC_MRR 0x0000 +#define MPSC_RCRR 0x0004 +#define MPSC_TCRR 0x0008 + +/* MPSC Interrupt registers (offset from MV64x60_SDMA_INTR_OFFSET) */ +#define MPSC_INTR_CAUSE 0x0004 +#define MPSC_INTR_MASK 0x0084 +#define MPSC_INTR_CAUSE_RCC (1<<6) + +/* Baud Rate Generator Interface Registers */ +#define BRG_BCR 0x0000 +#define BRG_BTR 0x0004 + +/* Speed of the UART. */ +static int kgdbmpsc_baud = CONFIG_KGDB_BAUDRATE; + +/* Index of the UART, matches ttyMX naming. 
*/ +static int kgdbmpsc_ttyMM = CONFIG_KGDB_PORT_NUM; + +#define MPSC_INTR_REG_SELECT(x) ((x) + (8 * kgdbmpsc_ttyMM)) + +static int kgdbmpsc_init(void); + +static struct platform_device mpsc_dev, shared_dev; + +static void __iomem *mpsc_base; +static void __iomem *brg_base; +static void __iomem *routing_base; +static void __iomem *sdma_base; + +static unsigned int mpsc_irq; + +static void kgdb_write_debug_char(u8 c) +{ + u32 data; + + data = readl(mpsc_base + MPSC_MPCR); + writeb(c, mpsc_base + MPSC_CHR_1); + mb(); + data = readl(mpsc_base + MPSC_CHR_2); + data |= MPSC_CHR_2_TTCS; + writel(data, mpsc_base + MPSC_CHR_2); + mb(); + + while (readl(mpsc_base + MPSC_CHR_2) & MPSC_CHR_2_TTCS) ; +} + +static int kgdb_get_debug_char(void) +{ + unsigned char c; + + while (!(readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) & + MPSC_INTR_CAUSE_RCC)) ; + + c = readb(mpsc_base + MPSC_CHR_10 + (1 << 1)); + mb(); + writeb(c, mpsc_base + MPSC_CHR_10 + (1 << 1)); + mb(); + writel(~MPSC_INTR_CAUSE_RCC, sdma_base + + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)); + return (c); +} + +/* + * This is the receiver interrupt routine for the GDB stub. + * All that we need to do is verify that the interrupt happened on the + * line we're in charge of. If this is true, schedule a breakpoint and + * return. + */ +static irqreturn_t kgdbmpsc_interrupt(int irq, void *dev_id) +{ + if (irq != mpsc_irq) + return IRQ_NONE; + /* + * If there is some other CPU in KGDB then this is a + * spurious interrupt. 
so return without even checking a byte + */ + if (atomic_read(&debugger_active)) + return IRQ_NONE; + + if (readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) & + MPSC_INTR_CAUSE_RCC) + breakpoint(); + + return IRQ_HANDLED; +} + +static int __init kgdbmpsc_init(void) +{ + struct mpsc_pdata *pdata; + u32 cdv; + + if (!brg_base || !mpsc_base || !routing_base || !sdma_base) + return -1; + + /* Set MPSC Routing to enable both ports */ + writel(0x0, routing_base + MPSC_MRR); + + /* MPSC 0/1 Rx & Tx get clocks BRG0/1 */ + writel(0x00000100, routing_base + MPSC_RCRR); + writel(0x00000100, routing_base + MPSC_TCRR); + + /* Disable all MPSC interrupts and clear any pending interrupts */ + writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK)); + writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)); + + pdata = (struct mpsc_pdata *)mpsc_dev.dev.platform_data; + + /* cdv = (clock/(2*16*baud rate)) for 16X mode. */ + cdv = ((pdata->brg_clk_freq / (32 * kgdbmpsc_baud)) - 1); + writel((pdata->brg_clk_src << 18) | (1 << 16) | cdv, + brg_base + BRG_BCR); + + /* Put MPSC into UART mode, no null modem, 16x clock mode */ + writel(0x000004c4, mpsc_base + MPSC_MMCRL); + writel(0x04400400, mpsc_base + MPSC_MMCRH); + + writel(0, mpsc_base + MPSC_CHR_1); + writel(0, mpsc_base + MPSC_CHR_9); + writel(0, mpsc_base + MPSC_CHR_10); + writel(4, mpsc_base + MPSC_CHR_3); + writel(0x20000000, mpsc_base + MPSC_CHR_4); + writel(0x9000, mpsc_base + MPSC_CHR_5); + writel(0, mpsc_base + MPSC_CHR_6); + writel(0, mpsc_base + MPSC_CHR_7); + writel(0, mpsc_base + MPSC_CHR_8); + + /* 8 data bits, 1 stop bit */ + writel((3 << 12), mpsc_base + MPSC_MPCR); + + /* Enter "hunt" mode */ + writel((1 << 31), mpsc_base + MPSC_CHR_2); + + udelay(100); + return 0; +} + +static void __iomem *__init +kgdbmpsc_map_resource(struct platform_device *pd, int type, int num) +{ + void __iomem *base = NULL; + struct resource *r; + + if ((r = platform_get_resource(pd, IORESOURCE_MEM, num))) + base = 
ioremap(r->start, r->end - r->start + 1); + return base; +} + +static void __iomem *__init +kgdbmpsc_unmap_resource(struct platform_device *pd, int type, int num, + void __iomem * base) +{ + if (base) + iounmap(base); + return NULL; +} + +static void __init +kgdbmpsc_reserve_resource(struct platform_device *pd, int type, int num) +{ + struct resource *r; + + if ((r = platform_get_resource(pd, IORESOURCE_MEM, num))) + request_mem_region(r->start, r->end - r->start + 1, "kgdb"); +} + +static int __init kgdbmpsc_local_init(void) +{ + if (!mpsc_dev.num_resources || !shared_dev.num_resources) + return 1; /* failure */ + + mpsc_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM, + MPSC_BASE_ORDER); + brg_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM, + MPSC_BRG_BASE_ORDER); + + /* get the platform data for the shared registers and get them mapped */ + routing_base = kgdbmpsc_map_resource(&shared_dev, + IORESOURCE_MEM, + MPSC_ROUTING_BASE_ORDER); + sdma_base = + kgdbmpsc_map_resource(&shared_dev, IORESOURCE_MEM, + MPSC_SDMA_INTR_BASE_ORDER); + + mpsc_irq = platform_get_irq(&mpsc_dev, 1); + + if (mpsc_base && brg_base && routing_base && sdma_base) + return 0; /* success */ + + return 1; /* failure */ +} + +static void __init kgdbmpsc_local_exit(void) +{ + if (sdma_base) + sdma_base = kgdbmpsc_unmap_resource(&shared_dev, IORESOURCE_MEM, + MPSC_SDMA_INTR_BASE_ORDER, + sdma_base); + if (routing_base) + routing_base = kgdbmpsc_unmap_resource(&shared_dev, + IORESOURCE_MEM, + MPSC_ROUTING_BASE_ORDER, + routing_base); + if (brg_base) + brg_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM, + MPSC_BRG_BASE_ORDER, + brg_base); + if (mpsc_base) + mpsc_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM, + MPSC_BASE_ORDER, mpsc_base); +} + +static void __init kgdbmpsc_update_pdata(struct platform_device *pdev) +{ + + snprintf(pdev->dev.bus_id, BUS_ID_SIZE, "%s.%u", pdev->name, pdev->id); +} + +static int __init kgdbmpsc_pdev_init(void) +{ + struct 
platform_device *pdev; + + /* get the platform data for the specified port. */ + pdev = mv64x60_early_get_pdev_data(MPSC_CTLR_NAME, kgdbmpsc_ttyMM, 1); + if (pdev) { + memcpy(&mpsc_dev, pdev, sizeof(struct platform_device)); + if (platform_notify) { + kgdbmpsc_update_pdata(&mpsc_dev); + platform_notify(&mpsc_dev.dev); + } + + /* get the platform data for the shared registers. */ + pdev = mv64x60_early_get_pdev_data(MPSC_SHARED_NAME, 0, 0); + if (pdev) { + memcpy(&shared_dev, pdev, + sizeof(struct platform_device)); + if (platform_notify) { + kgdbmpsc_update_pdata(&shared_dev); + platform_notify(&shared_dev.dev); + } + } + } + return 0; +} + +postcore_initcall(kgdbmpsc_pdev_init); + +static int __init kgdbmpsc_init_io(void) +{ + + kgdbmpsc_pdev_init(); + + if (kgdbmpsc_local_init()) { + kgdbmpsc_local_exit(); + return -1; + } + + if (kgdbmpsc_init() == -1) + return -1; + return 0; +} + +static void __init kgdbmpsc_hookup_irq(void) +{ + unsigned int msk; + if (!request_irq(mpsc_irq, kgdbmpsc_interrupt, 0, "kgdb mpsc", NULL)) { + /* Enable interrupt */ + msk = readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK)); + msk |= MPSC_INTR_CAUSE_RCC; + writel(msk, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK)); + + kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM, + MPSC_BASE_ORDER); + kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM, + MPSC_BRG_BASE_ORDER); + } +} + +struct kgdb_io kgdb_io_ops = { + .read_char = kgdb_get_debug_char, + .write_char = kgdb_write_debug_char, + .init = kgdbmpsc_init_io, + .late_init = kgdbmpsc_hookup_irq, +}; diff -Nurb linux-2.6.22-570/drivers/serial/pl011_kgdb.c linux-2.6.22-591/drivers/serial/pl011_kgdb.c --- linux-2.6.22-570/drivers/serial/pl011_kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/serial/pl011_kgdb.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,111 @@ +/* + * driver/serial/pl011_kgdb.c + * + * Support for KGDB on ARM AMBA PL011 UARTs + * + * Authors: Manish Lachwani + * Deepak Saxena + * + * 
Copyright (c) 2005-2007 MontaVista Software, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ +#include +#include +#include + +#include +#include +#include + +static int kgdb_irq = CONFIG_KGDB_AMBA_IRQ; + +#define UART_DIVISOR (CONFIG_KGDB_AMBA_UARTCLK * 4 / CONFIG_KGDB_BAUDRATE) +/* + * Todo: IO_ADDRESS is not very generic across ARM... + */ +static volatile unsigned char *kgdb_port = + (unsigned char*)IO_ADDRESS(CONFIG_KGDB_AMBA_BASE); + +/* + * Init code taken from amba-pl011.c; PL011 control/line-control live at the UART011_CR/UART011_LCRH offsets. + */ +static int kgdb_serial_init(void) +{ + writew(0, kgdb_port + UART011_CR); /* disable the UART while it is reprogrammed */ + + /* Set baud rate */ + writew(UART_DIVISOR & 0x3f, kgdb_port + UART011_FBRD); + writew(UART_DIVISOR >> 6, kgdb_port + UART011_IBRD); + + writew(UART01x_LCRH_WLEN_8 | UART01x_LCRH_FEN, kgdb_port + UART011_LCRH); + writew(UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_RXE, + kgdb_port + UART011_CR); + + writew(UART011_RXIM, kgdb_port + UART011_IMSC); + + return 0; +} + +static void kgdb_serial_putchar(u8 ch) +{ + unsigned int status; + + do { + status = readw(kgdb_port + UART01x_FR); + } while (status & UART01x_FR_TXFF); + + writew(ch, kgdb_port + UART01x_DR); +} + +static int kgdb_serial_getchar(void) +{ + unsigned int status; + int ch; + +#ifdef CONFIG_DEBUG_LL + printascii("Entering serial_getchar loop"); +#endif + do { + status = readw(kgdb_port + UART01x_FR); + } while (status & UART01x_FR_RXFE); + ch = readw(kgdb_port + UART01x_DR); +#ifdef CONFIG_DEBUG_LL + printascii("Exited serial_getchar loop"); + printascii("Read char: "); + printch(ch); + printascii("\n"); +#endif + return ch; +} + +static irqreturn_t kgdb_interrupt(int irq, void *dev_id) /* two-argument handler form, no pt_regs */ +{ + int status = readw(kgdb_port + UART011_MIS); + +#ifdef CONFIG_DEBUG_LL + printascii("KGDB irq\n"); +#endif + if (irq != kgdb_irq) + return IRQ_NONE; + + if (status & 0x40) +
breakpoint(); + + return IRQ_HANDLED; +} + +static void __init kgdb_hookup_irq(void) +{ + request_irq(kgdb_irq, kgdb_interrupt, SA_SHIRQ, "KGDB-serial", kgdb_port); +} + +struct kgdb_io kgdb_io_ops = { + .init = kgdb_serial_init, + .write_char = kgdb_serial_putchar, + .read_char = kgdb_serial_getchar, + .late_init = kgdb_hookup_irq, +}; diff -Nurb linux-2.6.22-570/drivers/serial/pxa.c linux-2.6.22-591/drivers/serial/pxa.c --- linux-2.6.22-570/drivers/serial/pxa.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/pxa.c 2007-12-21 15:36:12.000000000 -0500 @@ -42,6 +42,9 @@ #include #include #include +#ifdef CONFIG_KGDB_CONSOLE +#include +#endif #include #include @@ -690,6 +693,8 @@ console_initcall(serial_pxa_console_init); #define PXA_CONSOLE &serial_pxa_console +#elif defined(CONFIG_KGDB_CONSOLE) +#define PXA_CONSOLE &kgdbcons #else #define PXA_CONSOLE NULL #endif diff -Nurb linux-2.6.22-570/drivers/serial/serial_core.c linux-2.6.22-591/drivers/serial/serial_core.c --- linux-2.6.22-570/drivers/serial/serial_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/serial_core.c 2007-12-21 15:36:12.000000000 -0500 @@ -33,6 +33,7 @@ #include /* for serial_state and serial_icounter_struct */ #include #include +#include #include #include @@ -58,6 +59,12 @@ #define uart_console(port) (0) #endif +#ifdef CONFIG_KGDB_CONSOLE +#define uart_kgdb(port) (port->cons && !strcmp(port->cons->name, "kgdb")) +#else +#define uart_kgdb(port) (0) +#endif + static void uart_change_speed(struct uart_state *state, struct ktermios *old_termios); static void uart_wait_until_sent(struct tty_struct *tty, int timeout); static void uart_change_pm(struct uart_state *state, int pm_state); @@ -1671,6 +1678,9 @@ mmio ? "mmio:0x" : "port:", mmio ? 
port->mapbase : (unsigned long) port->iobase, port->irq); + if (port->iotype == UPIO_MEM) + ret += sprintf(buf+ret, " membase 0x%08lX", + (unsigned long) port->membase); if (port->type == PORT_UNKNOWN) { strcat(buf, "\n"); @@ -2063,7 +2073,8 @@ case UPIO_TSI: case UPIO_DWAPB: snprintf(address, sizeof(address), - "MMIO 0x%lx", port->mapbase); + "MMIO map 0x%lx mem 0x%lx", port->mapbase, + (unsigned long) port->membase); break; default: strlcpy(address, "*unknown*", sizeof(address)); @@ -2118,9 +2129,9 @@ /* * Power down all ports by default, except the - * console if we have one. + * console (real or kgdb) if we have one. */ - if (!uart_console(port)) + if (!uart_console(port) && !uart_kgdb(port)) uart_change_pm(state, 3); } } @@ -2311,6 +2322,12 @@ */ port->flags &= ~UPF_DEAD; +#if defined(CONFIG_KGDB_8250) + /* Add any 8250-like ports we find later. */ + if (port->type <= PORT_MAX_8250) + kgdb8250_add_port(port->line, port); +#endif + out: mutex_unlock(&state->mutex); mutex_unlock(&port_mutex); diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9.c linux-2.6.22-591/drivers/serial/serial_txx9.c --- linux-2.6.22-570/drivers/serial/serial_txx9.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/serial_txx9.c 2007-12-21 15:36:12.000000000 -0500 @@ -40,6 +40,10 @@ static char *serial_version = "1.09"; static char *serial_name = "TX39/49 Serial driver"; +#ifndef CONFIG_KGDB_TXX9 +#define CONFIG_KGDB_PORT_NUM -1 +#endif + #define PASS_LIMIT 256 #if !defined(CONFIG_SERIAL_TXX9_STDSERIAL) @@ -471,6 +475,9 @@ unsigned long flags; int retval; + if (up->port.line == CONFIG_KGDB_PORT_NUM) + return -EBUSY; + /* * Clear the FIFO buffers and disable them. 
* (they will be reenabled in set_termios()) @@ -799,6 +806,9 @@ for (i = 0; i < UART_NR; i++) { struct uart_txx9_port *up = &serial_txx9_ports[i]; + if (i == CONFIG_KGDB_PORT_NUM) /* port.line is only assigned below, so test the slot index */ + continue; + up->port.line = i; up->port.ops = &serial_txx9_pops; up->port.dev = dev; @@ -967,6 +977,9 @@ mutex_lock(&serial_txx9_mutex); for (i = 0; i < UART_NR; i++) { + if (i == CONFIG_KGDB_PORT_NUM) + continue; + uart = &serial_txx9_ports[i]; if (uart_match_port(&uart->port, port)) { uart_remove_one_port(&serial_txx9_reg, &uart->port); diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c linux-2.6.22-591/drivers/serial/serial_txx9_kgdb.c --- linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/drivers/serial/serial_txx9_kgdb.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,150 @@ +/* + * drivers/serial/serial_txx9_kgdb.c + * + * kgdb interface for gdb + * + * Author: MontaVista Software, Inc. + * source@mvista.com + * + * Copyright (C) 2005-2006 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include + +/* Speed of the UART.
*/ +static unsigned int kgdb_txx9_baud = CONFIG_KGDB_BAUDRATE; + +#define TXX9_NPORT 4 /* TX4939 has 4 UARTs, others only have 2 */ + +static struct uart_port kgdb_txx9_ports[TXX9_NPORT]; +static struct uart_port *kgdb_port; + +/* TXX9 Serial Registers */ +#define TXX9_SILCR 0x00 +#define TXX9_SIDISR 0x08 +#define TXX9_SISCISR 0x0c +#define TXX9_SIFCR 0x10 +#define TXX9_SIFLCR 0x14 +#define TXX9_SIBGR 0x18 +#define TXX9_SITFIFO 0x1c +#define TXX9_SIRFIFO 0x20 + +/* SILCR : Line Control */ +#define TXX9_SILCR_SCS_IMCLK_BG 0x00000020 +#define TXX9_SILCR_SCS_SCLK_BG 0x00000060 +#define TXX9_SILCR_USBL_1BIT 0x00000000 +#define TXX9_SILCR_UMODE_8BIT 0x00000000 + +/* SIDISR : DMA/Int. Status */ +#define TXX9_SIDISR_RFDN_MASK 0x0000001f + +/* SISCISR : Status Change Int. Status */ +#define TXX9_SISCISR_TRDY 0x00000004 + +/* SIFCR : FIFO Control */ +#define TXX9_SIFCR_SWRST 0x00008000 + +/* SIBGR : Baud Rate Control */ +#define TXX9_SIBGR_BCLK_T0 0x00000000 +#define TXX9_SIBGR_BCLK_T2 0x00000100 +#define TXX9_SIBGR_BCLK_T4 0x00000200 +#define TXX9_SIBGR_BCLK_T6 0x00000300 + +static inline unsigned int sio_in(struct uart_port *port, int offset) +{ + return *(volatile u32 *)(port->membase + offset); +} + +static inline void sio_out(struct uart_port *port, int offset, unsigned int value) +{ + *(volatile u32 *)(port->membase + offset) = value; +} + +void __init txx9_kgdb_add_port(int n, struct uart_port *port) +{ + memcpy(&kgdb_txx9_ports[n], port, sizeof(struct uart_port)); +} + +static int txx9_kgdb_init(void) +{ + unsigned int quot, sibgr; + + kgdb_port = &kgdb_txx9_ports[CONFIG_KGDB_PORT_NUM]; + + if (kgdb_port->iotype != UPIO_MEM && + kgdb_port->iotype != UPIO_MEM32) + return -1; + + /* Reset the UART. */ + sio_out(kgdb_port, TXX9_SIFCR, TXX9_SIFCR_SWRST); +#ifdef CONFIG_CPU_TX49XX + /* + * TX4925 BUG WORKAROUND. Accessing SIOC register + * immediately after soft reset causes bus error. + */ + iob(); + udelay(1); +#endif + /* Wait until reset is complete. 
*/ + while (sio_in(kgdb_port, TXX9_SIFCR) & TXX9_SIFCR_SWRST); + + /* Select the frame format and input clock. */ + sio_out(kgdb_port, TXX9_SILCR, + TXX9_SILCR_UMODE_8BIT | TXX9_SILCR_USBL_1BIT | + ((kgdb_port->flags & UPF_MAGIC_MULTIPLIER) ? + TXX9_SILCR_SCS_SCLK_BG : TXX9_SILCR_SCS_IMCLK_BG)); + + /* Select the input clock prescaler that fits the baud rate. */ + quot = (kgdb_port->uartclk + 8 * kgdb_txx9_baud) / (16 * kgdb_txx9_baud); + if (quot < (256 << 1)) + sibgr = (quot >> 1) | TXX9_SIBGR_BCLK_T0; + else if (quot < ( 256 << 3)) + sibgr = (quot >> 3) | TXX9_SIBGR_BCLK_T2; + else if (quot < ( 256 << 5)) + sibgr = (quot >> 5) | TXX9_SIBGR_BCLK_T4; + else if (quot < ( 256 << 7)) + sibgr = (quot >> 7) | TXX9_SIBGR_BCLK_T6; + else + sibgr = 0xff | TXX9_SIBGR_BCLK_T6; + + sio_out(kgdb_port, TXX9_SIBGR, sibgr); + + /* Enable receiver and transmitter. */ + sio_out(kgdb_port, TXX9_SIFLCR, 0); + + return 0; +} + +static void txx9_kgdb_late_init(void) +{ + request_mem_region(kgdb_port->mapbase, 0x40, "serial_txx9(debug)"); +} + +static int txx9_kgdb_read(void) +{ + while (!(sio_in(kgdb_port, TXX9_SIDISR) & TXX9_SIDISR_RFDN_MASK)); + + return sio_in(kgdb_port, TXX9_SIRFIFO); +} + +static void txx9_kgdb_write(u8 ch) +{ + while (!(sio_in(kgdb_port, TXX9_SISCISR) & TXX9_SISCISR_TRDY)); + + sio_out(kgdb_port, TXX9_SITFIFO, ch); +} + +struct kgdb_io kgdb_io_ops = { + .read_char = txx9_kgdb_read, + .write_char = txx9_kgdb_write, + .init = txx9_kgdb_init, + .late_init = txx9_kgdb_late_init +}; diff -Nurb linux-2.6.22-570/drivers/serial/sh-sci.c linux-2.6.22-591/drivers/serial/sh-sci.c --- linux-2.6.22-570/drivers/serial/sh-sci.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/serial/sh-sci.c 2007-12-21 15:36:12.000000000 -0500 @@ -118,7 +118,8 @@ do { status = sci_in(port, SCxSR); if (status & SCxSR_ERRORS(port)) { - handle_error(port); + /* Clear error flags. 
*/ + sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port)); continue; } } while (!(status & SCxSR_RDxF(port))); @@ -184,18 +185,18 @@ int h, l; c = *p++; - h = highhex(c); - l = lowhex(c); + h = hexchars[c >> 4]; + l = hexchars[c % 16]; put_char(port, h); put_char(port, l); checksum += h + l; } put_char(port, '#'); - put_char(port, highhex(checksum)); - put_char(port, lowhex(checksum)); + put_char(port, hexchars[(checksum >> 4) & 0xf]); /* high nibble of the sum, masked so the index stays within the hex table */ + put_char(port, hexchars[checksum & 0xf]); /* low nibble (was "& 16", which only ever gave index 0 or 16) */ } while (get_char(port) != '+'); } else -#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */ +#endif /* CONFIG_SH_STANDARD_BIOS */ for (i=0; iline == KGDBPORT.port.line && + c == 3) + breakpoint(); +#endif + /* Store data and status */ if (status&SCxSR_FER(port)) { flag = TTY_FRAME; @@ -1279,6 +1290,7 @@ console_initcall(sci_console_init); #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */ +#if 0 #ifdef CONFIG_SH_KGDB /* * FIXME: Most of this can go away.. at the moment, we rely on diff -Nurb linux-2.6.22-570/drivers/spi/at25.c linux-2.6.22-591/drivers/spi/at25.c --- linux-2.6.22-570/drivers/spi/at25.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/spi/at25.c 2007-12-21 15:36:12.000000000 -0500 @@ -111,7 +111,8 @@ } static ssize_t -at25_bin_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev; struct at25_data *at25; @@ -236,7 +237,8 @@ } static ssize_t -at25_bin_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev; struct at25_data *at25; @@ -314,7 +316,6 @@ */ at25->bin.attr.name = "eeprom"; at25->bin.attr.mode = S_IRUSR; - at25->bin.attr.owner = THIS_MODULE; at25->bin.read = at25_bin_read; at25->bin.size = at25->chip.byte_len; diff -Nurb linux-2.6.22-570/drivers/usb/atm/cxacru.c
linux-2.6.22-591/drivers/usb/atm/cxacru.c --- linux-2.6.22-570/drivers/usb/atm/cxacru.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/atm/cxacru.c 2007-12-21 15:36:14.000000000 -0500 @@ -171,7 +171,7 @@ struct delayed_work poll_work; u32 card_info[CXINF_MAX]; struct mutex poll_state_serialize; - int poll_state; + enum cxacru_poll_state poll_state; /* contol handles */ struct mutex cm_serialize; @@ -226,58 +226,48 @@ static ssize_t cxacru_sysfs_showattr_dB(s16 value, char *buf) { - if (unlikely(value < 0)) { return snprintf(buf, PAGE_SIZE, "%d.%02u\n", - value / 100, -value % 100); - } else { - return snprintf(buf, PAGE_SIZE, "%d.%02u\n", - value / 100, value % 100); - } + value / 100, abs(value) % 100); } static ssize_t cxacru_sysfs_showattr_bool(u32 value, char *buf) { - switch (value) { - case 0: return snprintf(buf, PAGE_SIZE, "no\n"); - case 1: return snprintf(buf, PAGE_SIZE, "yes\n"); - default: return 0; - } + static char *str[] = { "no", "yes" }; + if (unlikely(value >= ARRAY_SIZE(str))) + return snprintf(buf, PAGE_SIZE, "%u\n", value); + return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); } static ssize_t cxacru_sysfs_showattr_LINK(u32 value, char *buf) { - switch (value) { - case 1: return snprintf(buf, PAGE_SIZE, "not connected\n"); - case 2: return snprintf(buf, PAGE_SIZE, "connected\n"); - case 3: return snprintf(buf, PAGE_SIZE, "lost\n"); - default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); - } + static char *str[] = { NULL, "not connected", "connected", "lost" }; + if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL)) + return snprintf(buf, PAGE_SIZE, "%u\n", value); + return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); } static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf) { - switch (value) { - case 0: return snprintf(buf, PAGE_SIZE, "down\n"); - case 1: return snprintf(buf, PAGE_SIZE, "attempting to activate\n"); - case 2: return snprintf(buf, PAGE_SIZE, "training\n"); - case 3: return 
snprintf(buf, PAGE_SIZE, "channel analysis\n"); - case 4: return snprintf(buf, PAGE_SIZE, "exchange\n"); - case 5: return snprintf(buf, PAGE_SIZE, "up\n"); - case 6: return snprintf(buf, PAGE_SIZE, "waiting\n"); - case 7: return snprintf(buf, PAGE_SIZE, "initialising\n"); - default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); - } + static char *str[] = { "down", "attempting to activate", + "training", "channel analysis", "exchange", "up", + "waiting", "initialising" + }; + if (unlikely(value >= ARRAY_SIZE(str))) + return snprintf(buf, PAGE_SIZE, "%u\n", value); + return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); } static ssize_t cxacru_sysfs_showattr_MODU(u32 value, char *buf) { - switch (value) { - case 0: return 0; - case 1: return snprintf(buf, PAGE_SIZE, "ANSI T1.413\n"); - case 2: return snprintf(buf, PAGE_SIZE, "ITU-T G.992.1 (G.DMT)\n"); - case 3: return snprintf(buf, PAGE_SIZE, "ITU-T G.992.2 (G.LITE)\n"); - default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); - } + static char *str[] = { + NULL, + "ANSI T1.413", + "ITU-T G.992.1 (G.DMT)", + "ITU-T G.992.2 (G.LITE)" + }; + if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL)) + return snprintf(buf, PAGE_SIZE, "%u\n", value); + return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); } /* @@ -308,11 +298,10 @@ struct cxacru_data *instance = usbatm_instance->driver_data; u32 value = instance->card_info[CXINF_LINE_STARTABLE]; - switch (value) { - case 0: return snprintf(buf, PAGE_SIZE, "running\n"); - case 1: return snprintf(buf, PAGE_SIZE, "stopped\n"); - default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); - } + static char *str[] = { "running", "stopped" }; + if (unlikely(value >= ARRAY_SIZE(str))) + return snprintf(buf, PAGE_SIZE, "%u\n", value); + return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); } static ssize_t cxacru_sysfs_store_adsl_state(struct device *dev, diff -Nurb linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c 
linux-2.6.22-591/drivers/usb/atm/ueagle-atm.c --- linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/atm/ueagle-atm.c 2007-12-21 15:36:12.000000000 -0500 @@ -1168,6 +1168,7 @@ struct uea_softc *sc = data; int ret = -EAGAIN; + set_freezable(); uea_enters(INS_TO_USBDEV(sc)); while (!kthread_should_stop()) { if (ret < 0 || sc->reset) diff -Nurb linux-2.6.22-570/drivers/usb/core/config.c linux-2.6.22-591/drivers/usb/core/config.c --- linux-2.6.22-570/drivers/usb/core/config.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/core/config.c 2007-12-21 15:36:14.000000000 -0500 @@ -274,6 +274,7 @@ struct usb_descriptor_header *header; int len, retval; u8 inums[USB_MAXINTERFACES], nalts[USB_MAXINTERFACES]; + unsigned iad_num = 0; memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); if (config->desc.bDescriptorType != USB_DT_CONFIG || @@ -351,6 +352,20 @@ ++n; } + } else if (header->bDescriptorType == + USB_DT_INTERFACE_ASSOCIATION) { + if (iad_num == USB_MAXIADS) { + dev_warn(ddev, "found more Interface " + "Association Descriptors " + "than allocated for in " + "configuration %d\n", cfgno); + } else { + config->intf_assoc[iad_num] = + (struct usb_interface_assoc_descriptor + *)header; + iad_num++; + } + } else if (header->bDescriptorType == USB_DT_DEVICE || header->bDescriptorType == USB_DT_CONFIG) dev_warn(ddev, "config %d contains an unexpected " diff -Nurb linux-2.6.22-570/drivers/usb/core/devices.c linux-2.6.22-591/drivers/usb/core/devices.c --- linux-2.6.22-570/drivers/usb/core/devices.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/core/devices.c 2007-12-21 15:36:14.000000000 -0500 @@ -102,6 +102,10 @@ /* C: #Ifs=dd Cfg#=dd Atr=xx MPwr=dddmA */ "C:%c #Ifs=%2d Cfg#=%2d Atr=%02x MxPwr=%3dmA\n"; +static const char *format_iad = +/* A: FirstIf#=dd IfCount=dd Cls=xx(sssss) Sub=xx Prot=xx */ + "A: FirstIf#=%2d IfCount=%2d Cls=%02x(%-5s) Sub=%02x Prot=%02x\n"; + static 
const char *format_iface = /* I: If#=dd Alt=dd #EPs=dd Cls=xx(sssss) Sub=xx Prot=xx Driver=xxxx*/ "I:%c If#=%2d Alt=%2d #EPs=%2d Cls=%02x(%-5s) Sub=%02x Prot=%02x Driver=%s\n"; @@ -146,6 +150,7 @@ {USB_CLASS_STILL_IMAGE, "still"}, {USB_CLASS_CSCID, "scard"}, {USB_CLASS_CONTENT_SEC, "c-sec"}, + {USB_CLASS_VIDEO, "video"}, {-1, "unk."} /* leave as last */ }; @@ -286,6 +291,21 @@ return start; } +static char *usb_dump_iad_descriptor(char *start, char *end, + const struct usb_interface_assoc_descriptor *iad) +{ + if (start > end) + return start; + start += sprintf(start, format_iad, + iad->bFirstInterface, + iad->bInterfaceCount, + iad->bFunctionClass, + class_decode(iad->bFunctionClass), + iad->bFunctionSubClass, + iad->bFunctionProtocol); + return start; +} + /* TBD: * 0. TBDs * 1. marking active interface altsettings (code lists all, but should mark @@ -322,6 +342,12 @@ if (!config) /* getting these some in 2.3.7; none in 2.3.6 */ return start + sprintf(start, "(null Cfg. desc.)\n"); start = usb_dump_config_descriptor(start, end, &config->desc, active); + for (i = 0; i < USB_MAXIADS; i++) { + if (config->intf_assoc[i] == NULL) + break; + start = usb_dump_iad_descriptor(start, end, + config->intf_assoc[i]); + } for (i = 0; i < config->desc.bNumInterfaces; i++) { intfc = config->intf_cache[i]; interface = config->interface[i]; diff -Nurb linux-2.6.22-570/drivers/usb/core/hub.c linux-2.6.22-591/drivers/usb/core/hub.c --- linux-2.6.22-570/drivers/usb/core/hub.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/usb/core/hub.c 2007-12-21 15:36:12.000000000 -0500 @@ -2831,6 +2831,7 @@ static int hub_thread(void *__unused) { + set_freezable(); do { hub_events(); wait_event_interruptible(khubd_wait, diff -Nurb linux-2.6.22-570/drivers/usb/core/message.c linux-2.6.22-591/drivers/usb/core/message.c --- linux-2.6.22-570/drivers/usb/core/message.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/drivers/usb/core/message.c 2007-12-21 15:36:14.000000000 
-0500 @@ -1409,6 +1409,36 @@ .uevent = usb_if_uevent, }; +static struct usb_interface_assoc_descriptor *find_iad(struct usb_device *dev, + struct usb_host_config *config, + u8 inum) +{ + struct usb_interface_assoc_descriptor *retval = NULL; + struct usb_interface_assoc_descriptor *intf_assoc; + int first_intf; + int last_intf; + int i; + + for (i = 0; (i < USB_MAXIADS && config->intf_assoc[i]); i++) { + intf_assoc = config->intf_assoc[i]; + if (intf_assoc->bInterfaceCount == 0) + continue; + + first_intf = intf_assoc->bFirstInterface; + last_intf = first_intf + (intf_assoc->bInterfaceCount - 1); + if (inum >= first_intf && inum <= last_intf) { + if (!retval) + retval = intf_assoc; + else + dev_err(&dev->dev, "Interface #%d referenced" + " by multiple IADs\n", inum); + } + } + + return retval; +} + + /* * usb_set_configuration - Makes a particular device setting be current * @dev: the device whose configuration is being updated @@ -1555,6 +1585,7 @@ intfc = cp->intf_cache[i]; intf->altsetting = intfc->altsetting; intf->num_altsetting = intfc->num_altsetting; + intf->intf_assoc = find_iad(dev, cp, i); kref_get(&intfc->ref); alt = usb_altnum_to_altsetting(intf, 0); diff -Nurb linux-2.6.22-570/drivers/usb/core/sysfs.c linux-2.6.22-591/drivers/usb/core/sysfs.c --- linux-2.6.22-570/drivers/usb/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/core/sysfs.c 2007-12-21 15:36:14.000000000 -0500 @@ -424,6 +424,25 @@ sysfs_remove_group(&dev->kobj, &dev_attr_grp); } +/* Interface Association Descriptor fields */ +#define usb_intf_assoc_attr(field, format_string) \ +static ssize_t \ +show_iad_##field (struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct usb_interface *intf = to_usb_interface (dev); \ + \ + return sprintf (buf, format_string, \ + intf->intf_assoc->field); \ +} \ +static DEVICE_ATTR(iad_##field, S_IRUGO, show_iad_##field, NULL); + +usb_intf_assoc_attr (bFirstInterface, "%02x\n") +usb_intf_assoc_attr
(bInterfaceCount, "%02d\n") +usb_intf_assoc_attr (bFunctionClass, "%02x\n") +usb_intf_assoc_attr (bFunctionSubClass, "%02x\n") +usb_intf_assoc_attr (bFunctionProtocol, "%02x\n") + /* Interface fields */ #define usb_intf_attr(field, format_string) \ static ssize_t \ @@ -487,6 +506,18 @@ } static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL); +static struct attribute *intf_assoc_attrs[] = { + &dev_attr_iad_bFirstInterface.attr, + &dev_attr_iad_bInterfaceCount.attr, + &dev_attr_iad_bFunctionClass.attr, + &dev_attr_iad_bFunctionSubClass.attr, + &dev_attr_iad_bFunctionProtocol.attr, + NULL, +}; +static struct attribute_group intf_assoc_attr_grp = { + .attrs = intf_assoc_attrs, +}; + static struct attribute *intf_attrs[] = { &dev_attr_bInterfaceNumber.attr, &dev_attr_bAlternateSetting.attr, @@ -538,6 +569,8 @@ alt->string = usb_cache_string(udev, alt->desc.iInterface); if (alt->string) retval = device_create_file(dev, &dev_attr_interface); + if (intf->intf_assoc) + retval = sysfs_create_group(&dev->kobj, &intf_assoc_attr_grp); usb_create_intf_ep_files(intf, udev); return 0; } @@ -549,4 +582,5 @@ usb_remove_intf_ep_files(intf); device_remove_file(dev, &dev_attr_interface); sysfs_remove_group(&dev->kobj, &intf_attr_grp); + sysfs_remove_group(&intf->dev.kobj, &intf_assoc_attr_grp); } diff -Nurb linux-2.6.22-570/drivers/usb/gadget/file_storage.c linux-2.6.22-591/drivers/usb/gadget/file_storage.c --- linux-2.6.22-570/drivers/usb/gadget/file_storage.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/gadget/file_storage.c 2007-12-21 15:36:12.000000000 -0500 @@ -3434,6 +3434,9 @@ allow_signal(SIGKILL); allow_signal(SIGUSR1); + /* Allow the thread to be frozen */ + set_freezable(); + /* Arrange for userspace references to be interpreted as kernel * pointers. That way we can pass a kernel pointer to a routine * that expects a __user pointer and it will work okay. 
*/ diff -Nurb linux-2.6.22-570/drivers/usb/storage/usb.c linux-2.6.22-591/drivers/usb/storage/usb.c --- linux-2.6.22-570/drivers/usb/storage/usb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/usb/storage/usb.c 2007-12-21 15:36:12.000000000 -0500 @@ -301,8 +301,6 @@ struct us_data *us = (struct us_data *)__us; struct Scsi_Host *host = us_to_host(us); - current->flags |= PF_NOFREEZE; - for(;;) { US_DEBUGP("*** thread sleeping.\n"); if(down_interruptible(&us->sema)) @@ -909,6 +907,7 @@ printk(KERN_DEBUG "usb-storage: device found at %d\n", us->pusb_dev->devnum); + set_freezable(); /* Wait for the timeout to expire or for a disconnect */ if (delay_use > 0) { printk(KERN_DEBUG "usb-storage: waiting for device " diff -Nurb linux-2.6.22-570/drivers/video/Kconfig linux-2.6.22-591/drivers/video/Kconfig --- linux-2.6.22-570/drivers/video/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/video/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -12,6 +12,13 @@ tristate default n +config VIDEO_OUTPUT_CONTROL + tristate "Lowlevel video output switch controls" + default m + help + This framework adds support for low-level control of the video + output switch. 
+ config FB tristate "Support for frame buffer devices" ---help--- diff -Nurb linux-2.6.22-570/drivers/video/Makefile linux-2.6.22-591/drivers/video/Makefile --- linux-2.6.22-570/drivers/video/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/video/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -122,3 +122,6 @@ # the test framebuffer is last obj-$(CONFIG_FB_VIRTUAL) += vfb.o + +#video output switch sysfs driver +obj-$(CONFIG_VIDEO_OUTPUT_CONTROL) += output.o diff -Nurb linux-2.6.22-570/drivers/video/aty/radeon_base.c linux-2.6.22-591/drivers/video/aty/radeon_base.c --- linux-2.6.22-570/drivers/video/aty/radeon_base.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/video/aty/radeon_base.c 2007-12-21 15:36:12.000000000 -0500 @@ -2102,7 +2102,9 @@ } -static ssize_t radeon_show_edid1(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t radeon_show_edid1(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); struct pci_dev *pdev = to_pci_dev(dev); @@ -2113,7 +2115,9 @@ } -static ssize_t radeon_show_edid2(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t radeon_show_edid2(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); struct pci_dev *pdev = to_pci_dev(dev); @@ -2126,7 +2130,6 @@ static struct bin_attribute edid1_attr = { .attr = { .name = "edid1", - .owner = THIS_MODULE, .mode = 0444, }, .size = EDID_LENGTH, @@ -2136,7 +2139,6 @@ static struct bin_attribute edid2_attr = { .attr = { .name = "edid2", - .owner = THIS_MODULE, .mode = 0444, }, .size = EDID_LENGTH, diff -Nurb linux-2.6.22-570/drivers/video/backlight/backlight.c linux-2.6.22-591/drivers/video/backlight/backlight.c --- linux-2.6.22-570/drivers/video/backlight/backlight.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/drivers/video/backlight/backlight.c 2007-12-21 15:36:12.000000000 -0500 @@ -172,7 +172,7 @@ #define DECLARE_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff -Nurb linux-2.6.22-570/drivers/video/backlight/lcd.c linux-2.6.22-591/drivers/video/backlight/lcd.c --- linux-2.6.22-570/drivers/video/backlight/lcd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/video/backlight/lcd.c 2007-12-21 15:36:12.000000000 -0500 @@ -157,7 +157,7 @@ #define DECLARE_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff -Nurb linux-2.6.22-570/drivers/video/ps3fb.c linux-2.6.22-591/drivers/video/ps3fb.c --- linux-2.6.22-570/drivers/video/ps3fb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/video/ps3fb.c 2007-12-21 15:36:12.000000000 -0500 @@ -812,6 +812,7 @@ static int ps3fbd(void *arg) { + set_freezable(); while (!kthread_should_stop()) { try_to_freeze(); set_current_state(TASK_INTERRUPTIBLE); diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c linux-2.6.22-591/drivers/w1/slaves/w1_ds2433.c --- linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/w1/slaves/w1_ds2433.c 2007-12-21 15:36:12.000000000 -0500 @@ -91,8 +91,9 @@ } #endif /* CONFIG_W1_SLAVE_DS2433_CRC */ -static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t w1_f23_read_bin(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); #ifdef CONFIG_W1_SLAVE_DS2433_CRC @@ -199,8 +200,9 @@ return 0; } -static ssize_t w1_f23_write_bin(struct kobject 
*kobj, char *buf, loff_t off, - size_t count) +static ssize_t w1_f23_write_bin(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); int addr, len, idx; @@ -252,7 +254,6 @@ .attr = { .name = "eeprom", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = W1_EEPROM_SIZE, .read = w1_f23_read_bin, diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_therm.c linux-2.6.22-591/drivers/w1/slaves/w1_therm.c --- linux-2.6.22-570/drivers/w1/slaves/w1_therm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/w1/slaves/w1_therm.c 2007-12-21 15:36:12.000000000 -0500 @@ -42,13 +42,13 @@ {} }; -static ssize_t w1_therm_read_bin(struct kobject *, char *, loff_t, size_t); +static ssize_t w1_therm_read_bin(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); static struct bin_attribute w1_therm_bin_attr = { .attr = { .name = "w1_slave", .mode = S_IRUGO, - .owner = THIS_MODULE, }, .size = W1_SLAVE_DATA_SIZE, .read = w1_therm_read_bin, @@ -159,7 +159,9 @@ return 0; } -static ssize_t w1_therm_read_bin(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_therm_read_bin(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); struct w1_master *dev = sl->master; diff -Nurb linux-2.6.22-570/drivers/w1/w1.c linux-2.6.22-591/drivers/w1/w1.c --- linux-2.6.22-570/drivers/w1/w1.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/w1/w1.c 2007-12-21 15:36:12.000000000 -0500 @@ -105,7 +105,9 @@ return sprintf(buf, "%s\n", sl->name); } -static ssize_t w1_slave_read_id(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_slave_read_id(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); @@ -128,7 +130,6 @@ .attr = { .name = "id", .mode = 
S_IRUGO, - .owner = THIS_MODULE, }, .size = 8, .read = w1_slave_read_id, @@ -136,7 +137,9 @@ /* Default family */ -static ssize_t w1_default_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_default_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); @@ -153,7 +156,9 @@ return count; } -static ssize_t w1_default_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +static ssize_t w1_default_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct w1_slave *sl = kobj_to_w1_slave(kobj); @@ -167,7 +172,6 @@ .attr = { .name = "rw", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE, }, .size = PAGE_SIZE, .read = w1_default_read, @@ -801,6 +805,7 @@ struct w1_master *dev, *n; int have_to_wait = 0; + set_freezable(); while (!kthread_should_stop() || have_to_wait) { have_to_wait = 0; diff -Nurb linux-2.6.22-570/drivers/zorro/zorro-sysfs.c linux-2.6.22-591/drivers/zorro/zorro-sysfs.c --- linux-2.6.22-570/drivers/zorro/zorro-sysfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/drivers/zorro/zorro-sysfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -49,8 +49,9 @@ static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL); -static ssize_t zorro_read_config(struct kobject *kobj, char *buf, loff_t off, - size_t count) +static ssize_t zorro_read_config(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct zorro_dev *z = to_zorro_dev(container_of(kobj, struct device, kobj)); @@ -78,7 +79,6 @@ .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, - .owner = THIS_MODULE }, .size = sizeof(struct ConfigDev), .read = zorro_read_config, diff -Nurb linux-2.6.22-570/ed linux-2.6.22-591/ed --- linux-2.6.22-570/ed 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/ed 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,6 @@ +vi -o 
./fs/proc/proc_misc.c ./fs/proc/proc_misc.c.rej +vi -o ./fs/proc/array.c ./fs/proc/array.c.rej +vi -o ./include/linux/sched.h ./include/linux/sched.h.rej +vi -o ./kernel/time/timekeeping.c ./kernel/time/timekeeping.c.rej +vi -o ./kernel/timer.c ./kernel/timer.c.rej +vi -o ./kernel/fork.c ./kernel/fork.c.rej diff -Nurb linux-2.6.22-570/edit linux-2.6.22-591/edit --- linux-2.6.22-570/edit 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/edit 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,19 @@ +vi -o ./fs/proc/root.c ./fs/proc/root.c.rej +vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej +vi -o ./include/linux/sched.h ./include/linux/sched.h.rej +vi -o ./include/net/inet_timewait_sock.h ./include/net/inet_timewait_sock.h.rej +vi -o ./include/net/route.h ./include/net/route.h.rej +vi -o ./include/net/sock.h ./include/net/sock.h.rej +vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej +vi -o ./lib/Makefile ./lib/Makefile.rej +vi -o ./net/core/dev.c ./net/core/dev.c.rej +vi -o ./net/core/rtnetlink.c ./net/core/rtnetlink.c.rej +vi -o ./net/core/sock.c ./net/core/sock.c.rej +vi -o ./net/ipv4/af_inet.c ./net/ipv4/af_inet.c.rej +vi -o ./net/ipv4/inet_connection_sock.c ./net/ipv4/inet_connection_sock.c.rej +vi -o ./net/ipv4/inet_hashtables.c ./net/ipv4/inet_hashtables.c.rej +vi -o ./net/ipv4/raw.c ./net/ipv4/raw.c.rej +vi -o ./net/ipv4/tcp_ipv4.c ./net/ipv4/tcp_ipv4.c.rej +vi -o ./net/ipv4/udp.c ./net/ipv4/udp.c.rej +vi -o ./net/ipv6/addrconf.c ./net/ipv6/addrconf.c.rej +vi -o ./net/unix/af_unix.c ./net/unix/af_unix.c.rej diff -Nurb linux-2.6.22-570/fs/Kconfig linux-2.6.22-591/fs/Kconfig --- linux-2.6.22-570/fs/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -1030,6 +1030,41 @@ endmenu +menu "Layered filesystems" + +config ECRYPT_FS + tristate "eCrypt filesystem layer support (EXPERIMENTAL)" + depends on EXPERIMENTAL && KEYS && CRYPTO && NET + help + Encrypted filesystem that operates on the 
VFS layer. See + <file:Documentation/ecryptfs.txt> to learn more about + eCryptfs. Userspace components are required and can be + obtained from <http://ecryptfs.sf.net>. + + To compile this file system support as a module, choose M here: the + module will be called ecryptfs. + +config UNION_FS + tristate "Union file system (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Unionfs is a stackable unification file system, which appears to + merge the contents of several directories (branches), while keeping + their physical content separate. + + See <http://unionfs.filesystems.org> for details + +config UNION_FS_XATTR + bool "Unionfs extended attributes" + depends on UNION_FS + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page). + + If unsure, say N. + +endmenu + menu "Miscellaneous filesystems" config ADFS_FS @@ -1082,18 +1117,6 @@ To compile this file system support as a module, choose M here: the module will be called affs. If unsure, say N. -config ECRYPT_FS - tristate "eCrypt filesystem layer support (EXPERIMENTAL)" - depends on EXPERIMENTAL && KEYS && CRYPTO && NET - help - Encrypted filesystem that operates on the VFS layer. See - <file:Documentation/ecryptfs.txt> to learn more about - eCryptfs. Userspace components are required and can be - obtained from <http://ecryptfs.sf.net>. - - To compile this file system support as a module, choose M here: the - module will be called ecryptfs. 
- config HFS_FS tristate "Apple Macintosh file system support (EXPERIMENTAL)" depends on BLOCK && EXPERIMENTAL diff -Nurb linux-2.6.22-570/fs/Makefile linux-2.6.22-591/fs/Makefile --- linux-2.6.22-570/fs/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -19,6 +19,7 @@ obj-y += no-block.o endif +obj-$(CONFIG_MMU) += revoke.o revoked_inode.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY_USER) += inotify_user.o obj-$(CONFIG_EPOLL) += eventpoll.o @@ -118,3 +119,4 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_GFS2_FS) += gfs2/ +obj-$(CONFIG_UNION_FS) += unionfs/ diff -Nurb linux-2.6.22-570/fs/afs/netdevices.c linux-2.6.22-591/fs/afs/netdevices.c --- linux-2.6.22-570/fs/afs/netdevices.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/afs/netdevices.c 2007-12-21 15:36:14.000000000 -0500 @@ -8,6 +8,7 @@ #include #include #include +#include #include "internal.h" /* @@ -23,7 +24,7 @@ BUG(); rtnl_lock(); - dev = __dev_getfirstbyhwtype(ARPHRD_ETHER); + dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); if (dev) { memcpy(mac, dev->dev_addr, maclen); ret = 0; @@ -47,7 +48,7 @@ ASSERT(maxbufs > 0); rtnl_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->type == ARPHRD_LOOPBACK && !wantloopback) continue; idev = __in_dev_get_rtnl(dev); diff -Nurb linux-2.6.22-570/fs/buffer.c linux-2.6.22-591/fs/buffer.c --- linux-2.6.22-570/fs/buffer.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/buffer.c 2007-12-21 15:36:12.000000000 -0500 @@ -982,7 +982,7 @@ struct buffer_head *bh; page = find_or_create_page(inode->i_mapping, index, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) return NULL; @@ -2899,7 +2899,8 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); + struct 
buffer_head *ret = kmem_cache_zalloc(bh_cachep, + set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); get_cpu_var(bh_accounting).nr++; diff -Nurb linux-2.6.22-570/fs/cifs/cifsfs.c linux-2.6.22-591/fs/cifs/cifsfs.c --- linux-2.6.22-570/fs/cifs/cifsfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/cifs/cifsfs.c 2007-12-21 15:36:12.000000000 -0500 @@ -849,6 +849,7 @@ __u16 netfid; int rc; + set_freezable(); do { if (try_to_freeze()) continue; diff -Nurb linux-2.6.22-570/fs/cifs/connect.c linux-2.6.22-591/fs/cifs/connect.c --- linux-2.6.22-570/fs/cifs/connect.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/cifs/connect.c 2007-12-21 15:36:12.000000000 -0500 @@ -363,6 +363,7 @@ GFP_KERNEL); } + set_freezable(); while (!kthread_should_stop()) { if (try_to_freeze()) continue; diff -Nurb linux-2.6.22-570/fs/compat_ioctl.c linux-2.6.22-591/fs/compat_ioctl.c --- linux-2.6.22-570/fs/compat_ioctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/compat_ioctl.c 2007-12-21 15:36:14.000000000 -0500 @@ -319,22 +319,21 @@ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct net_device *dev; - struct ifreq32 ifr32; + struct ifreq __user *uifr; int err; - if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32))) + uifr = compat_alloc_user_space(sizeof(struct ifreq)); + if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32))) return -EFAULT; - dev = dev_get_by_index(ifr32.ifr_ifindex); - if (!dev) - return -ENODEV; + err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr); + if (err) + return err; - strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name)); - dev_put(dev); + if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32))) + return -EFAULT; - err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32)); - return (err ? 
-EFAULT : 0); + return 0; } static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) diff -Nurb linux-2.6.22-570/fs/configfs/configfs_internal.h linux-2.6.22-591/fs/configfs/configfs_internal.h --- linux-2.6.22-570/fs/configfs/configfs_internal.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/configfs/configfs_internal.h 2007-12-21 15:36:12.000000000 -0500 @@ -29,6 +29,7 @@ struct configfs_dirent { atomic_t s_count; + int s_dependent_count; struct list_head s_sibling; struct list_head s_children; struct list_head s_links; diff -Nurb linux-2.6.22-570/fs/configfs/dir.c linux-2.6.22-591/fs/configfs/dir.c --- linux-2.6.22-570/fs/configfs/dir.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/configfs/dir.c 2007-12-21 15:36:12.000000000 -0500 @@ -355,6 +355,10 @@ /* Mark that we've taken i_mutex */ sd->s_type |= CONFIGFS_USET_DROPPING; + /* + * Yup, recursive. If there's a problem, blame + * deep nesting of default_groups + */ ret = configfs_detach_prep(sd->s_dentry); if (!ret) continue; @@ -714,6 +718,28 @@ } /* + * After the item has been detached from the filesystem view, we are + * ready to tear it out of the hierarchy. Notify the client before + * we do that so they can perform any cleanup that requires + * navigating the hierarchy. A client does not need to provide this + * callback. The subsystem semaphore MUST be held by the caller, and + * references must be valid for both items. It also assumes the + * caller has validated ci_type. 
+ */ +static void client_disconnect_notify(struct config_item *parent_item, + struct config_item *item) +{ + struct config_item_type *type; + + type = parent_item->ci_type; + BUG_ON(!type); + + if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) + type->ct_group_ops->disconnect_notify(to_config_group(parent_item), + item); +} + +/* * Drop the initial reference from make_item()/make_group() * This function assumes that reference is held on item * and that item holds a valid reference to the parent. Also, it @@ -738,6 +764,239 @@ config_item_put(item); } +#ifdef DEBUG +static void configfs_dump_one(struct configfs_dirent *sd, int level) +{ + printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd)); + +#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type); + type_print(CONFIGFS_ROOT); + type_print(CONFIGFS_DIR); + type_print(CONFIGFS_ITEM_ATTR); + type_print(CONFIGFS_ITEM_LINK); + type_print(CONFIGFS_USET_DIR); + type_print(CONFIGFS_USET_DEFAULT); + type_print(CONFIGFS_USET_DROPPING); +#undef type_print +} + +static int configfs_dump(struct configfs_dirent *sd, int level) +{ + struct configfs_dirent *child_sd; + int ret = 0; + + configfs_dump_one(sd, level); + + if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) + return 0; + + list_for_each_entry(child_sd, &sd->s_children, s_sibling) { + ret = configfs_dump(child_sd, level + 2); + if (ret) + break; + } + + return ret; +} +#endif + + +/* + * configfs_depend_item() and configfs_undepend_item() + * + * WARNING: Do not call these from a configfs callback! + * + * This describes these functions and their helpers. + * + * Allow another kernel system to depend on a config_item. If this + * happens, the item cannot go away until the dependant can live without + * it. The idea is to give client modules as simple an interface as + * possible. When a system asks them to depend on an item, they just + * call configfs_depend_item(). 
If the item is live and the client + * driver is in good shape, we'll happily do the work for them. + * + * Why is the locking complex? Because configfs uses the VFS to handle + * all locking, but this function is called outside the normal + * VFS->configfs path. So it must take VFS locks to prevent the + * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is + * why you can't call these functions underneath configfs callbacks. + * + * Note, btw, that this can be called at *any* time, even when a configfs + * subsystem isn't registered, or when configfs is loading or unloading. + * Just like configfs_register_subsystem(). So we take the same + * precautions. We pin the filesystem. We lock each i_mutex _in_order_ + * on our way down the tree. If we can find the target item in the + * configfs tree, it must be part of the subsystem tree as well, so we + * do not need the subsystem semaphore. Holding the i_mutex chain locks + * out mkdir() and rmdir(), who might be racing us. + */ + +/* + * configfs_depend_prep() + * + * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are + * attributes. This is similar but not the same to configfs_detach_prep(). + * Note that configfs_detach_prep() expects the parent to be locked when it + * is called, but we lock the parent *inside* configfs_depend_prep(). We + * do that so we can unlock it if we find nothing. + * + * Here we do a depth-first search of the dentry hierarchy looking for + * our object. We take i_mutex on each step of the way down. IT IS + * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, + * we'll drop the i_mutex. + * + * If the target is not found, -ENOENT is bubbled up and we have released + * all locks. If the target was found, the locks will be cleared by + * configfs_depend_rollback(). + * + * This adds a requirement that all config_items be unique! + * + * This is recursive because the locking traversal is tricky. 
There isn't + * much on the stack, though, so folks that need this function - be careful + * about your stack! Patches will be accepted to make it iterative. + */ +static int configfs_depend_prep(struct dentry *origin, + struct config_item *target) +{ + struct configfs_dirent *child_sd, *sd = origin->d_fsdata; + int ret = 0; + + BUG_ON(!origin || !sd); + + /* Lock this guy on the way down */ + mutex_lock(&sd->s_dentry->d_inode->i_mutex); + if (sd->s_element == target) /* Boo-yah */ + goto out; + + list_for_each_entry(child_sd, &sd->s_children, s_sibling) { + if (child_sd->s_type & CONFIGFS_DIR) { + ret = configfs_depend_prep(child_sd->s_dentry, + target); + if (!ret) + goto out; /* Child path boo-yah */ + } + } + + /* We looped all our children and didn't find target */ + mutex_unlock(&sd->s_dentry->d_inode->i_mutex); + ret = -ENOENT; + +out: + return ret; +} + +/* + * This is ONLY called if configfs_depend_prep() did its job. So we can + * trust the entire path from item back up to origin. + * + * We walk backwards from item, unlocking each i_mutex. We finish by + * unlocking origin. + */ +static void configfs_depend_rollback(struct dentry *origin, + struct config_item *item) +{ + struct dentry *dentry = item->ci_dentry; + + while (dentry != origin) { + mutex_unlock(&dentry->d_inode->i_mutex); + dentry = dentry->d_parent; + } + + mutex_unlock(&origin->d_inode->i_mutex); +} + +int configfs_depend_item(struct configfs_subsystem *subsys, + struct config_item *target) +{ + int ret; + struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; + struct config_item *s_item = &subsys->su_group.cg_item; + + /* + * Pin the configfs filesystem. This means we can safely access + * the root of the configfs filesystem. + */ + ret = configfs_pin_fs(); + if (ret) + return ret; + + /* + * Next, lock the root directory. We're going to check that the + * subsystem is really registered, and so we need to lock out + * configfs_[un]register_subsystem(). 
+ */ + mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); + + root_sd = configfs_sb->s_root->d_fsdata; + + list_for_each_entry(p, &root_sd->s_children, s_sibling) { + if (p->s_type & CONFIGFS_DIR) { + if (p->s_element == s_item) { + subsys_sd = p; + break; + } + } + } + + if (!subsys_sd) { + ret = -ENOENT; + goto out_unlock_fs; + } + + /* Ok, now we can trust subsys/s_item */ + + /* Scan the tree, locking i_mutex recursively, return 0 if found */ + ret = configfs_depend_prep(subsys_sd->s_dentry, target); + if (ret) + goto out_unlock_fs; + + /* We hold all i_mutexes from the subsystem down to the target */ + p = target->ci_dentry->d_fsdata; + p->s_dependent_count += 1; + + configfs_depend_rollback(subsys_sd->s_dentry, target); + +out_unlock_fs: + mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); + + /* + * If we succeeded, the fs is pinned via other methods. If not, + * we're done with it anyway. So release_fs() is always right. + */ + configfs_release_fs(); + + return ret; +} +EXPORT_SYMBOL(configfs_depend_item); + +/* + * Release the dependent linkage. This is much simpler than + * configfs_depend_item() because we know that the client driver is + * pinned, thus the subsystem is pinned, and therefore configfs is pinned. + */ +void configfs_undepend_item(struct configfs_subsystem *subsys, + struct config_item *target) +{ + struct configfs_dirent *sd; + + /* + * Since we can trust everything is pinned, we just need i_mutex + * on the item. + */ + mutex_lock(&target->ci_dentry->d_inode->i_mutex); + + sd = target->ci_dentry->d_fsdata; + BUG_ON(sd->s_dependent_count < 1); + + sd->s_dependent_count -= 1; + + /* + * After this unlock, we cannot trust the item to stay alive! + * DO NOT REFERENCE item after this unlock. 
+ */ + mutex_unlock(&target->ci_dentry->d_inode->i_mutex); +} +EXPORT_SYMBOL(configfs_undepend_item); static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { @@ -842,11 +1101,14 @@ if (ret) { /* Tear down everything we built up */ down(&subsys->su_sem); + + client_disconnect_notify(parent_item, item); if (group) unlink_group(group); else unlink_obj(item); client_drop_item(parent_item, item); + up(&subsys->su_sem); if (module_got) @@ -881,6 +1143,13 @@ if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; + /* + * Here's where we check for dependents. We're protected by + * i_mutex. + */ + if (sd->s_dependent_count) + return -EBUSY; + /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; @@ -911,11 +1180,13 @@ configfs_detach_group(item); down(&subsys->su_sem); + client_disconnect_notify(parent_item, item); unlink_group(to_config_group(item)); } else { configfs_detach_item(item); down(&subsys->su_sem); + client_disconnect_notify(parent_item, item); unlink_obj(item); } diff -Nurb linux-2.6.22-570/fs/configfs/file.c linux-2.6.22-591/fs/configfs/file.c --- linux-2.6.22-570/fs/configfs/file.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/configfs/file.c 2007-12-21 15:36:12.000000000 -0500 @@ -27,19 +27,26 @@ #include #include #include +#include #include -#include #include #include "configfs_internal.h" +/* + * A simple attribute can only be 4096 characters. Why 4k? Because the + * original code limited it to PAGE_SIZE. That's a bad idea, though, + * because an attribute of 16k on ia64 won't work on x86. So we limit to + * 4k, our minimum common page size. 
+ */ +#define SIMPLE_ATTR_SIZE 4096 struct configfs_buffer { size_t count; loff_t pos; char * page; struct configfs_item_operations * ops; - struct semaphore sem; + struct mutex mutex; int needs_read_fill; }; @@ -69,7 +76,7 @@ count = ops->show_attribute(item,attr,buffer->page); buffer->needs_read_fill = 0; - BUG_ON(count > (ssize_t)PAGE_SIZE); + BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); if (count >= 0) buffer->count = count; else @@ -102,7 +109,7 @@ struct configfs_buffer * buffer = file->private_data; ssize_t retval = 0; - down(&buffer->sem); + mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) goto out; @@ -112,7 +119,7 @@ retval = simple_read_from_buffer(buf, count, ppos, buffer->page, buffer->count); out: - up(&buffer->sem); + mutex_unlock(&buffer->mutex); return retval; } @@ -137,8 +144,8 @@ if (!buffer->page) return -ENOMEM; - if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; + if (count >= SIMPLE_ATTR_SIZE) + count = SIMPLE_ATTR_SIZE - 1; error = copy_from_user(buffer->page,buf,count); buffer->needs_read_fill = 1; /* if buf is assumed to contain a string, terminate it by \0, @@ -193,13 +200,13 @@ struct configfs_buffer * buffer = file->private_data; ssize_t len; - down(&buffer->sem); + mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) len = flush_write_buffer(file->f_path.dentry, buffer, count); if (len > 0) *ppos += len; - up(&buffer->sem); + mutex_unlock(&buffer->mutex); return len; } @@ -253,7 +260,7 @@ error = -ENOMEM; goto Enomem; } - init_MUTEX(&buffer->sem); + mutex_init(&buffer->mutex); buffer->needs_read_fill = 1; buffer->ops = ops; file->private_data = buffer; @@ -292,6 +299,7 @@ if (buffer) { if (buffer->page) free_page((unsigned long)buffer->page); + mutex_destroy(&buffer->mutex); kfree(buffer); } return 0; diff -Nurb linux-2.6.22-570/fs/configfs/item.c linux-2.6.22-591/fs/configfs/item.c --- linux-2.6.22-570/fs/configfs/item.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/configfs/item.c 2007-12-21 15:36:12.000000000 -0500 @@ -62,7 +62,6 @@ * dynamically allocated string that @item->ci_name points to. * Otherwise, use the static @item->ci_namebuf array. */ - int config_item_set_name(struct config_item * item, const char * fmt, ...) { int error = 0; @@ -139,12 +138,7 @@ return item; } -/** - * config_item_cleanup - free config_item resources. - * @item: item. - */ - -void config_item_cleanup(struct config_item * item) +static void config_item_cleanup(struct config_item * item) { struct config_item_type * t = item->ci_type; struct config_group * s = item->ci_group; @@ -179,12 +173,10 @@ kref_put(&item->ci_kref, config_item_release); } - /** * config_group_init - initialize a group for use * @k: group */ - void config_group_init(struct config_group *group) { config_item_init(&group->cg_item); @@ -201,8 +193,8 @@ * looking for a matching config_item. If matching item is found * take a reference and return the item. 
*/ - -struct config_item * config_group_find_obj(struct config_group * group, const char * name) +struct config_item *config_group_find_obj(struct config_group *group, + const char * name) { struct list_head * entry; struct config_item * ret = NULL; @@ -219,7 +211,6 @@ return ret; } - EXPORT_SYMBOL(config_item_init); EXPORT_SYMBOL(config_group_init); EXPORT_SYMBOL(config_item_get); diff -Nurb linux-2.6.22-570/fs/drop_caches.c linux-2.6.22-591/fs/drop_caches.c --- linux-2.6.22-570/fs/drop_caches.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/drop_caches.c 2007-12-21 15:36:12.000000000 -0500 @@ -3,6 +3,7 @@ */ #include +#include #include #include #include @@ -12,7 +13,7 @@ /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; -static void drop_pagecache_sb(struct super_block *sb) +void drop_pagecache_sb(struct super_block *sb) { struct inode *inode; @@ -24,6 +25,7 @@ } spin_unlock(&inode_lock); } +EXPORT_SYMBOL(drop_pagecache_sb); void drop_pagecache(void) { diff -Nurb linux-2.6.22-570/fs/ecryptfs/inode.c linux-2.6.22-591/fs/ecryptfs/inode.c --- linux-2.6.22-570/fs/ecryptfs/inode.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ecryptfs/inode.c 2007-12-21 15:36:12.000000000 -0500 @@ -280,7 +280,9 @@ int rc = 0; struct dentry *lower_dir_dentry; struct dentry *lower_dentry; + struct dentry *dentry_save; struct vfsmount *lower_mnt; + struct vfsmount *mnt_save; char *encoded_name; unsigned int encoded_namelen; struct ecryptfs_crypt_stat *crypt_stat = NULL; @@ -308,9 +310,13 @@ } ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " "= [%d]\n", encoded_name, encoded_namelen); - lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, - encoded_namelen - 1); + dentry_save = nd->dentry; + mnt_save = nd->mnt; + lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry, + (encoded_namelen - 1), nd); kfree(encoded_name); + nd->mnt = mnt_save; + nd->dentry = dentry_save; if 
(IS_ERR(lower_dentry)) { ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); rc = PTR_ERR(lower_dentry); diff -Nurb linux-2.6.22-570/fs/ecryptfs/main.c linux-2.6.22-591/fs/ecryptfs/main.c --- linux-2.6.22-570/fs/ecryptfs/main.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ecryptfs/main.c 2007-12-21 15:36:12.000000000 -0500 @@ -840,8 +840,6 @@ goto out; } kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); - sysfs_attr_version.attr.owner = THIS_MODULE; - sysfs_attr_version_str.attr.owner = THIS_MODULE; rc = do_sysfs_registration(); if (rc) { printk(KERN_ERR "sysfs registration failed\n"); diff -Nurb linux-2.6.22-570/fs/exec.c linux-2.6.22-591/fs/exec.c --- linux-2.6.22-570/fs/exec.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/exec.c 2007-12-21 15:36:14.000000000 -0500 @@ -861,9 +861,9 @@ current->sas_ss_sp = current->sas_ss_size = 0; if (current->euid == current->uid && current->egid == current->gid) - current->mm->dumpable = 1; + set_dumpable(current->mm, 1); else - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); name = bprm->filename; @@ -889,12 +889,12 @@ if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) { suid_keys(current); - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { suid_keys(current); - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); } /* An exec changes our domain. We are no longer part of the thread @@ -1486,6 +1486,55 @@ return core_waiters; } +/* + * set_dumpable converts traditional three-value dumpable to two flags and + * stores them into mm->flags. It modifies lower two bits of mm->flags, but + * these bits are not changed atomically. So get_dumpable can observe the + * intermediate state. 
To avoid unexpected behavior, make get_dumpable + * return either the old dumpable or the new one by paying attention to the order of + * modifying the bits. + * + * dumpable | mm->flags (binary) + * old new | initial interim final + * ---------+----------------------- + * 0 1 | 00 01 01 + * 0 2 | 00 10(*) 11 + * 1 0 | 01 00 00 + * 1 2 | 01 11 11 + * 2 0 | 11 10(*) 00 + * 2 1 | 11 11 01 + * + * (*) get_dumpable regards interim value of 10 as 11. + */ +void set_dumpable(struct mm_struct *mm, int value) +{ + switch (value) { + case 0: + clear_bit(MMF_DUMPABLE, &mm->flags); + smp_wmb(); + clear_bit(MMF_DUMP_SECURELY, &mm->flags); + break; + case 1: + set_bit(MMF_DUMPABLE, &mm->flags); + smp_wmb(); + clear_bit(MMF_DUMP_SECURELY, &mm->flags); + break; + case 2: + set_bit(MMF_DUMP_SECURELY, &mm->flags); + smp_wmb(); + set_bit(MMF_DUMPABLE, &mm->flags); + break; + } +} + +int get_dumpable(struct mm_struct *mm) +{ + int ret; + + ret = mm->flags & 0x3; + return (ret >= 2) ? 2 : ret; +} + int do_coredump(long signr, int exit_code, struct pt_regs * regs) { char corename[CORENAME_MAX_SIZE + 1]; @@ -1504,7 +1553,7 @@ if (!binfmt || !binfmt->core_dump) goto fail; down_write(&mm->mmap_sem); - if (!mm->dumpable) { + if (!get_dumpable(mm)) { up_write(&mm->mmap_sem); goto fail; } @@ -1514,11 +1563,11 @@ * process nor do we know its entire history. We only know it * was tainted so we dump it as root in mode 2. 
*/ - if (mm->dumpable == 2) { /* Setuid core dump mode */ + if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ current->fsuid = 0; /* Dump root private */ } - mm->dumpable = 0; + set_dumpable(mm, 0); retval = coredump_wait(exit_code); if (retval < 0) diff -Nurb linux-2.6.22-570/fs/gfs2/ops_address.c linux-2.6.22-591/fs/gfs2/ops_address.c --- linux-2.6.22-570/fs/gfs2/ops_address.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/gfs2/ops_address.c 2007-12-21 15:36:12.000000000 -0500 @@ -250,7 +250,7 @@ if (file) { gf = file->private_data; if (test_bit(GFF_EXLOCK, &gf->f_flags)) - /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */ + /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ goto skip_lock; } gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); diff -Nurb linux-2.6.22-570/fs/gfs2/ops_file.c linux-2.6.22-591/fs/gfs2/ops_file.c --- linux-2.6.22-570/fs/gfs2/ops_file.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/gfs2/ops_file.c 2007-12-21 15:36:12.000000000 -0500 @@ -364,6 +364,8 @@ else vma->vm_ops = &gfs2_vm_ops_private; + vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR; + gfs2_glock_dq_uninit(&i_gh); return error; diff -Nurb linux-2.6.22-570/fs/gfs2/ops_vm.c linux-2.6.22-591/fs/gfs2/ops_vm.c --- linux-2.6.22-570/fs/gfs2/ops_vm.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/gfs2/ops_vm.c 2007-12-21 15:36:12.000000000 -0500 @@ -27,13 +27,13 @@ #include "trans.h" #include "util.h" -static struct page *gfs2_private_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static struct page *gfs2_private_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { - struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); + struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); set_bit(GIF_PAGED, &ip->i_flags); - return filemap_nopage(area, address, type); + return filemap_fault(vma, fdata); } 
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) @@ -104,16 +104,14 @@ return error; } -static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct gfs2_file *gf = file->private_data; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_holder i_gh; struct page *result = NULL; - unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + - area->vm_pgoff; int alloc_required; int error; @@ -124,21 +122,27 @@ set_bit(GIF_PAGED, &ip->i_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT, + error = gfs2_write_alloc_required(ip, + (u64)fdata->pgoff << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE, &alloc_required); - if (error) + if (error) { + fdata->type = VM_FAULT_OOM; /* XXX: are these right? 
*/ goto out; + } set_bit(GFF_EXLOCK, &gf->f_flags); - result = filemap_nopage(area, address, type); + result = filemap_fault(vma, fdata); clear_bit(GFF_EXLOCK, &gf->f_flags); - if (!result || result == NOPAGE_OOM) + if (!result) goto out; if (alloc_required) { error = alloc_page_backing(ip, result); if (error) { + if (vma->vm_flags & VM_CAN_INVALIDATE) + unlock_page(result); page_cache_release(result); + fdata->type = VM_FAULT_OOM; result = NULL; goto out; } @@ -152,10 +156,10 @@ } struct vm_operations_struct gfs2_vm_ops_private = { - .nopage = gfs2_private_nopage, + .fault = gfs2_private_fault, }; struct vm_operations_struct gfs2_vm_ops_sharewrite = { - .nopage = gfs2_sharewrite_nopage, + .fault = gfs2_sharewrite_fault, }; diff -Nurb linux-2.6.22-570/fs/inode.c linux-2.6.22-591/fs/inode.c --- linux-2.6.22-570/fs/inode.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/inode.c 2007-12-21 15:36:12.000000000 -0500 @@ -149,7 +149,7 @@ mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER); + mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); mapping->assoc_mapping = NULL; mapping->backing_dev_info = &default_backing_dev_info; @@ -525,7 +525,13 @@ * new_inode - obtain an inode * @sb: superblock * - * Allocates a new inode for given superblock. + * Allocates a new inode for given superblock. The default gfp_mask + * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE. 
+ * If HIGHMEM pages are unsuitable or it is known that pages allocated + * for the page cache are not reclaimable or migratable, + * mapping_set_gfp_mask() must be called with suitable flags on the + * newly created inode's mapping + * */ struct inode *new_inode(struct super_block *sb) { diff -Nurb linux-2.6.22-570/fs/jbd/journal.c linux-2.6.22-591/fs/jbd/journal.c --- linux-2.6.22-570/fs/jbd/journal.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/jbd/journal.c 2007-12-21 15:36:12.000000000 -0500 @@ -1710,7 +1710,7 @@ journal_head_cache = kmem_cache_create("journal_head", sizeof(struct journal_head), 0, /* offset */ - 0, /* flags */ + SLAB_TEMPORARY, /* flags */ NULL, /* ctor */ NULL); /* dtor */ retval = 0; @@ -2007,7 +2007,7 @@ jbd_handle_cache = kmem_cache_create("journal_handle", sizeof(handle_t), 0, /* offset */ - 0, /* flags */ + SLAB_TEMPORARY, /* flags */ NULL, /* ctor */ NULL); /* dtor */ if (jbd_handle_cache == NULL) { diff -Nurb linux-2.6.22-570/fs/jbd/revoke.c linux-2.6.22-591/fs/jbd/revoke.c --- linux-2.6.22-570/fs/jbd/revoke.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/jbd/revoke.c 2007-12-21 15:36:12.000000000 -0500 @@ -169,13 +169,17 @@ { revoke_record_cache = kmem_cache_create("revoke_record", sizeof(struct jbd_revoke_record_s), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, + NULL, NULL); if (revoke_record_cache == 0) return -ENOMEM; revoke_table_cache = kmem_cache_create("revoke_table", sizeof(struct jbd_revoke_table_s), - 0, 0, NULL, NULL); + 0, + SLAB_TEMPORARY, + NULL, NULL); if (revoke_table_cache == 0) { kmem_cache_destroy(revoke_record_cache); revoke_record_cache = NULL; diff -Nurb linux-2.6.22-570/fs/jffs2/background.c linux-2.6.22-591/fs/jffs2/background.c --- linux-2.6.22-570/fs/jffs2/background.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/jffs2/background.c 2007-12-21 15:36:12.000000000 -0500 @@ -81,6 +81,7 @@ set_user_nice(current, 10); + set_freezable(); 
for (;;) { allow_signal(SIGHUP); diff -Nurb linux-2.6.22-570/fs/lockd/host.c linux-2.6.22-591/fs/lockd/host.c --- linux-2.6.22-570/fs/lockd/host.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/lockd/host.c 2007-12-21 15:36:12.000000000 -0500 @@ -161,15 +161,9 @@ */ nsm_unmonitor(host); - if ((clnt = host->h_rpcclnt) != NULL) { - if (atomic_read(&clnt->cl_users)) { - printk(KERN_WARNING - "lockd: active RPC handle\n"); - clnt->cl_dead = 1; - } else { - rpc_destroy_client(host->h_rpcclnt); - } - } + clnt = host->h_rpcclnt; + if (clnt != NULL) + rpc_shutdown_client(clnt); kfree(host); } diff -Nurb linux-2.6.22-570/fs/lockd/mon.c linux-2.6.22-591/fs/lockd/mon.c --- linux-2.6.22-570/fs/lockd/mon.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/lockd/mon.c 2007-12-21 15:36:12.000000000 -0500 @@ -61,6 +61,7 @@ status); else status = 0; + rpc_shutdown_client(clnt); out: return status; } @@ -138,7 +139,6 @@ .program = &nsm_program, .version = SM_VERSION, .authflavor = RPC_AUTH_NULL, - .flags = (RPC_CLNT_CREATE_ONESHOT), }; return rpc_create(&args); diff -Nurb linux-2.6.22-570/fs/lockd/svc.c linux-2.6.22-591/fs/lockd/svc.c --- linux-2.6.22-570/fs/lockd/svc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/lockd/svc.c 2007-12-21 15:36:12.000000000 -0500 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -119,13 +120,11 @@ complete(&lockd_start_done); daemonize("lockd"); + set_freezable(); /* Process request with signals blocked, but allow SIGKILL. 
*/ allow_signal(SIGKILL); - /* kick rpciod */ - rpciod_up(); - dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); if (!nlm_timeout) @@ -202,9 +201,6 @@ /* Exit the RPC thread */ svc_exit_thread(rqstp); - /* release rpciod */ - rpciod_down(); - /* Release module */ unlock_kernel(); module_put_and_exit(0); diff -Nurb linux-2.6.22-570/fs/namei.c linux-2.6.22-591/fs/namei.c --- linux-2.6.22-570/fs/namei.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/namei.c 2007-12-21 15:36:12.000000000 -0500 @@ -1386,7 +1386,8 @@ return 0; } -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +struct dentry *lookup_one_len_nd(const char *name, struct dentry *base, + int len, struct nameidata *nd) { int err; struct qstr this; @@ -1394,7 +1395,7 @@ err = __lookup_one_len(name, &this, base, len); if (err) return ERR_PTR(err); - return __lookup_hash(&this, base, NULL); + return __lookup_hash(&this, base, nd); } struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int len) @@ -3086,7 +3087,7 @@ EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); -EXPORT_SYMBOL(lookup_one_len); +EXPORT_SYMBOL(lookup_one_len_nd); EXPORT_SYMBOL(page_follow_link_light); EXPORT_SYMBOL(page_put_link); EXPORT_SYMBOL(page_readlink); diff -Nurb linux-2.6.22-570/fs/namespace.c linux-2.6.22-591/fs/namespace.c --- linux-2.6.22-570/fs/namespace.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/namespace.c 2007-12-21 15:36:12.000000000 -0500 @@ -1538,7 +1538,7 @@ new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) - return NULL; + return ERR_PTR(-ENOMEM); atomic_set(&new_ns->count, 1); INIT_LIST_HEAD(&new_ns->list); @@ -1552,7 +1552,7 @@ if (!new_ns->root) { up_write(&namespace_sem); kfree(new_ns); - return NULL; + return ERR_PTR(-ENOMEM); } spin_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); @@ -1597,7 +1597,7 @@ return new_ns; 
} -struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns, +struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, struct fs_struct *new_fs) { struct mnt_namespace *new_ns; diff -Nurb linux-2.6.22-570/fs/ncpfs/mmap.c linux-2.6.22-591/fs/ncpfs/mmap.c --- linux-2.6.22-570/fs/ncpfs/mmap.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ncpfs/mmap.c 2007-12-21 15:36:12.000000000 -0500 @@ -25,8 +25,8 @@ /* * Fill in the supplied page for mmap */ -static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static struct page* ncp_file_mmap_fault(struct vm_area_struct *area, + struct fault_data *fdata) { struct file *file = area->vm_file; struct dentry *dentry = file->f_path.dentry; @@ -40,15 +40,17 @@ page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages as long as recvmsg and memset works on it */ - if (!page) - return page; + if (!page) { + fdata->type = VM_FAULT_OOM; + return NULL; + } pg_addr = kmap(page); - address &= PAGE_MASK; - pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT); + pos = fdata->pgoff << PAGE_SHIFT; count = PAGE_SIZE; - if (address + PAGE_SIZE > area->vm_end) { - count = area->vm_end - address; + if (fdata->address + PAGE_SIZE > area->vm_end) { + WARN_ON(1); /* shouldn't happen? */ + count = area->vm_end - fdata->address; } /* what we can read in one go */ bufsize = NCP_SERVER(inode)->buffer_size; @@ -91,15 +93,14 @@ * fetches from the network, here the analogue of disk. 
* -- wli */ - if (type) - *type = VM_FAULT_MAJOR; + fdata->type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); return page; } static struct vm_operations_struct ncp_file_mmap = { - .nopage = ncp_file_mmap_nopage, + .fault = ncp_file_mmap_fault, }; @@ -123,6 +124,7 @@ return -EFBIG; vma->vm_ops = &ncp_file_mmap; + vma->vm_flags |= VM_CAN_INVALIDATE; file_accessed(file); return 0; } diff -Nurb linux-2.6.22-570/fs/nfs/callback.c linux-2.6.22-591/fs/nfs/callback.c --- linux-2.6.22-570/fs/nfs/callback.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/callback.c 2007-12-21 15:36:12.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -67,6 +68,7 @@ daemonize("nfsv4-svc"); /* Process request with signals blocked, but allow SIGKILL. */ allow_signal(SIGKILL); + set_freezable(); complete(&nfs_callback_info.started); diff -Nurb linux-2.6.22-570/fs/nfs/client.c linux-2.6.22-591/fs/nfs/client.c --- linux-2.6.22-570/fs/nfs/client.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/nfs/client.c 2007-12-21 15:36:12.000000000 -0500 @@ -102,19 +102,10 @@ int nfsversion) { struct nfs_client *clp; - int error; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! 
Error = %d\n", - __FUNCTION__, error); - goto error_1; - } - __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); - if (nfsversion == 4) { if (nfs_callback_up() < 0) goto error_2; @@ -154,9 +145,6 @@ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); error_2: - rpciod_down(); - __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); -error_1: kfree(clp); error_0: return NULL; @@ -198,9 +186,6 @@ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); - if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) - rpciod_down(); - kfree(clp->cl_hostname); kfree(clp); diff -Nurb linux-2.6.22-570/fs/nfs/delegation.c linux-2.6.22-591/fs/nfs/delegation.c --- linux-2.6.22-570/fs/nfs/delegation.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/delegation.c 2007-12-21 15:36:12.000000000 -0500 @@ -74,7 +74,7 @@ continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); - err = nfs4_open_delegation_recall(ctx->dentry, state); + err = nfs4_open_delegation_recall(ctx, state); if (err >= 0) err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); diff -Nurb linux-2.6.22-570/fs/nfs/delegation.h linux-2.6.22-591/fs/nfs/delegation.h --- linux-2.6.22-570/fs/nfs/delegation.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/delegation.h 2007-12-21 15:36:12.000000000 -0500 @@ -39,7 +39,7 @@ /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); diff -Nurb linux-2.6.22-570/fs/nfs/dir.c linux-2.6.22-591/fs/nfs/dir.c --- linux-2.6.22-570/fs/nfs/dir.c 2007-12-21 15:35:59.000000000 
-0500 +++ linux-2.6.22-591/fs/nfs/dir.c 2007-12-21 15:36:12.000000000 -0500 @@ -898,14 +898,13 @@ return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) - /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); + /* Revalidate fsid using the parent directory */ + return __nfs_revalidate_inode(server, dir); return 0; } @@ -947,7 +946,7 @@ res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); + error = nfs_reval_fsid(dir, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -1247,7 +1246,7 @@ attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && (nd->flags & LOOKUP_CREATE)) + if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; lock_kernel(); @@ -1747,8 +1746,8 @@ struct nfs_inode *nfsi; struct nfs_access_entry *cache; - spin_lock(&nfs_access_lru_lock); restart: + spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; @@ -1773,6 +1772,7 @@ clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); } spin_unlock(&inode->i_lock); + spin_unlock(&nfs_access_lru_lock); iput(inode); goto restart; } diff -Nurb linux-2.6.22-570/fs/nfs/direct.c linux-2.6.22-591/fs/nfs/direct.c --- linux-2.6.22-570/fs/nfs/direct.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/direct.c 2007-12-21 15:36:12.000000000 -0500 @@ -266,7 +266,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; 
size_t rsize = NFS_SERVER(inode)->rsize; unsigned int pgbase; int result; @@ -295,10 +295,15 @@ break; } if ((unsigned)result < data->npages) { + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); nfs_readdata_release(data); break; } + bytes -= pgbase; + data->npages = result; + } get_dreq(dreq); @@ -601,7 +606,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; @@ -630,10 +635,15 @@ break; } if ((unsigned)result < data->npages) { + bytes = result * PAGE_SIZE; + if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); nfs_writedata_release(data); break; } + bytes -= pgbase; + data->npages = result; + } get_dreq(dreq); @@ -763,10 +773,8 @@ (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; - - if (count < 0) goto out; + retval = -EFAULT; if (!access_ok(VERIFY_WRITE, buf, count)) goto out; @@ -814,7 +822,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - ssize_t retval; + ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; /* XXX: temporary */ @@ -827,7 +835,7 @@ (unsigned long) count, (long long) pos); if (nr_segs != 1) - return -EINVAL; + goto out; retval = generic_write_checks(file, &pos, &count, 0); if (retval) diff -Nurb linux-2.6.22-570/fs/nfs/inode.c linux-2.6.22-591/fs/nfs/inode.c --- linux-2.6.22-570/fs/nfs/inode.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/nfs/inode.c 2007-12-21 15:36:12.000000000 -0500 @@ -466,14 +466,14 @@ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - atomic_set(&ctx->count, 1); - ctx->dentry = 
dget(dentry); - ctx->vfsmnt = mntget(mnt); + ctx->path.dentry = dget(dentry); + ctx->path.mnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; + kref_init(&ctx->kref); } return ctx; } @@ -481,27 +481,33 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->count); + kref_get(&ctx->kref); return ctx; } -void put_nfs_open_context(struct nfs_open_context *ctx) +static void nfs_free_open_context(struct kref *kref) { - if (atomic_dec_and_test(&ctx->count)) { + struct nfs_open_context *ctx = container_of(kref, + struct nfs_open_context, kref); + if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->dentry->d_inode; + struct inode *inode = ctx->path.dentry->d_inode; spin_lock(&inode->i_lock); list_del(&ctx->list); spin_unlock(&inode->i_lock); } if (ctx->state != NULL) - nfs4_close_state(ctx->state, ctx->mode); + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); if (ctx->cred != NULL) put_rpccred(ctx->cred); - dput(ctx->dentry); - mntput(ctx->vfsmnt); + dput(ctx->path.dentry); + mntput(ctx->path.mnt); kfree(ctx); - } +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + kref_put(&ctx->kref, nfs_free_open_context); } /* @@ -977,8 +983,8 @@ goto out_changed; server = NFS_SERVER(inode); - /* Update the fsid if and only if this is the root directory */ - if (inode == inode->i_sb->s_root->d_inode + /* Update the fsid? */ + if (S_ISDIR(inode->i_mode) && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) server->fsid = fattr->fsid; @@ -1125,27 +1131,10 @@ */ void nfs4_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - /* If we are holding a delegation, return it! 
*/ nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); - /* Now clear out any remaining state */ - while (!list_empty(&nfsi->open_states)) { - struct nfs4_state *state; - - state = list_entry(nfsi->open_states.next, - struct nfs4_state, - inode_states); - dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", - __FUNCTION__, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - state); - BUG_ON(atomic_read(&state->count) != 1); - nfs4_close_state(state, state->state); - } } #endif @@ -1188,14 +1177,11 @@ inode_init_once(&nfsi->vfs_inode); spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; nfs4_init_once(nfsi); diff -Nurb linux-2.6.22-570/fs/nfs/mount_clnt.c linux-2.6.22-591/fs/nfs/mount_clnt.c --- linux-2.6.22-570/fs/nfs/mount_clnt.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/mount_clnt.c 2007-12-21 15:36:12.000000000 -0500 @@ -69,6 +69,7 @@ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); + rpc_shutdown_client(mnt_clnt); return status < 0? status : (result.status? 
-EACCES : 0); } @@ -84,8 +85,7 @@ .program = &mnt_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_INTR), + .flags = RPC_CLNT_CREATE_INTR, }; return rpc_create(&args); diff -Nurb linux-2.6.22-570/fs/nfs/nfs3proc.c linux-2.6.22-591/fs/nfs/nfs3proc.c --- linux-2.6.22-570/fs/nfs/nfs3proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/nfs3proc.c 2007-12-21 15:36:12.000000000 -0500 @@ -335,9 +335,7 @@ * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ status = nfs3_proc_setattr(dentry, &fattr, sattr); - if (status == 0) - nfs_setattr_update_inode(dentry->d_inode, sattr); - nfs_refresh_inode(dentry->d_inode, &fattr); + nfs_post_op_update_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } if (status != 0) diff -Nurb linux-2.6.22-570/fs/nfs/nfs4_fs.h linux-2.6.22-591/fs/nfs/nfs4_fs.h --- linux-2.6.22-570/fs/nfs/nfs4_fs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/nfs4_fs.h 2007-12-21 15:36:12.000000000 -0500 @@ -165,7 +165,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); @@ -196,7 +196,7 @@ extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void 
nfs4_close_state(struct nfs4_state *, mode_t); +extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); @@ -222,7 +222,7 @@ #else -#define nfs4_close_state(a, b) do { } while (0) +#define nfs4_close_state(a, b, c) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff -Nurb linux-2.6.22-570/fs/nfs/nfs4proc.c linux-2.6.22-591/fs/nfs/nfs4proc.c --- linux-2.6.22-570/fs/nfs/nfs4proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/nfs4proc.c 2007-12-21 15:36:12.000000000 -0500 @@ -214,14 +214,14 @@ } struct nfs4_opendata { - atomic_t count; + struct kref kref; struct nfs_openargs o_arg; struct nfs_openres o_res; struct nfs_open_confirmargs c_arg; struct nfs_open_confirmres c_res; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; - struct dentry *dentry; + struct path path; struct dentry *dir; struct nfs4_state_owner *owner; struct iattr attrs; @@ -230,11 +230,11 @@ int cancelled; }; -static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, +static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, struct nfs4_state_owner *sp, int flags, const struct iattr *attrs) { - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dget_parent(path->dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *p; @@ -245,8 +245,8 @@ p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - atomic_set(&p->count, 1); - p->dentry = dget(dentry); + p->path.mnt = mntget(path->mnt); + p->path.dentry = dget(path->dentry); p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); @@ -254,7 +254,7 @@ p->o_arg.open_flags = flags, p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = 
sp->so_id; - p->o_arg.name = &dentry->d_name; + p->o_arg.name = &p->path.dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; @@ -274,6 +274,7 @@ p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; + kref_init(&p->kref); return p; err_free: kfree(p); @@ -282,27 +283,23 @@ return NULL; } -static void nfs4_opendata_free(struct nfs4_opendata *p) +static void nfs4_opendata_free(struct kref *kref) { - if (p != NULL && atomic_dec_and_test(&p->count)) { + struct nfs4_opendata *p = container_of(kref, + struct nfs4_opendata, kref); + nfs_free_seqid(p->o_arg.seqid); nfs4_put_state_owner(p->owner); dput(p->dir); - dput(p->dentry); + dput(p->path.dentry); + mntput(p->path.mnt); kfree(p); - } } -/* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, - const struct rpc_call_ops *tk_ops, void *calldata) +static void nfs4_opendata_put(struct nfs4_opendata *p) { - struct rpc_task *task; - - if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) - return -ENOMEM; - rpc_execute(task); - return 0; + if (p != NULL) + kref_put(&p->kref, nfs4_opendata_free); } static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) @@ -451,7 +448,7 @@ opendata->owner->so_cred, &opendata->o_res); } - nfs4_close_state(newstate, opendata->o_arg.open_flags); + nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags); } if (newstate != state) return -ESTALE; @@ -462,7 +459,7 @@ * OPEN_RECLAIM: * reclaim state on the server after a reboot. 
*/ -static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; struct nfs4_opendata *opendata; @@ -478,7 +475,7 @@ } delegation_type = delegation->type; } - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; @@ -486,17 +483,17 @@ nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return status; } -static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_do_open_reclaim(sp, state, dentry); + err = _nfs4_do_open_reclaim(ctx, state); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -512,12 +509,12 @@ ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); + ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs4_state_owner *sp = state->owner; struct nfs4_opendata *opendata; @@ -525,24 +522,24 @@ if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) return 0; - opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); + opendata = 
nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); if (opendata == NULL) return -ENOMEM; opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, sizeof(opendata->o_arg.u.delegation.data)); ret = nfs4_open_recover(opendata, state); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } -int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs4_exception exception = { }; - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); int err; do { - err = _nfs4_open_delegation_recall(dentry, state); + err = _nfs4_open_delegation_recall(ctx, state); switch (err) { case 0: return err; @@ -601,9 +598,9 @@ nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_confirm_ops = { @@ -621,7 +618,7 @@ struct rpc_task *task; int status; - atomic_inc(&data->count); + kref_get(&data->kref); /* * If rpc_run_task() ends up calling ->rpc_release(), we * want to ensure that it takes the 'error' code path. 
@@ -704,9 +701,9 @@ nfs_confirm_seqid(&data->owner->so_seqid, 0); state = nfs4_opendata_to_nfs4_state(data); if (state != NULL) - nfs4_close_state(state, data->o_arg.open_flags); + nfs4_close_state(&data->path, state, data->o_arg.open_flags); out_free: - nfs4_opendata_free(data); + nfs4_opendata_put(data); } static const struct rpc_call_ops nfs4_open_ops = { @@ -727,7 +724,7 @@ struct rpc_task *task; int status; - atomic_inc(&data->count); + kref_get(&data->kref); /* * If rpc_run_task() ends up calling ->rpc_release(), we * want to ensure that it takes the 'error' code path. @@ -811,7 +808,7 @@ * reclaim state on the server after a network partition. * Assumes caller holds the appropriate lock */ -static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs_delegation *delegation = NFS_I(inode)->delegation; @@ -820,34 +817,34 @@ int ret; if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { - ret = _nfs4_do_access(inode, sp->so_cred, openflags); + ret = _nfs4_do_access(inode, ctx->cred, openflags); if (ret < 0) return ret; memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); set_bit(NFS_DELEGATED_STATE, &state->flags); return 0; } - opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, openflags, NULL); if (opendata == NULL) return -ENOMEM; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) { /* Invalidate the state owner so we don't ever use it again */ - nfs4_drop_state_owner(sp); - d_drop(dentry); + nfs4_drop_state_owner(state->owner); + d_drop(ctx->path.dentry); } - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); return ret; } -static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +static 
inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_open_expired(sp, state, dentry); + err = _nfs4_open_expired(ctx, state); if (err == -NFS4ERR_DELAY) nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -862,7 +859,7 @@ ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = nfs4_do_open_expired(sp, state, ctx->dentry); + ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; } @@ -953,9 +950,25 @@ } /* + * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* + * fields corresponding to attributes that were used to store the verifier. + * Make sure we clobber those fields in the later setattr call + */ +static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) +{ + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && + !(sattr->ia_valid & ATTR_ATIME_SET)) + sattr->ia_valid |= ATTR_ATIME; + + if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && + !(sattr->ia_valid & ATTR_MTIME_SET)) + sattr->ia_valid |= ATTR_MTIME; +} + +/* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -975,27 +988,30 @@ goto err_put_state_owner; down_read(&clp->cl_sem); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); + opendata = nfs4_opendata_alloc(path, sp, flags, sattr); if (opendata == NULL) goto err_release_rwsem; status = 
_nfs4_proc_open(opendata); if (status != 0) - goto err_opendata_free; + goto err_opendata_put; + + if (opendata->o_arg.open_flags & O_EXCL) + nfs4_exclusive_attrset(opendata, sattr); status = -ENOMEM; state = nfs4_opendata_to_nfs4_state(opendata); if (state == NULL) - goto err_opendata_free; + goto err_opendata_put; if (opendata->o_res.delegation_type != 0) nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); - nfs4_opendata_free(opendata); + nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; return 0; -err_opendata_free: - nfs4_opendata_free(opendata); +err_opendata_put: + nfs4_opendata_put(opendata); err_release_rwsem: up_read(&clp->cl_sem); err_put_state_owner: @@ -1006,14 +1022,14 @@ } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -1101,6 +1117,7 @@ } struct nfs4_closedata { + struct path path; struct inode *inode; struct nfs4_state *state; struct nfs_closeargs arg; @@ -1117,6 +1134,8 @@ nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); + dput(calldata->path.dentry); + mntput(calldata->path.mnt); kfree(calldata); } @@ -1209,18 +1228,20 @@ * * NOTE: Caller must be holding the sp->so_owner semaphore! 
*/ -int nfs4_do_close(struct inode *inode, struct nfs4_state *state) +int nfs4_do_close(struct path *path, struct nfs4_state *state) { - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; + struct nfs4_state_owner *sp = state->owner; + struct rpc_task *task; int status = -ENOMEM; calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) goto out; - calldata->inode = inode; + calldata->inode = state->inode; calldata->state = state; - calldata->arg.fh = NFS_FH(inode); + calldata->arg.fh = NFS_FH(state->inode); calldata->arg.stateid = &state->stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); @@ -1229,36 +1250,44 @@ calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.server = server; + calldata->path.mnt = mntget(path->mnt); + calldata->path.dentry = dget(path->dentry); - status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); - if (status == 0) - goto out; - - nfs_free_seqid(calldata->arg.seqid); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; out_free_calldata: kfree(calldata); out: + nfs4_put_open_state(state); + nfs4_put_state_owner(sp); return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) { struct file *filp; - filp = lookup_instantiate_filp(nd, dentry, NULL); + filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; ctx = (struct nfs_open_context *)filp->private_data; ctx->state = state; return 0; } - nfs4_close_state(state, nd->intent.open.flags); + nfs4_close_state(path, state, nd->intent.open.flags); return 
PTR_ERR(filp); } struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; @@ -1277,7 +1306,7 @@ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; - state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) @@ -1287,13 +1316,17 @@ res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) dentry = res; - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return res; } int nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct rpc_cred *cred; struct nfs4_state *state; @@ -1302,7 +1335,7 @@ return PTR_ERR(cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); if (IS_ERR(state)) - state = nfs4_do_open(dir, dentry, openflags, NULL, cred); + state = nfs4_do_open(dir, &path, openflags, NULL, cred); put_rpccred(cred); if (IS_ERR(state)) { switch (PTR_ERR(state)) { @@ -1318,10 +1351,10 @@ } } if (state->inode == dentry->d_inode) { - nfs4_intent_set_file(nd, dentry, state); + nfs4_intent_set_file(nd, &path, state); return 1; } - nfs4_close_state(state, openflags); + nfs4_close_state(&path, state, openflags); out_drop: d_drop(dentry); return 0; @@ -1752,6 +1785,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) { + struct path path = { + .mnt = nd->mnt, + .dentry = dentry, + }; struct nfs4_state *state; struct rpc_cred *cred; int status = 0; @@ -1761,7 +1798,7 @@ status = PTR_ERR(cred); goto out; } - state = nfs4_do_open(dir, dentry, flags, sattr, cred); + 
state = nfs4_do_open(dir, &path, flags, sattr, cred); put_rpccred(cred); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -1773,11 +1810,12 @@ status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); + nfs_post_op_update_inode(state->inode, &fattr); } - if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) - status = nfs4_intent_set_file(nd, dentry, state); + if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) + status = nfs4_intent_set_file(nd, &path, state); else - nfs4_close_state(state, flags); + nfs4_close_state(&path, state, flags); out: return status; } @@ -3285,7 +3323,7 @@ memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, sizeof(data->lsp->ls_stateid.data)); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; - renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); } nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); out: diff -Nurb linux-2.6.22-570/fs/nfs/nfs4state.c linux-2.6.22-591/fs/nfs/nfs4state.c --- linux-2.6.22-570/fs/nfs/nfs4state.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/nfs4state.c 2007-12-21 15:36:12.000000000 -0500 @@ -341,7 +341,7 @@ /* * Close the current file. 
*/ -void nfs4_close_state(struct nfs4_state *state, mode_t mode) +void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; @@ -375,10 +375,11 @@ spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); - if (oldstate != newstate && nfs4_do_close(inode, state) == 0) - return; + if (oldstate == newstate) { nfs4_put_open_state(state); nfs4_put_state_owner(owner); + } else + nfs4_do_close(path, state); } /* diff -Nurb linux-2.6.22-570/fs/nfs/nfs4xdr.c linux-2.6.22-591/fs/nfs/nfs4xdr.c --- linux-2.6.22-570/fs/nfs/nfs4xdr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/nfs4xdr.c 2007-12-21 15:36:12.000000000 -0500 @@ -3269,7 +3269,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { __be32 *p; - uint32_t bmlen; + uint32_t savewords, bmlen, i; int status; status = decode_op_hdr(xdr, OP_OPEN); @@ -3287,7 +3287,12 @@ goto xdr_error; READ_BUF(bmlen << 2); - p += bmlen; + savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); + for (i = 0; i < savewords; ++i) + READ32(res->attrset[i]); + + p += (bmlen - savewords); + return decode_delegation(xdr, res); xdr_error: dprintk("%s: Bitmap too large! 
Length = %u\n", __FUNCTION__, bmlen); diff -Nurb linux-2.6.22-570/fs/nfs/pagelist.c linux-2.6.22-591/fs/nfs/pagelist.c --- linux-2.6.22-570/fs/nfs/pagelist.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/pagelist.c 2007-12-21 15:36:12.000000000 -0500 @@ -85,9 +85,8 @@ req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; - atomic_set(&req->wb_count, 1); req->wb_context = get_nfs_open_context(ctx); - + kref_init(&req->wb_kref); return req; } @@ -109,29 +108,29 @@ } /** - * nfs_set_page_writeback_locked - Lock a request for writeback + * nfs_set_page_tag_locked - Tag a request as locked * @req: */ -int nfs_set_page_writeback_locked(struct nfs_page *req) +static int nfs_set_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (!nfs_lock_request(req)) return 0; - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } /** - * nfs_clear_page_writeback - Unlock request and wake up sleepers + * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers */ -void nfs_clear_page_writeback(struct nfs_page *req) +void nfs_clear_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); if (req->wb_page != NULL) { spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); spin_unlock(&nfsi->req_lock); } nfs_unlock_request(req); @@ -160,11 +159,9 @@ * * Note: Should never be called with the spinlock held! 
*/ -void -nfs_release_request(struct nfs_page *req) +static void nfs_free_request(struct kref *kref) { - if (!atomic_dec_and_test(&req->wb_count)) - return; + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); /* Release struct file or cached credential */ nfs_clear_request(req); @@ -172,6 +169,11 @@ nfs_page_free(req); } +void nfs_release_request(struct nfs_page *req) +{ + kref_put(&req->wb_kref, nfs_free_request); +} + static int nfs_wait_bit_interruptible(void *word) { int ret = 0; @@ -193,7 +195,7 @@ int nfs_wait_on_request(struct nfs_page *req) { - struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode); sigset_t oldmask; int ret = 0; @@ -379,10 +381,10 @@ /** * nfs_scan_list - Scan a list for matching requests * @nfsi: NFS inode - * @head: One of the NFS inode request lists * @dst: Destination list * @idx_start: lower bound of page->index to scan * @npages: idx_start + npages sets the upper bound to scan. + * @tag: tag to scan for * * Moves elements from one of the inode request lists. * If the number of requests is set to 0, the entire address_space @@ -390,9 +392,9 @@ * The requests are *not* checked to ensure that they form a contiguous set. 
* You must be holding the inode's req_lock when calling this function */ -int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, +int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, - unsigned int npages) + unsigned int npages, int tag) { struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; struct nfs_page *req; @@ -407,9 +409,9 @@ idx_end = idx_start + npages - 1; for (;;) { - found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES); + NFS_SCAN_MAXENTRIES, tag); if (found <= 0) break; for (i = 0; i < found; i++) { @@ -417,15 +419,18 @@ if (req->wb_index > idx_end) goto out; idx_start = req->wb_index + 1; - if (req->wb_list_head != head) - continue; - if (nfs_set_page_writeback_locked(req)) { + if (nfs_set_page_tag_locked(req)) { nfs_list_remove_request(req); + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, tag); nfs_list_add_request(req, dst); res++; + if (res == INT_MAX) + goto out; } } - + /* for latency reduction */ + cond_resched_lock(&nfsi->req_lock); } out: return res; diff -Nurb linux-2.6.22-570/fs/nfs/read.c linux-2.6.22-591/fs/nfs/read.c --- linux-2.6.22-570/fs/nfs/read.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/read.c 2007-12-21 15:36:12.000000000 -0500 @@ -145,8 +145,8 @@ unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_clear_request(req); @@ -164,7 +164,7 @@ int flags; data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -483,17 
+483,19 @@ */ error = nfs_wb_page(inode, page); if (error) - goto out_error; + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; error = -ESTALE; if (NFS_STALE(inode)) - goto out_error; + goto out_unlock; if (file == NULL) { error = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) - goto out_error; + goto out_unlock; } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); @@ -502,8 +504,7 @@ put_nfs_open_context(ctx); return error; - -out_error: +out_unlock: unlock_page(page); return error; } @@ -520,21 +521,32 @@ struct inode *inode = page->mapping->host; struct nfs_page *new; unsigned int len; + int error; + + error = nfs_wb_page(inode, page); + if (error) + goto out_unlock; + if (PageUptodate(page)) + goto out_unlock; - nfs_wb_page(inode, page); len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); + new = nfs_create_request(desc->ctx, inode, page, 0, len); - if (IS_ERR(new)) { - SetPageError(page); - unlock_page(page); - return PTR_ERR(new); - } + if (IS_ERR(new)) + goto out_error; + if (len < PAGE_CACHE_SIZE) zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_pageio_add_request(desc->pgio, new); return 0; +out_error: + error = PTR_ERR(new); + SetPageError(page); +out_unlock: + unlock_page(page); + return error; } int nfs_readpages(struct file *filp, struct address_space *mapping, diff -Nurb linux-2.6.22-570/fs/nfs/super.c linux-2.6.22-591/fs/nfs/super.c --- linux-2.6.22-570/fs/nfs/super.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/nfs/super.c 2007-12-21 15:36:12.000000000 -0500 @@ -292,6 +292,7 @@ { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, { NFS_MOUNT_TAGGED, ",tag", "" }, { 0, NULL, NULL } }; @@ -432,7 +433,20 @@ */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { + struct nfs_server *server = 
NFS_SB(vfsmnt->mnt_sb); + struct rpc_clnt *rpc; + shrink_submounts(vfsmnt, &nfs_automount_list); + + if (!(flags & MNT_FORCE)) + return; + /* -EIO all pending I/O */ + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = server->client; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); } /* @@ -602,13 +616,51 @@ { struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->nfs_client != server->nfs_client) + if (memcmp(&old->nfs_client->cl_addr, + &server->nfs_client->cl_addr, + sizeof(old->nfs_client->cl_addr)) != 0) + return 0; + /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ + if (old->flags & NFS_MOUNT_UNSHARED) return 0; if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; return 1; } +#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) + +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +{ + const struct nfs_server *a = s->s_fs_info; + const struct rpc_clnt *clnt_a = a->client; + const struct rpc_clnt *clnt_b = b->client; + + if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + goto Ebusy; + if (a->nfs_client != b->nfs_client) + goto Ebusy; + if (a->flags != b->flags) + goto Ebusy; + if (a->wsize != b->wsize) + goto Ebusy; + if (a->rsize != b->rsize) + goto Ebusy; + if (a->acregmin != b->acregmin) + goto Ebusy; + if (a->acregmax != b->acregmax) + goto Ebusy; + if (a->acdirmin != b->acdirmin) + goto Ebusy; + if (a->acdirmax != b->acdirmax) + goto Ebusy; + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + goto Ebusy; + return 0; +Ebusy: + return -EBUSY; +} + static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { @@ -617,6 +669,7 @@ struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; struct dentry *mntroot; + int (*compare_super)(struct super_block *,void *) = nfs_compare_super; int error; /* Validate the mount data */ @@ -631,16 
+684,22 @@ goto out_err_noserver; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -693,6 +752,7 @@ struct super_block *s; struct nfs_server *server; struct dentry *mntroot; + int (*compare_super)(struct super_block *,void *) = nfs_compare_super; int error; dprintk("--> nfs_xdev_get_sb()\n"); @@ -704,8 +764,11 @@ goto out_err_noserver; } + if (server->flags & NFS_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -810,6 +873,7 @@ struct dentry *mntroot; char *mntpath = NULL, *hostname = NULL, ip_addr[16]; void *p; + int (*compare_super)(struct super_block *,void *) = nfs_compare_super; int error; if (data == NULL) { @@ -881,16 +945,22 @@ goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; } if (s->s_fs_info != server) { + error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; + if (error < 0) + goto error_splat_super; } if (!s->s_root) { @@ -951,6 +1021,7 @@ struct super_block *s; struct nfs_server *server; struct 
dentry *mntroot; + int (*compare_super)(struct super_block *,void *) = nfs_compare_super; int error; dprintk("--> nfs4_xdev_get_sb()\n"); @@ -962,8 +1033,11 @@ goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1018,6 +1092,7 @@ struct nfs_server *server; struct dentry *mntroot; struct nfs_fh mntfh; + int (*compare_super)(struct super_block *,void *) = nfs_compare_super; int error; dprintk("--> nfs4_referral_get_sb()\n"); @@ -1029,8 +1104,11 @@ goto out_err_noserver; } + if (server->flags & NFS4_MOUNT_UNSHARED) + compare_super = NULL; + /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; diff -Nurb linux-2.6.22-570/fs/nfs/write.c linux-2.6.22-591/fs/nfs/write.c --- linux-2.6.22-570/fs/nfs/write.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfs/write.c 2007-12-21 15:36:12.000000000 -0500 @@ -117,7 +117,7 @@ if (PagePrivate(page)) { req = (struct nfs_page *)page_private(page); if (req != NULL) - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); } return req; } @@ -191,8 +191,6 @@ } /* Update file length */ nfs_grow_file(page, offset, count); - /* Set the PG_uptodate flag? 
*/ - nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); return 0; } @@ -291,7 +289,7 @@ BUG(); } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_WRITEBACK); + NFS_PAGE_TAG_LOCKED); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); spin_unlock(req_lock); nfs_pageio_add_request(pgio, req); @@ -400,7 +398,7 @@ if (PageDirty(req->wb_page)) set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 0; } @@ -409,7 +407,7 @@ */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); @@ -457,13 +455,15 @@ static void nfs_mark_request_commit(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); - nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; set_bit(PG_NEED_COMMIT, &(req)->wb_flags); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); spin_unlock(&nfsi->req_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); @@ -526,14 +526,14 @@ idx_end = idx_start + npages - 1; next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; BUG_ON(!NFS_WBACK_BUSY(req)); - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); @@ -577,10 +577,9 @@ int res = 0; if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, &nfsi->commit, dst, 
idx_start, npages); + res = nfs_scan_list(nfsi, dst, idx_start, npages, + NFS_PAGE_TAG_COMMIT); nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); } return res; } @@ -751,12 +750,17 @@ static void nfs_writepage_release(struct nfs_page *req) { - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + if (PageError(req->wb_page)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else if (!nfs_reschedule_unstable_write(req)) { + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); } else nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } static inline int flush_task_priority(int how) @@ -786,7 +790,7 @@ * NB: take care not to mess about with data->commit et al. */ data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; + data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); @@ -885,7 +889,7 @@ } nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -928,7 +932,7 @@ nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -954,8 +958,8 @@ struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1020,8 +1024,8 @@ page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - 
req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); @@ -1039,12 +1043,14 @@ dprintk(" marked for commit\n"); goto next; } + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } @@ -1157,7 +1163,7 @@ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - inode = first->wb_context->dentry->d_inode; + inode = first->wb_context->path.dentry->d_inode; data->inode = inode; data->cred = first->wb_context->cred; @@ -1207,7 +1213,7 @@ nfs_list_remove_request(req); nfs_mark_request_commit(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -1234,8 +1240,8 @@ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { @@ -1249,6 +1255,9 @@ * returned by the server against all stored verfs. 
*/ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ + /* Set the PG_uptodate flag */ + nfs_mark_uptodate(req->wb_page, req->wb_pgbase, + req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; @@ -1257,7 +1266,7 @@ dprintk(" mismatch\n"); nfs_redirty_request(req); next: - nfs_clear_page_writeback(req); + nfs_clear_page_tag_locked(req); } } diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4callback.c linux-2.6.22-591/fs/nfsd/nfs4callback.c --- linux-2.6.22-570/fs/nfsd/nfs4callback.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfsd/nfs4callback.c 2007-12-21 15:36:12.000000000 -0500 @@ -429,29 +429,23 @@ goto out_err; } - /* Kick rpciod, put the call on the wire. */ - if (rpciod_up() != 0) - goto out_clnt; - /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) - goto out_rpciod; + goto out_release_clp; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); - goto out_rpciod; + goto out_release_clp; } return; -out_rpciod: +out_release_clp: atomic_dec(&clp->cl_count); - rpciod_down(); -out_clnt: rpc_shutdown_client(cb->cb_client); out_err: cb->cb_client = NULL; diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4state.c linux-2.6.22-591/fs/nfsd/nfs4state.c --- linux-2.6.22-570/fs/nfsd/nfs4state.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfsd/nfs4state.c 2007-12-21 15:36:12.000000000 -0500 @@ -378,7 +378,6 @@ if (clnt) { clp->cl_callback.cb_client = NULL; rpc_shutdown_client(clnt); - rpciod_down(); } } diff -Nurb linux-2.6.22-570/fs/nfsd/nfssvc.c linux-2.6.22-591/fs/nfsd/nfssvc.c --- linux-2.6.22-570/fs/nfsd/nfssvc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/nfsd/nfssvc.c 2007-12-21 15:36:12.000000000 -0500 @@ -19,6 +19,7 @@ 
#include #include #include +#include #include #include @@ -432,6 +433,7 @@ * dirty pages. */ current->flags |= PF_LESS_THROTTLE; + set_freezable(); /* * The main request loop diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.c linux-2.6.22-591/fs/ocfs2/alloc.c --- linux-2.6.22-570/fs/ocfs2/alloc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/alloc.c 2007-12-21 15:36:12.000000000 -0500 @@ -50,6 +50,8 @@ #include "buffer_head_io.h" static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); +static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, + struct ocfs2_extent_block *eb); /* * Structures which describe a path through a btree, and functions to @@ -117,6 +119,31 @@ } /* + * All the elements of src into dest. After this call, src could be freed + * without affecting dest. + * + * Both paths should have the same root. Any non-root elements of dest + * will be freed. + */ +static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) +{ + int i; + + BUG_ON(path_root_bh(dest) != path_root_bh(src)); + BUG_ON(path_root_el(dest) != path_root_el(src)); + + ocfs2_reinit_path(dest, 1); + + for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { + dest->p_node[i].bh = src->p_node[i].bh; + dest->p_node[i].el = src->p_node[i].el; + + if (dest->p_node[i].bh) + get_bh(dest->p_node[i].bh); + } +} + +/* * Make the *dest path the same as src and re-initialize src path to * have a root only. */ @@ -212,10 +239,41 @@ return ret; } +/* + * Return the index of the extent record which contains cluster #v_cluster. + * -1 is returned if it was not found. + * + * Should work fine on interior and exterior nodes. 
+ */ +int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) +{ + int ret = -1; + int i; + struct ocfs2_extent_rec *rec; + u32 rec_end, rec_start, clusters; + + for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { + rec = &el->l_recs[i]; + + rec_start = le32_to_cpu(rec->e_cpos); + clusters = ocfs2_rec_clusters(el, rec); + + rec_end = rec_start + clusters; + + if (v_cluster >= rec_start && v_cluster < rec_end) { + ret = i; + break; + } + } + + return ret; +} + enum ocfs2_contig_type { CONTIG_NONE = 0, CONTIG_LEFT, - CONTIG_RIGHT + CONTIG_RIGHT, + CONTIG_LEFTRIGHT, }; @@ -253,6 +311,14 @@ { u64 blkno = le64_to_cpu(insert_rec->e_blkno); + /* + * Refuse to coalesce extent records with different flag + * fields - we don't want to mix unwritten extents with user + * data. + */ + if (ext->e_flags != insert_rec->e_flags) + return CONTIG_NONE; + if (ocfs2_extents_adjacent(ext, insert_rec) && ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) return CONTIG_RIGHT; @@ -277,7 +343,14 @@ APPEND_TAIL, }; +enum ocfs2_split_type { + SPLIT_NONE = 0, + SPLIT_LEFT, + SPLIT_RIGHT, +}; + struct ocfs2_insert_type { + enum ocfs2_split_type ins_split; enum ocfs2_append_type ins_appending; enum ocfs2_contig_type ins_contig; int ins_contig_index; @@ -285,6 +358,13 @@ int ins_tree_depth; }; +struct ocfs2_merge_ctxt { + enum ocfs2_contig_type c_contig_type; + int c_has_empty_extent; + int c_split_covers_rec; + int c_used_tail_recs; +}; + /* * How many free extents have we got before we need more meta data? 
*/ @@ -384,13 +464,7 @@ strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); eb->h_blkno = cpu_to_le64(first_blkno); eb->h_fs_generation = cpu_to_le32(osb->fs_generation); - -#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS - /* we always use slot zero's suballocator */ - eb->h_suballoc_slot = 0; -#else eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); -#endif eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); eb->h_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); @@ -461,7 +535,7 @@ struct inode *inode, struct buffer_head *fe_bh, struct buffer_head *eb_bh, - struct buffer_head *last_eb_bh, + struct buffer_head **last_eb_bh, struct ocfs2_alloc_context *meta_ac) { int status, new_blocks, i; @@ -476,7 +550,7 @@ mlog_entry_void(); - BUG_ON(!last_eb_bh); + BUG_ON(!last_eb_bh || !*last_eb_bh); fe = (struct ocfs2_dinode *) fe_bh->b_data; @@ -507,7 +581,7 @@ goto bail; } - eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; + eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be @@ -568,7 +642,7 @@ * journal_dirty erroring as it won't unless we've aborted the * handle (in which case we would never be here) so reserving * the write with journal_access is all we need to do. */ - status = ocfs2_journal_access(handle, inode, last_eb_bh, + status = ocfs2_journal_access(handle, inode, *last_eb_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); @@ -601,10 +675,10 @@ * next_leaf on the previously last-extent-block. 
*/ fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); - eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; + eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); - status = ocfs2_journal_dirty(handle, last_eb_bh); + status = ocfs2_journal_dirty(handle, *last_eb_bh); if (status < 0) mlog_errno(status); status = ocfs2_journal_dirty(handle, fe_bh); @@ -616,6 +690,14 @@ mlog_errno(status); } + /* + * Some callers want to track the rightmost leaf so pass it + * back here. + */ + brelse(*last_eb_bh); + get_bh(new_eb_bhs[0]); + *last_eb_bh = new_eb_bhs[0]; + status = 0; bail: if (new_eb_bhs) { @@ -829,6 +911,87 @@ } /* + * Grow a b-tree so that it has more records. + * + * We might shift the tree depth in which case existing paths should + * be considered invalid. + * + * Tree depth after the grow is returned via *final_depth. + * + * *last_eb_bh will be updated by ocfs2_add_branch(). + */ +static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, + struct buffer_head *di_bh, int *final_depth, + struct buffer_head **last_eb_bh, + struct ocfs2_alloc_context *meta_ac) +{ + int ret, shift; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *bh = NULL; + + BUG_ON(meta_ac == NULL); + + shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); + if (shift < 0) { + ret = shift; + mlog_errno(ret); + goto out; + } + + /* We traveled all the way to the bottom of the allocation tree + * and didn't find room for any more extents - we need to add + * another tree level */ + if (shift) { + BUG_ON(bh); + mlog(0, "need to shift tree depth (current = %d)\n", depth); + + /* ocfs2_shift_tree_depth will return us a buffer with + * the new extent block (so we can pass that to + * ocfs2_add_branch). 
*/ + ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, + meta_ac, &bh); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + depth++; + if (depth == 1) { + /* + * Special case: we have room now if we shifted from + * tree_depth 0, so no more work needs to be done. + * + * We won't be calling add_branch, so pass + * back *last_eb_bh as the new leaf. At depth + * zero, it should always be null so there's + * no reason to brelse. + */ + BUG_ON(*last_eb_bh); + get_bh(bh); + *last_eb_bh = bh; + goto out; + } + } + + /* call ocfs2_add_branch to add the final part of the tree with + * the new data. */ + mlog(0, "add branch. bh = %p\n", bh); + ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, + meta_ac); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + +out: + if (final_depth) + *final_depth = depth; + brelse(bh); + return ret; +} + +/* * This is only valid for leaf nodes, which are the only ones that can * have empty extents anyway. */ @@ -934,6 +1097,22 @@ } +static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el) +{ + int size, num_recs = le16_to_cpu(el->l_next_free_rec); + + BUG_ON(num_recs == 0); + + if (ocfs2_is_empty_extent(&el->l_recs[0])) { + num_recs--; + size = num_recs * sizeof(struct ocfs2_extent_rec); + memmove(&el->l_recs[0], &el->l_recs[1], size); + memset(&el->l_recs[num_recs], 0, + sizeof(struct ocfs2_extent_rec)); + el->l_next_free_rec = cpu_to_le16(num_recs); + } +} + /* * Create an empty extent record . * @@ -1211,6 +1390,10 @@ * immediately to their right. 
*/ left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); + if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { + BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); + left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); + } left_clusters -= le32_to_cpu(left_rec->e_cpos); left_rec->e_int_clusters = cpu_to_le32(left_clusters); @@ -1531,10 +1714,16 @@ return ret; } +/* + * Extend the transaction by enough credits to complete the rotation, + * and still leave at least the original number of credits allocated + * to this transaction. + */ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, + int op_credits, struct ocfs2_path *path) { - int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; + int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; if (handle->h_buffer_credits < credits) return ocfs2_extend_trans(handle, credits); @@ -1568,6 +1757,29 @@ return 0; } +static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) +{ + int next_free = le16_to_cpu(el->l_next_free_rec); + unsigned int range; + struct ocfs2_extent_rec *rec; + + if (next_free == 0) + return 0; + + rec = &el->l_recs[0]; + if (ocfs2_is_empty_extent(rec)) { + /* Empty list. */ + if (next_free == 1) + return 0; + rec = &el->l_recs[1]; + } + + range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); + if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) + return 1; + return 0; +} + /* * Rotate all the records in a btree right one record, starting at insert_cpos. 
* @@ -1586,11 +1798,12 @@ */ static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle, + enum ocfs2_split_type split, u32 insert_cpos, struct ocfs2_path *right_path, struct ocfs2_path **ret_left_path) { - int ret, start; + int ret, start, orig_credits = handle->h_buffer_credits; u32 cpos; struct ocfs2_path *left_path = NULL; @@ -1657,9 +1870,9 @@ (unsigned long long) path_leaf_bh(left_path)->b_blocknr); - if (ocfs2_rotate_requires_path_adjustment(left_path, + if (split == SPLIT_NONE && + ocfs2_rotate_requires_path_adjustment(left_path, insert_cpos)) { - mlog(0, "Path adjustment required\n"); /* * We've rotated the tree as much as we @@ -1687,7 +1900,7 @@ right_path->p_tree_depth); ret = ocfs2_extend_rotate_transaction(handle, start, - right_path); + orig_credits, right_path); if (ret) { mlog_errno(ret); goto out; @@ -1700,6 +1913,24 @@ goto out; } + if (split != SPLIT_NONE && + ocfs2_leftmost_rec_contains(path_leaf_el(right_path), + insert_cpos)) { + /* + * A rotate moves the rightmost left leaf + * record over to the leftmost right leaf + * slot. If we're doing an extent split + * instead of a real insert, then we have to + * check that the extent to be split wasn't + * just moved over. If it was, then we can + * exit here, passing left_path back - + * ocfs2_split_extent() is smart enough to + * search both leaves. + */ + *ret_left_path = left_path; + goto out_ret_path; + } + /* * There is no need to re-read the next right path * as we know that it'll be our current left @@ -1722,124 +1953,935 @@ return ret; } -/* - * Do the final bits of extent record insertion at the target leaf - * list. If this leaf is part of an allocation tree, it is assumed - * that the tree above has been prepared. 
- */ -static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, - struct ocfs2_extent_list *el, - struct ocfs2_insert_type *insert, - struct inode *inode) +static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, + struct ocfs2_path *path) { - int i = insert->ins_contig_index; - unsigned int range; + int i, idx; struct ocfs2_extent_rec *rec; + struct ocfs2_extent_list *el; + struct ocfs2_extent_block *eb; + u32 range; - BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); + /* Path should always be rightmost. */ + eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; + BUG_ON(eb->h_next_leaf_blk != 0ULL); - /* - * Contiguous insert - either left or right. - */ - if (insert->ins_contig != CONTIG_NONE) { - rec = &el->l_recs[i]; - if (insert->ins_contig == CONTIG_LEFT) { - rec->e_blkno = insert_rec->e_blkno; - rec->e_cpos = insert_rec->e_cpos; - } - le16_add_cpu(&rec->e_leaf_clusters, - le16_to_cpu(insert_rec->e_leaf_clusters)); - return; - } + el = &eb->h_list; + BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); + idx = le16_to_cpu(el->l_next_free_rec) - 1; + rec = &el->l_recs[idx]; + range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); - /* - * Handle insert into an empty leaf. - */ - if (le16_to_cpu(el->l_next_free_rec) == 0 || - ((le16_to_cpu(el->l_next_free_rec) == 1) && - ocfs2_is_empty_extent(&el->l_recs[0]))) { - el->l_recs[0] = *insert_rec; - el->l_next_free_rec = cpu_to_le16(1); - return; - } + for (i = 0; i < path->p_tree_depth; i++) { + el = path->p_node[i].el; + idx = le16_to_cpu(el->l_next_free_rec) - 1; + rec = &el->l_recs[idx]; - /* - * Appending insert. 
- */ - if (insert->ins_appending == APPEND_TAIL) { - i = le16_to_cpu(el->l_next_free_rec) - 1; - rec = &el->l_recs[i]; - range = le32_to_cpu(rec->e_cpos) - + le16_to_cpu(rec->e_leaf_clusters); - BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); + rec->e_int_clusters = cpu_to_le32(range); + le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); - mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= - le16_to_cpu(el->l_count), - "inode %lu, depth %u, count %u, next free %u, " - "rec.cpos %u, rec.clusters %u, " - "insert.cpos %u, insert.clusters %u\n", - inode->i_ino, - le16_to_cpu(el->l_tree_depth), - le16_to_cpu(el->l_count), - le16_to_cpu(el->l_next_free_rec), - le32_to_cpu(el->l_recs[i].e_cpos), - le16_to_cpu(el->l_recs[i].e_leaf_clusters), - le32_to_cpu(insert_rec->e_cpos), - le16_to_cpu(insert_rec->e_leaf_clusters)); - i++; - el->l_recs[i] = *insert_rec; - le16_add_cpu(&el->l_next_free_rec, 1); - return; + ocfs2_journal_dirty(handle, path->p_node[i].bh); } - - /* - * Ok, we have to rotate. - * - * At this point, it is safe to assume that inserting into an - * empty leaf and appending to a leaf have both been handled - * above. - * - * This leaf needs to have space, either by the empty 1st - * extent record, or by virtue of an l_next_rec < l_count. 
- */ - ocfs2_rotate_leaf(el, insert_rec); -} - -static inline void ocfs2_update_dinode_clusters(struct inode *inode, - struct ocfs2_dinode *di, - u32 clusters) -{ - le32_add_cpu(&di->i_clusters, clusters); - spin_lock(&OCFS2_I(inode)->ip_lock); - OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); - spin_unlock(&OCFS2_I(inode)->ip_lock); } -static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, - struct ocfs2_extent_rec *insert_rec, +static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, + struct ocfs2_path *left_path, struct ocfs2_path *right_path, - struct ocfs2_path **ret_left_path) + int subtree_index, + struct ocfs2_cached_dealloc_ctxt *dealloc) { - int ret, i, next_free; - struct buffer_head *bh; + int ret, i; + struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; + struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; struct ocfs2_extent_list *el; - struct ocfs2_path *left_path = NULL; + struct ocfs2_extent_block *eb; + struct buffer_head *bh; - *ret_left_path = NULL; + el = path_leaf_el(left_path); - /* - * This shouldn't happen for non-trees. The extent rec cluster - * count manipulation below only works for interior nodes. - */ - BUG_ON(right_path->p_tree_depth == 0); + eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; - /* - * If our appending insert is at the leftmost edge of a leaf, - * then we might need to update the rightmost records of the - * neighboring path. 
- */ - el = path_leaf_el(right_path); - next_free = le16_to_cpu(el->l_next_free_rec); + for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) + if (root_el->l_recs[i].e_blkno == eb->h_blkno) + break; + + BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec)); + + memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); + le16_add_cpu(&root_el->l_next_free_rec, -1); + + eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; + eb->h_next_leaf_blk = 0; + + ocfs2_journal_dirty(handle, root_bh); + ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); + + for(i = subtree_index + 1; i < path_num_items(right_path); i++) { + bh = right_path->p_node[i].bh; + + eb = (struct ocfs2_extent_block *)bh->b_data; + /* + * Not all nodes might have had their final count + * decremented by the caller - handle this here. + */ + el = &eb->h_list; + if (le16_to_cpu(el->l_next_free_rec) > 1) { + mlog(ML_ERROR, + "Inode %llu, attempted to remove extent block " + "%llu with %u records\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(el->l_next_free_rec)); + + ocfs2_journal_dirty(handle, bh); + ocfs2_remove_from_cache(inode, bh); + continue; + } + + el->l_next_free_rec = 0; + memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); + + ocfs2_journal_dirty(handle, bh); + + ret = ocfs2_cache_extent_block_free(dealloc, eb); + if (ret) + mlog_errno(ret); + + ocfs2_remove_from_cache(inode, bh); + } +} + +static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, + struct ocfs2_path *left_path, + struct ocfs2_path *right_path, + int subtree_index, + struct ocfs2_cached_dealloc_ctxt *dealloc, + int *deleted) +{ + int ret, i, del_right_subtree = 0; + struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; + struct ocfs2_extent_block *eb; + + *deleted = 0; + + 
right_leaf_el = path_leaf_el(right_path); + left_leaf_el = path_leaf_el(left_path); + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0])) + return 0; + + if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) + return -EAGAIN; + + eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data; + if (eb->h_next_leaf_blk == 0ULL && + le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) { + /* + * We have to update i_last_eb_blk during the meta + * data delete. + */ + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + del_right_subtree = 1; + } + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for(i = subtree_index + 1; i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]); + memset(&right_leaf_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); + if (eb->h_next_leaf_blk == 0ULL) { + /* + * XXX: move recs over to get rid of empty extent, + * decrease next_free. how does this play with the + * delete code below? 
+ */ + ocfs2_remove_empty_extent(right_leaf_el); + } + + ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); + if (ret) + mlog_errno(ret); + ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); + if (ret) + mlog_errno(ret); + + if (del_right_subtree) { + ocfs2_unlink_subtree(inode, handle, left_path, right_path, + subtree_index, dealloc); + ocfs2_update_edge_lengths(inode, handle, left_path); + + eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; + di->i_last_eb_blk = eb->h_blkno; + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret) + mlog_errno(ret); + + *deleted = 1; + } else + ocfs2_complete_edge_insert(inode, handle, left_path, right_path, + subtree_index); + +out: + return ret; +} + +/* + * Given a full path, determine what cpos value would return us a path + * containing the leaf immediately to the right of the current one. + * + * Will return zero if the path passed in is already the rightmost path. + * + * This looks similar, but is subtly different to + * ocfs2_find_cpos_for_left_leaf(). + */ +static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, + struct ocfs2_path *path, u32 *cpos) +{ + int i, j, ret = 0; + u64 blkno; + struct ocfs2_extent_list *el; + + *cpos = 0; + + if (path->p_tree_depth == 0) + return 0; + + blkno = path_leaf_bh(path)->b_blocknr; + + /* Start at the tree node just above the leaf and work our way up. */ + i = path->p_tree_depth - 1; + while (i >= 0) { + int next_free; + + el = path->p_node[i].el; + + /* + * Find the extent record just after the one in our + * path. + */ + next_free = le16_to_cpu(el->l_next_free_rec); + for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { + if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { + if (j == (next_free - 1)) { + if (i == 0) { + /* + * We've determined that the + * path specified is already + * the rightmost one - return a + * cpos of zero. 
+ */ + goto out; + } + /* + * The rightmost record points to our + * leaf - we need to travel up the + * tree one level. + */ + goto next_node; + } + + *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos); + goto out; + } + } + + /* + * If we got here, we never found a valid node where + * the tree indicated one should be. + */ + ocfs2_error(sb, + "Invalid extent tree at extent block %llu\n", + (unsigned long long)blkno); + ret = -EROFS; + goto out; + +next_node: + blkno = path->p_node[i].bh->b_blocknr; + i--; + } + +out: + return ret; +} + +static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, + handle_t *handle, + struct buffer_head *bh, + struct ocfs2_extent_list *el, + int *rotated_any) +{ + int ret; + + if (rotated_any) + *rotated_any = 0; + + if (!ocfs2_is_empty_extent(&el->l_recs[0])) + return 0; + + if (le16_to_cpu(el->l_next_free_rec) == 1) + return -EAGAIN; + + ret = ocfs2_journal_access(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ocfs2_remove_empty_extent(el); + + ret = ocfs2_journal_dirty(handle, bh); + if (ret) + mlog_errno(ret); + + if (rotated_any) + *rotated_any = 1; +out: + return ret; +} + +/* + * Left rotation of btree records. + * + * In many ways, this is (unsurprisingly) the opposite of right + * rotation. We start at some non-rightmost path containing an empty + * extent in the leaf block. The code works its way to the rightmost + * path by rotating records to the left in every subtree. 
+ * + * There are a few places where we might want to do this: + * - merging extent records + * - rightleft contiguousness during insert + * - merging two previously unwritten extents + * - truncate + * - via ocfs2_truncate, if we ever fix it up to use this code + * - via ioctl at the request of user (reverse fallocate) + * - "compressing" a tree with empty extents + * - as a result of a user defrag request + * - perhaps as a preventative measure if we notice a tree needs + * this during any of the above operations. + * + * The major difference between those states above are the ability to + * lock one of the meta data allocators so that we can remove unused + * extent blocks. It might be unrealistic for us to assume that any + * merging cases will want to lock the meta data allocator. Luckily, + * the merges are an optimization. + * + * So, merging won't happen if it would result in an empty rightmost + * path (this is illegal). + * + * This function will move extents left until it runs out of leaves to + * rotate, or it hits a right leaf that already contains an empty + * extent, in which case it will exit early. This means that we might + * never rotate anything if the 1st right leaf contains an empty + * extent. + * + * Truncate cases will have to happen as a second step. I'm not + * completely sure how we want to handle those yet. 
+ */ +static int ocfs2_rotate_tree_left(struct inode *inode, + handle_t *handle, + struct ocfs2_path *path, + struct ocfs2_cached_dealloc_ctxt *dealloc, + int *rotated_any) +{ + int ret, subtree_root, deleted, orig_credits = handle->h_buffer_credits; + u32 right_cpos; + struct ocfs2_path *left_path = NULL; + struct ocfs2_path *right_path = NULL; + + BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); + + if (rotated_any) + *rotated_any = 0; + + ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, + &right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (path->p_tree_depth == 0 || right_cpos == 0) { + /* + * Two cases where rotation of adjacent leaves isn't + * necessary: + * - in-inode extents (no btree) + * - path passed is already rightmost + */ + ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, + path_leaf_bh(path), + path_leaf_el(path), + rotated_any); + if (ret) + mlog_errno(ret); + goto out; + } + + left_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ocfs2_cp_path(left_path, path); + + right_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!right_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + while (right_cpos) { + ret = ocfs2_find_path(inode, right_path, right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + subtree_root = ocfs2_find_subtree_root(inode, left_path, + right_path); + + mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", + subtree_root, + (unsigned long long) + right_path->p_node[subtree_root].bh->b_blocknr, + right_path->p_tree_depth); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_root, + orig_credits, left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_rotate_subtree_left(inode, handle, left_path, + right_path, subtree_root, + dealloc, &deleted); + if (ret) { + if (ret != -EAGAIN) + mlog_errno(ret); + goto out; + } + + if 
(rotated_any) + *rotated_any = 1; + + /* + * The subtree rotate might have removed records on + * the rightmost edge. If so, then rotation is + * complete. + */ + if (deleted) + break; + + ocfs2_mv_path(left_path, right_path); + + ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, + &right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + } + +out: + ocfs2_free_path(right_path); + ocfs2_free_path(left_path); + + return ret; +} + +static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, + int index) +{ + struct ocfs2_extent_rec *rec = &el->l_recs[index]; + unsigned int size; + + if (rec->e_leaf_clusters == 0) { + /* + * We consumed all of the merged-from record. An empty + * extent cannot exist anywhere but the 1st array + * position, so move things over if the merged-from + * record doesn't occupy that position. + * + * This creates a new empty extent so the caller + * should be smart enough to have removed any existing + * ones. + */ + if (index > 0) { + BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); + size = index * sizeof(struct ocfs2_extent_rec); + memmove(&el->l_recs[1], &el->l_recs[0], size); + } + + /* + * Always memset - the caller doesn't check whether it + * created an empty extent, so there could be junk in + * the other fields. + */ + memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); + } +} + +/* + * Remove split_rec clusters from the record at index and merge them + * onto the beginning of the record at index + 1. 
+ */ +static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, + handle_t *handle, + struct ocfs2_extent_rec *split_rec, + struct ocfs2_extent_list *el, int index) +{ + int ret; + unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); + struct ocfs2_extent_rec *left_rec; + struct ocfs2_extent_rec *right_rec; + + BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); + + left_rec = &el->l_recs[index]; + right_rec = &el->l_recs[index + 1]; + + ret = ocfs2_journal_access(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters); + + le32_add_cpu(&right_rec->e_cpos, -split_clusters); + le64_add_cpu(&right_rec->e_blkno, + -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); + le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); + + ocfs2_cleanup_merge(el, index); + + ret = ocfs2_journal_dirty(handle, bh); + if (ret) + mlog_errno(ret); + +out: + return ret; +} + +/* + * Remove split_rec clusters from the record at index and merge them + * onto the tail of the record at index - 1. + */ +static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, + handle_t *handle, + struct ocfs2_extent_rec *split_rec, + struct ocfs2_extent_list *el, int index) +{ + int ret, has_empty_extent = 0; + unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); + struct ocfs2_extent_rec *left_rec; + struct ocfs2_extent_rec *right_rec; + + BUG_ON(index <= 0); + + left_rec = &el->l_recs[index - 1]; + right_rec = &el->l_recs[index]; + if (ocfs2_is_empty_extent(&el->l_recs[0])) + has_empty_extent = 1; + + ret = ocfs2_journal_access(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (has_empty_extent && index == 1) { + /* + * The easy case - we can just plop the record right in. 
+ */ + *left_rec = *split_rec; + + has_empty_extent = 0; + } else { + le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); + } + + le32_add_cpu(&right_rec->e_cpos, split_clusters); + le64_add_cpu(&right_rec->e_blkno, + ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); + le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); + + ocfs2_cleanup_merge(el, index); + + ret = ocfs2_journal_dirty(handle, bh); + if (ret) + mlog_errno(ret); + +out: + return ret; +} + +static int ocfs2_try_to_merge_extent(struct inode *inode, + handle_t *handle, + struct ocfs2_path *left_path, + int split_index, + struct ocfs2_extent_rec *split_rec, + struct ocfs2_cached_dealloc_ctxt *dealloc, + struct ocfs2_merge_ctxt *ctxt) + +{ + int ret = 0, rotated, delete_tail_recs = 0; + struct ocfs2_extent_list *el = path_leaf_el(left_path); + struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; + + BUG_ON(ctxt->c_contig_type == CONTIG_NONE); + + if (ctxt->c_split_covers_rec) { + delete_tail_recs++; + + if (ctxt->c_contig_type == CONTIG_LEFTRIGHT || + ctxt->c_has_empty_extent) + delete_tail_recs++; + + if (ctxt->c_has_empty_extent) { + /* + * The merge code will need to create an empty + * extent to take the place of the newly + * emptied slot. Remove any pre-existing empty + * extents - having more than one in a leaf is + * illegal. + */ + ret = ocfs2_rotate_tree_left(inode, handle, left_path, + dealloc, &rotated); + if (rotated) { + split_index--; + rec = &el->l_recs[split_index]; + } + if (ret) { + if (ret == -EAGAIN) { + ret = 0; + goto straight_insert; + } + + mlog_errno(ret); + goto out; + } + } + } + + if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { + /* + * Left-right contig implies this. + */ + BUG_ON(!ctxt->c_split_covers_rec); + BUG_ON(split_index == 0); + + /* + * Since the leftright insert always covers the entire + * extent, this call will delete the insert record + * entirely, resulting in an empty extent record added to + * the extent block. 
+ * + * Since the adding of an empty extent shifts + * everything back to the right, there's no need to + * update split_index here. + */ + ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), + handle, split_rec, el, split_index); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * We can only get this from logic error above. + */ + BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); + + /* + * The left merge left us with an empty extent, remove + * it. + */ + ret = ocfs2_rotate_tree_left(inode, handle, left_path, + dealloc, &rotated); + if (rotated) { + split_index--; + rec = &el->l_recs[split_index]; + } + if (ret) { + if (ret == -EAGAIN) { + ret = 0; + goto straight_insert; + } + + mlog_errno(ret); + goto out; + } + + /* + * Note that we don't pass split_rec here on purpose - + * we've merged it into the left side. + */ + ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), + handle, rec, el, split_index); + if (ret) { + mlog_errno(ret); + goto out; + } + + BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); + + ret = ocfs2_rotate_tree_left(inode, handle, left_path, + dealloc, NULL); + /* + * Error from this last rotate is not critical, so + * print but don't bubble it up. + */ + if (ret && ret != -EAGAIN) + mlog_errno(ret); + ret = 0; + } else { + /* + * Merge a record to the left or right. + * + * 'contig_type' is relative to the existing record, + * so for example, if we're "right contig", it's to + * the record on the left (hence the left merge). + */ + if (ctxt->c_contig_type == CONTIG_RIGHT) { + ret = ocfs2_merge_rec_left(inode, + path_leaf_bh(left_path), + handle, split_rec, el, + split_index); + if (ret) { + mlog_errno(ret); + goto out; + } + } else { + ret = ocfs2_merge_rec_right(inode, + path_leaf_bh(left_path), + handle, split_rec, el, + split_index); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + if (ctxt->c_split_covers_rec) { + /* + * The merge may have left an empty extent in + * our leaf. Try to rotate it away. 
+ */ + ret = ocfs2_rotate_tree_left(inode, handle, left_path, + dealloc, &rotated); + if (ret) + mlog_errno(ret); + ret = 0; + } + } + +out: + return ret; + +straight_insert: + el->l_recs[split_index] = *split_rec; + goto out; +} + +static void ocfs2_subtract_from_rec(struct super_block *sb, + enum ocfs2_split_type split, + struct ocfs2_extent_rec *rec, + struct ocfs2_extent_rec *split_rec) +{ + u64 len_blocks; + + len_blocks = ocfs2_clusters_to_blocks(sb, + le16_to_cpu(split_rec->e_leaf_clusters)); + + if (split == SPLIT_LEFT) { + /* + * Region is on the left edge of the existing + * record. + */ + le32_add_cpu(&rec->e_cpos, + le16_to_cpu(split_rec->e_leaf_clusters)); + le64_add_cpu(&rec->e_blkno, len_blocks); + le16_add_cpu(&rec->e_leaf_clusters, + -le16_to_cpu(split_rec->e_leaf_clusters)); + } else { + /* + * Region is on the right edge of the existing + * record. + */ + le16_add_cpu(&rec->e_leaf_clusters, + -le16_to_cpu(split_rec->e_leaf_clusters)); + } +} + +/* + * Do the final bits of extent record insertion at the target leaf + * list. If this leaf is part of an allocation tree, it is assumed + * that the tree above has been prepared. + */ +static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, + struct ocfs2_extent_list *el, + struct ocfs2_insert_type *insert, + struct inode *inode) +{ + int i = insert->ins_contig_index; + unsigned int range; + struct ocfs2_extent_rec *rec; + + BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); + + if (insert->ins_split != SPLIT_NONE) { + i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); + BUG_ON(i == -1); + rec = &el->l_recs[i]; + ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, + insert_rec); + goto rotate; + } + + /* + * Contiguous insert - either left or right. 
+ */ + if (insert->ins_contig != CONTIG_NONE) { + rec = &el->l_recs[i]; + if (insert->ins_contig == CONTIG_LEFT) { + rec->e_blkno = insert_rec->e_blkno; + rec->e_cpos = insert_rec->e_cpos; + } + le16_add_cpu(&rec->e_leaf_clusters, + le16_to_cpu(insert_rec->e_leaf_clusters)); + return; + } + + /* + * Handle insert into an empty leaf. + */ + if (le16_to_cpu(el->l_next_free_rec) == 0 || + ((le16_to_cpu(el->l_next_free_rec) == 1) && + ocfs2_is_empty_extent(&el->l_recs[0]))) { + el->l_recs[0] = *insert_rec; + el->l_next_free_rec = cpu_to_le16(1); + return; + } + + /* + * Appending insert. + */ + if (insert->ins_appending == APPEND_TAIL) { + i = le16_to_cpu(el->l_next_free_rec) - 1; + rec = &el->l_recs[i]; + range = le32_to_cpu(rec->e_cpos) + + le16_to_cpu(rec->e_leaf_clusters); + BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); + + mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= + le16_to_cpu(el->l_count), + "inode %lu, depth %u, count %u, next free %u, " + "rec.cpos %u, rec.clusters %u, " + "insert.cpos %u, insert.clusters %u\n", + inode->i_ino, + le16_to_cpu(el->l_tree_depth), + le16_to_cpu(el->l_count), + le16_to_cpu(el->l_next_free_rec), + le32_to_cpu(el->l_recs[i].e_cpos), + le16_to_cpu(el->l_recs[i].e_leaf_clusters), + le32_to_cpu(insert_rec->e_cpos), + le16_to_cpu(insert_rec->e_leaf_clusters)); + i++; + el->l_recs[i] = *insert_rec; + le16_add_cpu(&el->l_next_free_rec, 1); + return; + } + +rotate: + /* + * Ok, we have to rotate. + * + * At this point, it is safe to assume that inserting into an + * empty leaf and appending to a leaf have both been handled + * above. + * + * This leaf needs to have space, either by the empty 1st + * extent record, or by virtue of an l_next_rec < l_count. 
+ */ + ocfs2_rotate_leaf(el, insert_rec); +} + +static inline void ocfs2_update_dinode_clusters(struct inode *inode, + struct ocfs2_dinode *di, + u32 clusters) +{ + le32_add_cpu(&di->i_clusters, clusters); + spin_lock(&OCFS2_I(inode)->ip_lock); + OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); + spin_unlock(&OCFS2_I(inode)->ip_lock); +} + +static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, + struct ocfs2_extent_rec *insert_rec, + struct ocfs2_path *right_path, + struct ocfs2_path **ret_left_path) +{ + int ret, i, next_free; + struct buffer_head *bh; + struct ocfs2_extent_list *el; + struct ocfs2_path *left_path = NULL; + + *ret_left_path = NULL; + + /* + * This shouldn't happen for non-trees. The extent rec cluster + * count manipulation below only works for interior nodes. + */ + BUG_ON(right_path->p_tree_depth == 0); + + /* + * If our appending insert is at the leftmost edge of a leaf, + * then we might need to update the rightmost records of the + * neighboring path. 
+ */ + el = path_leaf_el(right_path); + next_free = le16_to_cpu(el->l_next_free_rec); if (next_free == 0 || (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { u32 left_cpos; @@ -1931,6 +2973,83 @@ return ret; } +static void ocfs2_split_record(struct inode *inode, + struct ocfs2_path *left_path, + struct ocfs2_path *right_path, + struct ocfs2_extent_rec *split_rec, + enum ocfs2_split_type split) +{ + int index; + u32 cpos = le32_to_cpu(split_rec->e_cpos); + struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; + struct ocfs2_extent_rec *rec, *tmprec; + + right_el = path_leaf_el(right_path); + if (left_path) + left_el = path_leaf_el(left_path); + + el = right_el; + insert_el = right_el; + index = ocfs2_search_extent_list(el, cpos); + if (index != -1) { + if (index == 0 && left_path) { + BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); + + /* + * This typically means that the record + * started in the left path but moved to the + * right as a result of rotation. We either + * move the existing record to the left, or we + * do the later insert there. + * + * In this case, the left path should always + * exist as the rotate code will have passed + * it back for a post-insert update. + */ + + if (split == SPLIT_LEFT) { + /* + * It's a left split. Since we know + * that the rotate code gave us an + * empty extent in the left path, we + * can just do the insert there. + */ + insert_el = left_el; + } else { + /* + * Right split - we have to move the + * existing record over to the left + * leaf. The insert will be into the + * newly created empty extent in the + * right leaf. + */ + tmprec = &right_el->l_recs[index]; + ocfs2_rotate_leaf(left_el, tmprec); + el = left_el; + + memset(tmprec, 0, sizeof(*tmprec)); + index = ocfs2_search_extent_list(left_el, cpos); + BUG_ON(index == -1); + } + } + } else { + BUG_ON(!left_path); + BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0])); + /* + * Left path is easy - we can just allow the insert to + * happen. 
+ */ + el = left_el; + insert_el = left_el; + index = ocfs2_search_extent_list(el, cpos); + BUG_ON(index == -1); + } + + rec = &el->l_recs[index]; + ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); + ocfs2_rotate_leaf(insert_el, split_rec); +} + /* * This function only does inserts on an allocation b-tree. For dinode * lists, ocfs2_insert_at_leaf() is called directly. @@ -1948,7 +3067,6 @@ { int ret, subtree_index; struct buffer_head *leaf_bh = path_leaf_bh(right_path); - struct ocfs2_extent_list *el; /* * Pass both paths to the journal. The majority of inserts @@ -1984,9 +3102,18 @@ } } - el = path_leaf_el(right_path); + if (insert->ins_split != SPLIT_NONE) { + /* + * We could call ocfs2_insert_at_leaf() for some types + * of splits, but it's easier to just let one separate + * function sort it all out. + */ + ocfs2_split_record(inode, left_path, right_path, + insert_rec, insert->ins_split); + } else + ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), + insert, inode); - ocfs2_insert_at_leaf(insert_rec, el, insert, inode); ret = ocfs2_journal_dirty(handle, leaf_bh); if (ret) mlog_errno(ret); @@ -2075,7 +3202,7 @@ * can wind up skipping both of these two special cases... */ if (rotate) { - ret = ocfs2_rotate_tree_right(inode, handle, + ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, le32_to_cpu(insert_rec->e_cpos), right_path, &left_path); if (ret) { @@ -2100,6 +3227,7 @@ } out_update_clusters: + if (type->ins_split == SPLIT_NONE) ocfs2_update_dinode_clusters(inode, di, le16_to_cpu(insert_rec->e_leaf_clusters)); @@ -2114,6 +3242,44 @@ return ret; } +static enum ocfs2_contig_type +ocfs2_figure_merge_contig_type(struct inode *inode, + struct ocfs2_extent_list *el, int index, + struct ocfs2_extent_rec *split_rec) +{ + struct ocfs2_extent_rec *rec; + enum ocfs2_contig_type ret = CONTIG_NONE; + + /* + * We're careful to check for an empty extent record here - + * the merge code will know what to do if it sees one. 
+ */ + + if (index > 0) { + rec = &el->l_recs[index - 1]; + if (index == 1 && ocfs2_is_empty_extent(rec)) { + if (split_rec->e_cpos == el->l_recs[index].e_cpos) + ret = CONTIG_RIGHT; + } else { + ret = ocfs2_extent_contig(inode, rec, split_rec); + } + } + + if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { + enum ocfs2_contig_type contig_type; + + rec = &el->l_recs[index + 1]; + contig_type = ocfs2_extent_contig(inode, rec, split_rec); + + if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) + ret = CONTIG_LEFTRIGHT; + else if (ret == CONTIG_NONE) + ret = contig_type; + } + + return ret; +} + static void ocfs2_figure_contig_type(struct inode *inode, struct ocfs2_insert_type *insert, struct ocfs2_extent_list *el, @@ -2205,6 +3371,8 @@ struct ocfs2_path *path = NULL; struct buffer_head *bh = NULL; + insert->ins_split = SPLIT_NONE; + el = &di->id2.i_list; insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); @@ -2305,130 +3473,425 @@ ocfs2_figure_appending_type(insert, el, insert_rec); } -out: - ocfs2_free_path(path); +out: + ocfs2_free_path(path); + + if (ret == 0) + *last_eb_bh = bh; + else + brelse(bh); + return ret; +} + +/* + * Insert an extent into an inode btree. 
+ * + * The caller needs to update fe->i_clusters + */ +int ocfs2_insert_extent(struct ocfs2_super *osb, + handle_t *handle, + struct inode *inode, + struct buffer_head *fe_bh, + u32 cpos, + u64 start_blk, + u32 new_clusters, + u8 flags, + struct ocfs2_alloc_context *meta_ac) +{ + int status; + struct buffer_head *last_eb_bh = NULL; + struct buffer_head *bh = NULL; + struct ocfs2_insert_type insert = {0, }; + struct ocfs2_extent_rec rec; + + mlog(0, "add %u clusters at position %u to inode %llu\n", + new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); + + mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && + (OCFS2_I(inode)->ip_clusters != cpos), + "Device %s, asking for sparse allocation: inode %llu, " + "cpos %u, clusters %u\n", + osb->dev_str, + (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, + OCFS2_I(inode)->ip_clusters); + + memset(&rec, 0, sizeof(rec)); + rec.e_cpos = cpu_to_le32(cpos); + rec.e_blkno = cpu_to_le64(start_blk); + rec.e_leaf_clusters = cpu_to_le16(new_clusters); + rec.e_flags = flags; + + status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, + &insert); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + mlog(0, "Insert.appending: %u, Insert.Contig: %u, " + "Insert.contig_index: %d, Insert.free_records: %d, " + "Insert.tree_depth: %d\n", + insert.ins_appending, insert.ins_contig, insert.ins_contig_index, + insert.ins_free_records, insert.ins_tree_depth); + + if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { + status = ocfs2_grow_tree(inode, handle, fe_bh, + &insert.ins_tree_depth, &last_eb_bh, + meta_ac); + if (status) { + mlog_errno(status); + goto bail; + } + } + + /* Finally, we can add clusters. This might rotate the tree for us. 
*/ + status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); + if (status < 0) + mlog_errno(status); + else + ocfs2_extent_map_insert_rec(inode, &rec); + +bail: + if (bh) + brelse(bh); + + if (last_eb_bh) + brelse(last_eb_bh); + + mlog_exit(status); + return status; +} + +static int ocfs2_split_and_insert(struct inode *inode, + handle_t *handle, + struct ocfs2_path *path, + struct buffer_head *di_bh, + struct buffer_head **last_eb_bh, + int split_index, + struct ocfs2_extent_rec *orig_split_rec, + struct ocfs2_alloc_context *meta_ac) +{ + int ret = 0, depth; + unsigned int insert_range, rec_range, do_leftright = 0; + struct ocfs2_extent_rec tmprec; + struct ocfs2_extent_list *rightmost_el; + struct ocfs2_extent_rec rec; + struct ocfs2_extent_rec split_rec = *orig_split_rec; + struct ocfs2_insert_type insert; + struct ocfs2_extent_block *eb; + struct ocfs2_dinode *di; + +leftright: + /* + * Store a copy of the record on the stack - it might move + * around as the tree is manipulated below. 
+ */ + rec = path_leaf_el(path)->l_recs[split_index]; + + di = (struct ocfs2_dinode *)di_bh->b_data; + rightmost_el = &di->id2.i_list; + + depth = le16_to_cpu(rightmost_el->l_tree_depth); + if (depth) { + BUG_ON(!(*last_eb_bh)); + eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; + rightmost_el = &eb->h_list; + } + + if (le16_to_cpu(rightmost_el->l_next_free_rec) == + le16_to_cpu(rightmost_el->l_count)) { + int old_depth = depth; + + ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, + meta_ac); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (old_depth != depth) { + eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; + rightmost_el = &eb->h_list; + } + } + + memset(&insert, 0, sizeof(struct ocfs2_insert_type)); + insert.ins_appending = APPEND_NONE; + insert.ins_contig = CONTIG_NONE; + insert.ins_free_records = le16_to_cpu(rightmost_el->l_count) + - le16_to_cpu(rightmost_el->l_next_free_rec); + insert.ins_tree_depth = depth; + + insert_range = le32_to_cpu(split_rec.e_cpos) + + le16_to_cpu(split_rec.e_leaf_clusters); + rec_range = le32_to_cpu(rec.e_cpos) + + le16_to_cpu(rec.e_leaf_clusters); + + if (split_rec.e_cpos == rec.e_cpos) { + insert.ins_split = SPLIT_LEFT; + } else if (insert_range == rec_range) { + insert.ins_split = SPLIT_RIGHT; + } else { + /* + * Left/right split. We fake this as a right split + * first and then make a second pass as a left split. 
+ */ + insert.ins_split = SPLIT_RIGHT; + + memset(&tmprec, 0, sizeof(tmprec)); + + tmprec.e_cpos = cpu_to_le32(insert_range); + tmprec.e_leaf_clusters = cpu_to_le16(rec_range - insert_range); + tmprec.e_flags = rec.e_flags; + tmprec.e_blkno = split_rec.e_blkno; + le64_add_cpu(&tmprec.e_blkno, + ocfs2_clusters_to_blocks(inode->i_sb, + le16_to_cpu(split_rec.e_leaf_clusters))); + split_rec = tmprec; + + BUG_ON(do_leftright); + do_leftright = 1; + } + + ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, + &insert); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (do_leftright == 1) { + u32 cpos; + struct ocfs2_extent_list *el; + + do_leftright++; + split_rec = *orig_split_rec; + + ocfs2_reinit_path(path, 1); + + cpos = le32_to_cpu(split_rec.e_cpos); + ret = ocfs2_find_path(inode, path, cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + el = path_leaf_el(path); + split_index = ocfs2_search_extent_list(el, cpos); + goto leftright; + } +out: + + return ret; +} + +/* + * Mark part or all of the extent record at split_index in the leaf + * pointed to by path as written. This removes the unwritten + * extent flag. + * + * Care is taken to handle contiguousness so as to not grow the tree. + * + * meta_ac is not strictly necessary - we only truly need it if growth + * of the tree is required. All other cases will degrade into a less + * optimal tree layout. + * + * last_eb_bh should be the rightmost leaf block for any inode with a + * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. + * + * This code is optimized for readability - several passes might be + * made over certain portions of the tree. All of those blocks will + * have been brought into cache (and pinned via the journal), so the + * extra overhead is not expressed in terms of disk reads. 
+ */ +static int __ocfs2_mark_extent_written(struct inode *inode, + struct buffer_head *di_bh, + handle_t *handle, + struct ocfs2_path *path, + int split_index, + struct ocfs2_extent_rec *split_rec, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + int ret = 0; + struct ocfs2_extent_list *el = path_leaf_el(path); + struct buffer_head *eb_bh, *last_eb_bh = NULL; + struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; + struct ocfs2_merge_ctxt ctxt; + struct ocfs2_extent_list *rightmost_el; + + if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) { /* reject a record whose unwritten bit is clear */ + ret = -EIO; + mlog_errno(ret); + goto out; + } + + if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || + ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < + (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { + ret = -EIO; + mlog_errno(ret); + goto out; + } + + eb_bh = path_leaf_bh(path); + ret = ocfs2_journal_access(handle, inode, eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, + split_index, + split_rec); + + /* + * The core merge / split code wants to know how much room is + * left in this inodes allocation tree, so we pass the + * rightmost extent list. 
+ */ + if (path->p_tree_depth) { + struct ocfs2_extent_block *eb; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), + le64_to_cpu(di->i_last_eb_blk), + &last_eb_bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } + + eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { + OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); + ret = -EROFS; + goto out; + } - if (ret == 0) - *last_eb_bh = bh; + rightmost_el = &eb->h_list; + } else + rightmost_el = path_root_el(path); + + ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec); + if (ctxt.c_used_tail_recs > 0 && + ocfs2_is_empty_extent(&rightmost_el->l_recs[0])) + ctxt.c_used_tail_recs--; + + if (rec->e_cpos == split_rec->e_cpos && + rec->e_leaf_clusters == split_rec->e_leaf_clusters) + ctxt.c_split_covers_rec = 1; else - brelse(bh); + ctxt.c_split_covers_rec = 0; + + ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); + + mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " + "has_empty: %u, split_covers: %u\n", split_index, + ctxt.c_contig_type, ctxt.c_used_tail_recs, + ctxt.c_has_empty_extent, ctxt.c_split_covers_rec); + + if (ctxt.c_contig_type == CONTIG_NONE) { + if (ctxt.c_split_covers_rec) + el->l_recs[split_index] = *split_rec; + else + ret = ocfs2_split_and_insert(inode, handle, path, di_bh, + &last_eb_bh, split_index, + split_rec, meta_ac); + if (ret) + mlog_errno(ret); + } else { + ret = ocfs2_try_to_merge_extent(inode, handle, path, + split_index, split_rec, + dealloc, &ctxt); + if (ret) + mlog_errno(ret); + } + + ocfs2_journal_dirty(handle, eb_bh); + +out: + brelse(last_eb_bh); return ret; } /* - * Insert an extent into an inode btree. + * Mark the already-existing extent at cpos as written for len clusters. * - * The caller needs to update fe->i_clusters + * If the existing extent is larger than the request, initiate a + * split. 
An attempt will be made at merging with adjacent extents. + * + * The caller is responsible for passing down meta_ac if we'll need it. */ -int ocfs2_insert_extent(struct ocfs2_super *osb, - handle_t *handle, - struct inode *inode, - struct buffer_head *fe_bh, - u32 cpos, - u64 start_blk, - u32 new_clusters, - struct ocfs2_alloc_context *meta_ac) +int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, + handle_t *handle, u32 cpos, u32 len, u32 phys, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_cached_dealloc_ctxt *dealloc) { - int status, shift; - struct buffer_head *last_eb_bh = NULL; - struct buffer_head *bh = NULL; - struct ocfs2_insert_type insert = {0, }; - struct ocfs2_extent_rec rec; - - mlog(0, "add %u clusters at position %u to inode %llu\n", - new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); - - mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && - (OCFS2_I(inode)->ip_clusters != cpos), - "Device %s, asking for sparse allocation: inode %llu, " - "cpos %u, clusters %u\n", - osb->dev_str, - (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, - OCFS2_I(inode)->ip_clusters); + int ret, index; + u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); + struct ocfs2_extent_rec split_rec; + struct ocfs2_path *left_path = NULL; + struct ocfs2_extent_list *el; - memset(&rec, 0, sizeof(rec)); - rec.e_cpos = cpu_to_le32(cpos); - rec.e_blkno = cpu_to_le64(start_blk); - rec.e_leaf_clusters = cpu_to_le16(new_clusters); + mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n", + inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno); - status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, - &insert); - if (status < 0) { - mlog_errno(status); - goto bail; + if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { + ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " + "that are being written to, but the feature bit " + "is not set in the super block.", + (unsigned long 
long)OCFS2_I(inode)->ip_blkno); + ret = -EROFS; + goto out; } - mlog(0, "Insert.appending: %u, Insert.Contig: %u, " - "Insert.contig_index: %d, Insert.free_records: %d, " - "Insert.tree_depth: %d\n", - insert.ins_appending, insert.ins_contig, insert.ins_contig_index, - insert.ins_free_records, insert.ins_tree_depth); - /* - * Avoid growing the tree unless we're out of records and the - * insert type requres one. + * XXX: This should be fixed up so that we just re-insert the + * next extent records. */ - if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) - goto out_add; + ocfs2_extent_map_trunc(inode, 0); - shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); - if (shift < 0) { - status = shift; - mlog_errno(status); - goto bail; + left_path = ocfs2_new_inode_path(di_bh); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; } - /* We traveled all the way to the bottom of the allocation tree - * and didn't find room for any more extents - we need to add - * another tree level */ - if (shift) { - BUG_ON(bh); - mlog(0, "need to shift tree depth " - "(current = %d)\n", insert.ins_tree_depth); - - /* ocfs2_shift_tree_depth will return us a buffer with - * the new extent block (so we can pass that to - * ocfs2_add_branch). */ - status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, - meta_ac, &bh); - if (status < 0) { - mlog_errno(status); - goto bail; - } - insert.ins_tree_depth++; - /* Special case: we have room now if we shifted from - * tree_depth 0 */ - if (insert.ins_tree_depth == 1) - goto out_add; + ret = ocfs2_find_path(inode, left_path, cpos); + if (ret) { + mlog_errno(ret); + goto out; } + el = path_leaf_el(left_path); - /* call ocfs2_add_branch to add the final part of the tree with - * the new data. */ - mlog(0, "add branch. 
bh = %p\n", bh); - status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, - meta_ac); - if (status < 0) { - mlog_errno(status); - goto bail; + index = ocfs2_search_extent_list(el, cpos); + if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { + ocfs2_error(inode->i_sb, + "Inode %llu has an extent at cpos %u which can no " + "longer be found.\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); + ret = -EROFS; + goto out; } -out_add: - /* Finally, we can add clusters. This might rotate the tree for us. */ - status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); - if (status < 0) - mlog_errno(status); - else - ocfs2_extent_map_insert_rec(inode, &rec); - -bail: - if (bh) - brelse(bh); + memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); + split_rec.e_cpos = cpu_to_le32(cpos); + split_rec.e_leaf_clusters = cpu_to_le16(len); + split_rec.e_blkno = cpu_to_le64(start_blkno); + split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; + split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; - if (last_eb_bh) - brelse(last_eb_bh); + ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, + index, &split_rec, meta_ac, dealloc); + if (ret) + mlog_errno(ret); - mlog_exit(status); - return status; +out: + ocfs2_free_path(left_path); + return ret; } static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) @@ -2957,6 +4420,219 @@ return status; } +/* + * Delayed de-allocation of suballocator blocks. + * + * Some sets of block de-allocations might involve multiple suballocator inodes. + * + * The locking for this can get extremely complicated, especially when + * the suballocator inodes to delete from aren't known until deep + * within an unrelated codepath. + * + * ocfs2_extent_block structures are a good example of this - an inode + * btree could have been grown by any number of nodes each allocating + * out of their own suballoc inode. 
+ * + * These structures allow the delay of block de-allocation until a + * later time, when locking of multiple cluster inodes won't cause + * deadlock. + */ + +/* + * Describes a single block free from a suballocator + */ +struct ocfs2_cached_block_free { + struct ocfs2_cached_block_free *free_next; + u64 free_blk; + unsigned int free_bit; +}; + +struct ocfs2_per_slot_free_list { + struct ocfs2_per_slot_free_list *f_next_suballocator; + int f_inode_type; + int f_slot; + struct ocfs2_cached_block_free *f_first; +}; + +static int ocfs2_free_cached_items(struct ocfs2_super *osb, + int sysfile_type, + int slot, + struct ocfs2_cached_block_free *head) +{ + int ret; + u64 bg_blkno; + handle_t *handle; + struct inode *inode; + struct buffer_head *di_bh = NULL; + struct ocfs2_cached_block_free *tmp; + + inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot); + if (!inode) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + + mutex_lock(&inode->i_mutex); + + ret = ocfs2_meta_lock(inode, &di_bh, 1); + if (ret) { + mlog_errno(ret); + goto out_mutex; + } + + handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock; + } + + while (head) { + bg_blkno = ocfs2_which_suballoc_group(head->free_blk, + head->free_bit); + mlog(0, "Free bit: (bit %u, blkno %llu)\n", + head->free_bit, (unsigned long long)head->free_blk); + + ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, + head->free_bit, bg_blkno, 1); + if (ret) { + mlog_errno(ret); + goto out_journal; + } + + ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); + if (ret) { + mlog_errno(ret); + goto out_journal; + } + + tmp = head; + head = head->free_next; + kfree(tmp); + } + +out_journal: + ocfs2_commit_trans(osb, handle); + +out_unlock: + ocfs2_meta_unlock(inode, 1); + brelse(di_bh); +out_mutex: + mutex_unlock(&inode->i_mutex); + iput(inode); +out: + while(head) { + /* Premature exit may have left some dangling items. 
*/ + tmp = head; + head = head->free_next; + kfree(tmp); + } + + return ret; +} + +int ocfs2_run_deallocs(struct ocfs2_super *osb, + struct ocfs2_cached_dealloc_ctxt *ctxt) +{ + int ret = 0, ret2; + struct ocfs2_per_slot_free_list *fl; + + if (!ctxt) + return 0; + + while (ctxt->c_first_suballocator) { + fl = ctxt->c_first_suballocator; + + if (fl->f_first) { + mlog(0, "Free items: (type %u, slot %d)\n", + fl->f_inode_type, fl->f_slot); + ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, + fl->f_slot, fl->f_first); + if (ret2) + mlog_errno(ret2); + if (!ret) + ret = ret2; + } + + ctxt->c_first_suballocator = fl->f_next_suballocator; + kfree(fl); + } + + return ret; +} + +static struct ocfs2_per_slot_free_list * +ocfs2_find_per_slot_free_list(int type, + int slot, + struct ocfs2_cached_dealloc_ctxt *ctxt) +{ + struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; + + while (fl) { + if (fl->f_inode_type == type && fl->f_slot == slot) + return fl; + + fl = fl->f_next_suballocator; + } + + fl = kmalloc(sizeof(*fl), GFP_NOFS); + if (fl) { + fl->f_inode_type = type; + fl->f_slot = slot; + fl->f_first = NULL; + fl->f_next_suballocator = ctxt->c_first_suballocator; + + ctxt->c_first_suballocator = fl; + } + return fl; +} + +static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + int type, int slot, u64 blkno, + unsigned int bit) +{ + int ret; + struct ocfs2_per_slot_free_list *fl; + struct ocfs2_cached_block_free *item; + + fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); + if (fl == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + item = kmalloc(sizeof(*item), GFP_NOFS); + if (item == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", + type, slot, bit, (unsigned long long)blkno); + + item->free_blk = blkno; + item->free_bit = bit; + item->free_next = fl->f_first; + + fl->f_first = item; + + ret = 0; +out: + return ret; +} + +static int 
ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, + struct ocfs2_extent_block *eb) +{ + return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, + le16_to_cpu(eb->h_suballoc_slot), + le64_to_cpu(eb->h_blkno), + le16_to_cpu(eb->h_suballoc_bit)); +} + /* This function will figure out whether the currently last extent * block will be deleted, and if it will, what the new last extent * block will be so we can update his h_next_leaf_blk field, as well @@ -3238,27 +4914,10 @@ BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); - if (le16_to_cpu(eb->h_suballoc_slot) == 0) { - /* - * This code only understands how to - * lock the suballocator in slot 0, - * which is fine because allocation is - * only ever done out of that - * suballocator too. A future version - * might change that however, so avoid - * a free if we don't know how to - * handle it. This way an fs incompat - * bit will not be necessary. - */ - ret = ocfs2_free_extent_block(handle, - tc->tc_ext_alloc_inode, - tc->tc_ext_alloc_bh, - eb); - + ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); /* An error here is not fatal. */ if (ret < 0) mlog_errno(ret); - } } else { deleted_eb = 0; } @@ -3631,8 +5290,6 @@ mlog_entry_void(); - down_write(&OCFS2_I(inode)->ip_alloc_sem); - new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, i_size_read(inode)); @@ -3754,7 +5411,6 @@ goto start; bail: - up_write(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_schedule_truncate_log_flush(osb, 1); @@ -3764,6 +5420,8 @@ if (handle) ocfs2_commit_trans(osb, handle); + ocfs2_run_deallocs(osb, &tc->tc_dealloc); + ocfs2_free_path(path); /* This will drop the ext_alloc cluster lock for us */ @@ -3774,23 +5432,18 @@ } /* - * Expects the inode to already be locked. This will figure out which - * inodes need to be locked and will put them on the returned truncate - * context. + * Expects the inode to already be locked. 
*/ int ocfs2_prepare_truncate(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *fe_bh, struct ocfs2_truncate_context **tc) { - int status, metadata_delete, i; + int status; unsigned int new_i_clusters; struct ocfs2_dinode *fe; struct ocfs2_extent_block *eb; - struct ocfs2_extent_list *el; struct buffer_head *last_eb_bh = NULL; - struct inode *ext_alloc_inode = NULL; - struct buffer_head *ext_alloc_bh = NULL; mlog_entry_void(); @@ -3810,12 +5463,9 @@ mlog_errno(status); goto bail; } + ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); - metadata_delete = 0; if (fe->id2.i_list.l_tree_depth) { - /* If we have a tree, then the truncate may result in - * metadata deletes. Figure this out from the - * rightmost leaf block.*/ status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), &last_eb_bh, OCFS2_BH_CACHED, inode); if (status < 0) { @@ -3830,43 +5480,10 @@ status = -EIO; goto bail; } - el = &(eb->h_list); - - i = 0; - if (ocfs2_is_empty_extent(&el->l_recs[0])) - i = 1; - /* - * XXX: Should we check that next_free_rec contains - * the extent? - */ - if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters) - metadata_delete = 1; } (*tc)->tc_last_eb_bh = last_eb_bh; - if (metadata_delete) { - mlog(0, "Will have to delete metadata for this trunc. 
" - "locking allocator.\n"); - ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); - if (!ext_alloc_inode) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - mutex_lock(&ext_alloc_inode->i_mutex); - (*tc)->tc_ext_alloc_inode = ext_alloc_inode; - - status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); - if (status < 0) { - mlog_errno(status); - goto bail; - } - (*tc)->tc_ext_alloc_bh = ext_alloc_bh; - (*tc)->tc_ext_alloc_locked = 1; - } - status = 0; bail: if (status < 0) { @@ -3880,16 +5497,13 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) { - if (tc->tc_ext_alloc_inode) { - if (tc->tc_ext_alloc_locked) - ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); - - mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); - iput(tc->tc_ext_alloc_inode); - } - - if (tc->tc_ext_alloc_bh) - brelse(tc->tc_ext_alloc_bh); + /* + * The caller is responsible for completing deallocation + * before freeing the context. + */ + if (tc->tc_dealloc.c_first_suballocator != NULL) + mlog(ML_NOTICE, + "Truncate completion has non-empty dealloc context\n"); if (tc->tc_last_eb_bh) brelse(tc->tc_last_eb_bh); diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.h linux-2.6.22-591/fs/ocfs2/alloc.h --- linux-2.6.22-570/fs/ocfs2/alloc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/alloc.h 2007-12-21 15:36:12.000000000 -0500 @@ -34,7 +34,13 @@ u32 cpos, u64 start_blk, u32 new_clusters, + u8 flags, struct ocfs2_alloc_context *meta_ac); +struct ocfs2_cached_dealloc_ctxt; +int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, + handle_t *handle, u32 cpos, u32 len, u32 phys, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_cached_dealloc_ctxt *dealloc); int ocfs2_num_free_extents(struct ocfs2_super *osb, struct inode *inode, struct ocfs2_dinode *fe); @@ -63,9 +69,27 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, struct ocfs2_dinode *tl_copy); +/* + * Process local structure 
which describes the block unlinks done + * during an operation. This is populated via + * ocfs2_cache_block_dealloc(). + * + * ocfs2_run_deallocs() should be called after the potentially + * de-allocating routines. No journal handles should be open, and most + * locks should have been dropped. + */ +struct ocfs2_cached_dealloc_ctxt { + struct ocfs2_per_slot_free_list *c_first_suballocator; +}; +static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) +{ + c->c_first_suballocator = NULL; +} +int ocfs2_run_deallocs(struct ocfs2_super *osb, + struct ocfs2_cached_dealloc_ctxt *ctxt); + struct ocfs2_truncate_context { - struct inode *tc_ext_alloc_inode; - struct buffer_head *tc_ext_alloc_bh; + struct ocfs2_cached_dealloc_ctxt tc_dealloc; int tc_ext_alloc_locked; /* is it cluster locked? */ /* these get destroyed once it's passed to ocfs2_commit_truncate. */ struct buffer_head *tc_last_eb_bh; @@ -84,6 +108,7 @@ int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, u32 cpos, struct buffer_head **leaf_bh); +int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); /* * Helper function to look at the # of clusters in an extent record. diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.c linux-2.6.22-591/fs/ocfs2/aops.c --- linux-2.6.22-570/fs/ocfs2/aops.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/aops.c 2007-12-21 15:36:12.000000000 -0500 @@ -232,7 +232,7 @@ * might now be discovering a truncate that hit on another node. * block_read_full_page->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers - * (generic_file_read, fault->nopage) are clever enough to check i_size + * (generic_file_read, vm_ops->fault) are clever enough to check i_size * and notice that the page they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? 
@@ -705,6 +705,8 @@ bh = bh->b_this_page, block_start += bsize) { block_end = block_start + bsize; + clear_buffer_new(bh); + /* * Ignore blocks outside of our i/o range - * they may belong to unallocated clusters. @@ -719,9 +721,8 @@ * For an allocating write with cluster size >= page * size, we always write the entire page. */ - - if (buffer_new(bh)) - clear_buffer_new(bh); + if (new) + set_buffer_new(bh); if (!buffer_mapped(bh)) { map_bh(bh, inode->i_sb, *p_blkno); @@ -760,18 +761,13 @@ bh = head; block_start = 0; do { - void *kaddr; - block_end = block_start + bsize; if (block_end <= from) goto next_bh; if (block_start >= to) break; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+block_start, 0, bh->b_size); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, block_start, bh->b_size, KM_USER0); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -783,217 +779,240 @@ return ret; } +#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE) +#define OCFS2_MAX_CTXT_PAGES 1 +#else +#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE) +#endif + +#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) + /* - * This will copy user data from the buffer page in the splice - * context. - * - * For now, we ignore SPLICE_F_MOVE as that would require some extra - * communication out all the way to ocfs2_write(). + * Describe the state of a single cluster to be written to. */ -int ocfs2_map_and_write_splice_data(struct inode *inode, - struct ocfs2_write_ctxt *wc, u64 *p_blkno, - unsigned int *ret_from, unsigned int *ret_to) +struct ocfs2_write_cluster_desc { + u32 c_cpos; + u32 c_phys; + /* + * Give this a unique field because c_phys eventually gets + * filled. 
+ */ + unsigned c_new; + unsigned c_unwritten; +}; + +static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) { - int ret; - unsigned int to, from, cluster_start, cluster_end; - char *src, *dst; - struct ocfs2_splice_write_priv *sp = wc->w_private; - struct pipe_buffer *buf = sp->s_buf; - unsigned long bytes, src_from; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + return d->c_new || d->c_unwritten; +} - ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, - &cluster_end); +struct ocfs2_write_ctxt { + /* Logical cluster position / len of write */ + u32 w_cpos; + u32 w_clen; - from = sp->s_offset; - src_from = sp->s_buf_offset; - bytes = wc->w_count; + struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; - if (wc->w_large_pages) { /* - * For cluster size < page size, we have to - * calculate pos within the cluster and obey - * the rightmost boundary. - */ - bytes = min(bytes, (unsigned long)(osb->s_clustersize - - (wc->w_pos & (osb->s_clustersize - 1)))); - } - to = from + bytes; - - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from < cluster_start); - BUG_ON(to > cluster_end); - - if (wc->w_this_page_new) - ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, - cluster_start, cluster_end, 1); - else - ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, - from, to, 0); - if (ret) { - mlog_errno(ret); - goto out; + * This is true if page_size > cluster_size. + * + * It triggers a set of special cases during write which might + * have to deal with allocating writes to partial pages. + */ + unsigned int w_large_pages; + + /* + * Pages involved in this write. + * + * w_target_page is the page being written to by the user. 
+ * + * w_pages is an array of pages which always contains + * w_target_page, and in the case of an allocating write with + * page_size < cluster size, it will contain zero'd and mapped + * pages adjacent to w_target_page which need to be written + * out in so that future reads from that region will get + * zero's. + */ + struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; + unsigned int w_num_pages; + struct page *w_target_page; + + /* + * ocfs2_write_end() uses this to know what the real range to + * write in the target should be. + */ + unsigned int w_target_from; + unsigned int w_target_to; + + /* + * We could use journal_current_handle() but this is cleaner, + * IMHO -Mark + */ + handle_t *w_handle; + + struct buffer_head *w_di_bh; + + struct ocfs2_cached_dealloc_ctxt w_dealloc; +}; + +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + int i; + + for(i = 0; i < wc->w_num_pages; i++) { + if (wc->w_pages[i] == NULL) + continue; + + unlock_page(wc->w_pages[i]); + mark_page_accessed(wc->w_pages[i]); + page_cache_release(wc->w_pages[i]); } - src = buf->ops->map(sp->s_pipe, buf, 1); - dst = kmap_atomic(wc->w_this_page, KM_USER1); - memcpy(dst + from, src + src_from, bytes); - kunmap_atomic(wc->w_this_page, KM_USER1); - buf->ops->unmap(sp->s_pipe, buf, src); + brelse(wc->w_di_bh); + kfree(wc); +} + +static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, + struct ocfs2_super *osb, loff_t pos, + unsigned len, struct buffer_head *di_bh) +{ + struct ocfs2_write_ctxt *wc; + + wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS); + if (!wc) + return -ENOMEM; + + wc->w_cpos = pos >> osb->s_clustersize_bits; + wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len); + get_bh(di_bh); + wc->w_di_bh = di_bh; - wc->w_finished_copy = 1; + if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) + wc->w_large_pages = 1; + else + wc->w_large_pages = 0; - *ret_from = from; - *ret_to = to; -out: + ocfs2_init_dealloc_ctxt(&wc->w_dealloc); - return bytes ? 
(unsigned int)bytes : ret; + *wcp = wc; + + return 0; } /* - * This will copy user data from the iovec in the buffered write - * context. + * If a page has any new buffers, zero them out here, and mark them uptodate + * and dirty so they'll be written out (in order to prevent uninitialised + * block data from leaking). And clear the new bit. */ -int ocfs2_map_and_write_user_data(struct inode *inode, - struct ocfs2_write_ctxt *wc, u64 *p_blkno, - unsigned int *ret_from, unsigned int *ret_to) +static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) { - int ret; - unsigned int to, from, cluster_start, cluster_end; - unsigned long bytes, src_from; - char *dst; - struct ocfs2_buffered_write_priv *bp = wc->w_private; - const struct iovec *cur_iov = bp->b_cur_iov; - char __user *buf; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - - ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, - &cluster_end); + unsigned int block_start, block_end; + struct buffer_head *head, *bh; - buf = cur_iov->iov_base + bp->b_cur_off; - src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; + BUG_ON(!PageLocked(page)); + if (!page_has_buffers(page)) + return; - from = wc->w_pos & (PAGE_CACHE_SIZE - 1); + bh = head = page_buffers(page); + block_start = 0; + do { + block_end = block_start + bh->b_size; - /* - * This is a lot of comparisons, but it reads quite - * easily, which is important here. - */ - /* Stay within the src page */ - bytes = PAGE_SIZE - src_from; - /* Stay within the vector */ - bytes = min(bytes, - (unsigned long)(cur_iov->iov_len - bp->b_cur_off)); - /* Stay within count */ - bytes = min(bytes, (unsigned long)wc->w_count); - /* - * For clustersize > page size, just stay within - * target page, otherwise we have to calculate pos - * within the cluster and obey the rightmost - * boundary. 
- */ - if (wc->w_large_pages) { - /* - * For cluster size < page size, we have to - * calculate pos within the cluster and obey - * the rightmost boundary. - */ - bytes = min(bytes, (unsigned long)(osb->s_clustersize - - (wc->w_pos & (osb->s_clustersize - 1)))); - } else { - /* - * cluster size > page size is the most common - * case - we just stay within the target page - * boundary. - */ - bytes = min(bytes, PAGE_CACHE_SIZE - from); - } + if (buffer_new(bh)) { + if (block_end > from && block_start < to) { + if (!PageUptodate(page)) { + unsigned start, end; - to = from + bytes; + start = max(from, block_start); + end = min(to, block_end); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from < cluster_start); - BUG_ON(to > cluster_end); + zero_user_page(page, start, end - start, KM_USER0); + set_buffer_uptodate(bh); + } - if (wc->w_this_page_new) - ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, - cluster_start, cluster_end, 1); - else - ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, - from, to, 0); - if (ret) { - mlog_errno(ret); - goto out; + clear_buffer_new(bh); + mark_buffer_dirty(bh); + } } - dst = kmap(wc->w_this_page); - memcpy(dst + from, bp->b_src_buf + src_from, bytes); - kunmap(wc->w_this_page); + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); +} - /* - * XXX: This is slow, but simple. The caller of - * ocfs2_buffered_write_cluster() is responsible for - * passing through the iovecs, so it's difficult to - * predict what our next step is in here after our - * initial write. A future version should be pushing - * that iovec manipulation further down. - * - * By setting this, we indicate that a copy from user - * data was done, and subsequent calls for this - * cluster will skip copying more data. +/* + * Only called when we have a failure during allocating write to write + * zero's to the newly allocated region. 
*/ - wc->w_finished_copy = 1; +static void ocfs2_write_failure(struct inode *inode, + struct ocfs2_write_ctxt *wc, + loff_t user_pos, unsigned user_len) +{ + int i; + unsigned from, to; + struct page *tmppage; - *ret_from = from; - *ret_to = to; -out: + ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len); + + if (wc->w_large_pages) { + from = wc->w_target_from; + to = wc->w_target_to; + } else { + from = 0; + to = PAGE_CACHE_SIZE; + } - return bytes ? (unsigned int)bytes : ret; + for(i = 0; i < wc->w_num_pages; i++) { + tmppage = wc->w_pages[i]; + + if (ocfs2_should_order_data(inode)) + walk_page_buffers(wc->w_handle, page_buffers(tmppage), + from, to, NULL, + ocfs2_journal_dirty_data); + + block_commit_write(tmppage, from, to); + } } -/* - * Map, fill and write a page to disk. - * - * The work of copying data is done via callback. Newly allocated - * pages which don't take user data will be zero'd (set 'new' to - * indicate an allocating write) - * - * Returns a negative error code or the number of bytes copied into - * the page. 
- */ -static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, - u64 *p_blkno, struct page *page, - struct ocfs2_write_ctxt *wc, int new) +static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, + struct ocfs2_write_ctxt *wc, + struct page *page, u32 cpos, + loff_t user_pos, unsigned user_len, + int new) { - int ret, copied = 0; - unsigned int from = 0, to = 0; + int ret; + unsigned int map_from = 0, map_to = 0; unsigned int cluster_start, cluster_end; - unsigned int zero_from = 0, zero_to = 0; + unsigned int user_data_from = 0, user_data_to = 0; - ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, + ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, &cluster_start, &cluster_end); - if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index - && !wc->w_finished_copy) { + if (page == wc->w_target_page) { + map_from = user_pos & (PAGE_CACHE_SIZE - 1); + map_to = map_from + user_len; - wc->w_this_page = page; - wc->w_this_page_new = new; - ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); - if (ret < 0) { + if (new) + ret = ocfs2_map_page_blocks(page, p_blkno, inode, + cluster_start, cluster_end, + new); + else + ret = ocfs2_map_page_blocks(page, p_blkno, inode, + map_from, map_to, new); + if (ret) { mlog_errno(ret); goto out; } - copied = ret; - - zero_from = from; - zero_to = to; + user_data_from = map_from; + user_data_to = map_to; if (new) { - from = cluster_start; - to = cluster_end; + map_from = cluster_start; + map_to = cluster_end; } + + wc->w_target_from = map_from; + wc->w_target_to = map_to; } else { /* * If we haven't allocated the new page yet, we @@ -1002,11 +1021,11 @@ */ BUG_ON(!new); - from = cluster_start; - to = cluster_end; + map_from = cluster_start; + map_to = cluster_end; ret = ocfs2_map_page_blocks(page, p_blkno, inode, - cluster_start, cluster_end, 1); + cluster_start, cluster_end, new); if (ret) { mlog_errno(ret); goto out; @@ -1025,108 +1044,113 @@ */ if (new && 
!PageUptodate(page)) ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), - wc->w_cpos, zero_from, zero_to); + cpos, user_data_from, user_data_to); flush_dcache_page(page); - if (ocfs2_should_order_data(inode)) { - ret = walk_page_buffers(handle, - page_buffers(page), - from, to, NULL, - ocfs2_journal_dirty_data); - if (ret < 0) - mlog_errno(ret); - } - - /* - * We don't use generic_commit_write() because we need to - * handle our own i_size update. - */ - ret = block_commit_write(page, from, to); - if (ret) - mlog_errno(ret); out: - - return copied ? copied : ret; + return ret; } /* - * Do the actual write of some data into an inode. Optionally allocate - * in order to fulfill the write. - * - * cpos is the logical cluster offset within the file to write at - * - * 'phys' is the physical mapping of that offset. a 'phys' value of - * zero indicates that allocation is required. In this case, data_ac - * and meta_ac should be valid (meta_ac can be null if metadata - * allocation isn't required). + * This function will only grab one clusters worth of pages. */ -static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, - struct buffer_head *di_bh, - struct ocfs2_alloc_context *data_ac, - struct ocfs2_alloc_context *meta_ac, - struct ocfs2_write_ctxt *wc) +static int ocfs2_grab_pages_for_write(struct address_space *mapping, + struct ocfs2_write_ctxt *wc, + u32 cpos, loff_t user_pos, int new, + struct page *mmap_page) { - int ret, i, numpages = 1, new; - unsigned int copied = 0; - u32 tmp_pos; - u64 v_blkno, p_blkno; - struct address_space *mapping = file->f_mapping; + int ret = 0, i; + unsigned long start, target_index, index; struct inode *inode = mapping->host; - unsigned long index, start; - struct page **cpages; - new = phys == 0 ? 1 : 0; + target_index = user_pos >> PAGE_CACHE_SHIFT; /* * Figure out how many pages we'll be manipulating here. For * non allocating write, we just change the one * page. Otherwise, we'll need a whole clusters worth. 
*/ - if (new) - numpages = ocfs2_pages_per_cluster(inode->i_sb); - - cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); - if (!cpages) { - ret = -ENOMEM; - mlog_errno(ret); - return ret; - } - - /* - * Fill our page array first. That way we've grabbed enough so - * that we can zero and flush if we error after adding the - * extent. - */ if (new) { - start = ocfs2_align_clusters_to_page_index(inode->i_sb, - wc->w_cpos); - v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos); + wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); + start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); } else { - start = wc->w_pos >> PAGE_CACHE_SHIFT; - v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; + wc->w_num_pages = 1; + start = target_index; } - for(i = 0; i < numpages; i++) { + for(i = 0; i < wc->w_num_pages; i++) { index = start + i; - cpages[i] = find_or_create_page(mapping, index, GFP_NOFS); - if (!cpages[i]) { + if (index == target_index && mmap_page) { + /* + * ocfs2_pagemkwrite() is a little different + * and wants us to directly use the page + * passed in. + */ + lock_page(mmap_page); + + if (mmap_page->mapping != mapping) { + unlock_page(mmap_page); + /* + * Sanity check - the locking in + * ocfs2_pagemkwrite() should ensure + * that this code doesn't trigger. + */ + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + + page_cache_get(mmap_page); + wc->w_pages[i] = mmap_page; + } else { + wc->w_pages[i] = find_or_create_page(mapping, index, + GFP_NOFS); + if (!wc->w_pages[i]) { ret = -ENOMEM; mlog_errno(ret); goto out; } } + if (index == target_index) + wc->w_target_page = wc->w_pages[i]; + } +out: + return ret; +} + +/* + * Prepare a single cluster for write one cluster into the file. 
+ */ +static int ocfs2_write_cluster(struct address_space *mapping, + u32 phys, unsigned int unwritten, + struct ocfs2_alloc_context *data_ac, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_write_ctxt *wc, u32 cpos, + loff_t user_pos, unsigned user_len) +{ + int ret, i, new, should_zero = 0; + u64 v_blkno, p_blkno; + struct inode *inode = mapping->host; + + new = phys == 0 ? 1 : 0; + if (new || unwritten) + should_zero = 1; + if (new) { + u32 tmp_pos; + /* * This is safe to call with the page locks - it won't take * any additional semaphores or cluster locks. */ - tmp_pos = wc->w_cpos; + tmp_pos = cpos; ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, - &tmp_pos, 1, di_bh, handle, - data_ac, meta_ac, NULL); + &tmp_pos, 1, 0, wc->w_di_bh, + wc->w_handle, data_ac, + meta_ac, NULL); /* * This shouldn't happen because we must have already * calculated the correct meta data allocation required. The @@ -1143,159 +1167,433 @@ mlog_errno(ret); goto out; } + } else if (unwritten) { + ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, + wc->w_handle, cpos, 1, phys, + meta_ac, &wc->w_dealloc); + if (ret < 0) { + mlog_errno(ret); + goto out; + } } + if (should_zero) + v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); + else + v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; + + /* + * The only reason this should fail is due to an inability to + * find the extent added. 
+ */ ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, NULL); if (ret < 0) { + ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, " + "at logical block %llu", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)v_blkno); + goto out; + } + + BUG_ON(p_blkno == 0); + + for(i = 0; i < wc->w_num_pages; i++) { + int tmpret; + + tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, + wc->w_pages[i], cpos, + user_pos, user_len, + should_zero); + if (tmpret) { + mlog_errno(tmpret); + if (ret == 0) + ret = tmpret; + } + } /* - * XXX: Should we go readonly here? + * We only have cleanup to do in case of allocating write. */ + if (ret && new) + ocfs2_write_failure(inode, wc, user_pos, user_len); - mlog_errno(ret); - goto out; - } +out: - BUG_ON(p_blkno == 0); + return ret; +} - for(i = 0; i < numpages; i++) { - ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i], - wc, new); - if (ret < 0) { +static int ocfs2_write_cluster_by_desc(struct address_space *mapping, + struct ocfs2_alloc_context *data_ac, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_write_ctxt *wc, + loff_t pos, unsigned len) +{ + int ret, i; + struct ocfs2_write_cluster_desc *desc; + + for (i = 0; i < wc->w_clen; i++) { + desc = &wc->w_desc[i]; + + ret = ocfs2_write_cluster(mapping, desc->c_phys, + desc->c_unwritten, data_ac, meta_ac, + wc, desc->c_cpos, pos, len); + if (ret) { mlog_errno(ret); goto out; } - - copied += ret; } + ret = 0; out: - for(i = 0; i < numpages; i++) { - unlock_page(cpages[i]); - mark_page_accessed(cpages[i]); - page_cache_release(cpages[i]); - } - kfree(cpages); - - return copied ? copied : ret; + return ret; } -static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc, - struct ocfs2_super *osb, loff_t pos, - size_t count, ocfs2_page_writer *cb, - void *cb_priv) +/* + * ocfs2_write_end() wants to know which parts of the target page it + * should complete the write on. 
It's easiest to compute them ahead of + * time when a more complete view of the write is available. + */ +static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, + struct ocfs2_write_ctxt *wc, + loff_t pos, unsigned len, int alloc) { - wc->w_count = count; - wc->w_pos = pos; - wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits; - wc->w_finished_copy = 0; + struct ocfs2_write_cluster_desc *desc; - if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) - wc->w_large_pages = 1; - else - wc->w_large_pages = 0; + wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1); + wc->w_target_to = wc->w_target_from + len; + + if (alloc == 0) + return; + + /* + * Allocating write - we may have different boundaries based + * on page size and cluster size. + * + * NOTE: We can no longer compute one value from the other as + * the actual write length and user provided length may be + * different. + */ - wc->w_write_data_page = cb; - wc->w_private = cb_priv; + if (wc->w_large_pages) { + /* + * We only care about the 1st and last cluster within + * our range and whether they should be zero'd or not. Either + * value may be extended out to the start/end of a + * newly allocated cluster. + */ + desc = &wc->w_desc[0]; + if (ocfs2_should_zero_cluster(desc)) + ocfs2_figure_cluster_boundaries(osb, + desc->c_cpos, + &wc->w_target_from, + NULL); + + desc = &wc->w_desc[wc->w_clen - 1]; + if (ocfs2_should_zero_cluster(desc)) + ocfs2_figure_cluster_boundaries(osb, + desc->c_cpos, + NULL, + &wc->w_target_to); + } else { + wc->w_target_from = 0; + wc->w_target_to = PAGE_CACHE_SIZE; + } } /* - * Write a cluster to an inode. The cluster may not be allocated yet, - * in which case it will be. This only exists for buffered writes - - * O_DIRECT takes a more "traditional" path through the kernel. + * Populate each single-cluster write descriptor in the write context + * with information about the i/o to be done. 
* - * The caller is responsible for incrementing pos, written counts, etc - * - * For file systems that don't support sparse files, pre-allocation - * and page zeroing up until cpos should be done prior to this - * function call. - * - * Callers should be holding i_sem, and the rw cluster lock. + * Returns the number of clusters that will have to be allocated, as + * well as a worst case estimate of the number of extent records that + * would have to be created during a write to an unwritten region. + */ +static int ocfs2_populate_write_desc(struct inode *inode, + struct ocfs2_write_ctxt *wc, + unsigned int *clusters_to_alloc, + unsigned int *extents_to_split) +{ + int ret; + struct ocfs2_write_cluster_desc *desc; + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + u32 phys = 0; + int i; + + *clusters_to_alloc = 0; + *extents_to_split = 0; + + for (i = 0; i < wc->w_clen; i++) { + desc = &wc->w_desc[i]; + desc->c_cpos = wc->w_cpos + i; + + if (num_clusters == 0) { + /* + * Need to look up the next extent record. + */ + ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, + &num_clusters, &ext_flags); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * Assume worst case - that we're writing in + * the middle of the extent. * - * Returns the number of user bytes written, or less than zero for - * error. + * We can assume that the write proceeds from + * left to right, in which case the extent + * insert code is smart enough to coalesce the + * next splits into the previous records created. */ -ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, - size_t count, ocfs2_page_writer *actor, - void *priv) + if (ext_flags & OCFS2_EXT_UNWRITTEN) + *extents_to_split = *extents_to_split + 2; + } else if (phys) { + /* + * Only increment phys if it doesn't describe + * a hole. 
+ */ + phys++; + } + + desc->c_phys = phys; + if (phys == 0) { + desc->c_new = 1; + *clusters_to_alloc = *clusters_to_alloc + 1; + } + if (ext_flags & OCFS2_EXT_UNWRITTEN) + desc->c_unwritten = 1; + + num_clusters--; + } + + ret = 0; +out: + return ret; +} + +int ocfs2_write_begin_nolock(struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + struct buffer_head *di_bh, struct page *mmap_page) { int ret, credits = OCFS2_INODE_UPDATE_CREDITS; - ssize_t written = 0; - u32 phys; - struct inode *inode = file->f_mapping->host; + unsigned int clusters_to_alloc, extents_to_split; + struct ocfs2_write_ctxt *wc; + struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle; - struct ocfs2_write_ctxt wc; - ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv); + ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); + if (ret) { + mlog_errno(ret); + return ret; + } - ret = ocfs2_meta_lock(inode, &di_bh, 1); + ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, + &extents_to_split); if (ret) { mlog_errno(ret); goto out; } - di = (struct ocfs2_dinode *)di_bh->b_data; + + di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; /* - * Take alloc sem here to prevent concurrent lookups. That way - * the mapping, zeroing and tree manipulation within - * ocfs2_write() will be safe against ->readpage(). This - * should also serve to lock out allocation from a shared - * writeable region. + * We set w_target_from, w_target_to here so that + * ocfs2_write_end() knows which range in the target page to + * write out. An allocation requires that we write the entire + * cluster range. 
*/ - down_write(&OCFS2_I(inode)->ip_alloc_sem); - - ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); + if (clusters_to_alloc || extents_to_split) { + /* + * XXX: We are stretching the limits of + * ocfs2_lock_allocators(). It greatly over-estimates + * the work to be done. + */ + ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, + extents_to_split, &data_ac, &meta_ac); if (ret) { mlog_errno(ret); - goto out_meta; + goto out; } - /* phys == 0 means that allocation is required. */ - if (phys == 0) { - ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); + credits = ocfs2_calc_extend_credits(inode->i_sb, di, + clusters_to_alloc); + + } + + ocfs2_set_target_boundaries(osb, wc, pos, len, + clusters_to_alloc + extents_to_split); + + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + + wc->w_handle = handle; + + /* + * We don't want this to fail in ocfs2_write_end(), so do it + * here. + */ + ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_meta; + goto out_commit; } - credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); + /* + * Fill our page array first. That way we've grabbed enough so + * that we can zero and flush if we error after adding the + * extent. 
+ */ + ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, + clusters_to_alloc + extents_to_split, + mmap_page); + if (ret) { + mlog_errno(ret); + goto out_commit; } - ret = ocfs2_data_lock(inode, 1); + ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, + len); if (ret) { mlog_errno(ret); - goto out_meta; + goto out_commit; } - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); + if (data_ac) + ocfs2_free_alloc_context(data_ac); + if (meta_ac) + ocfs2_free_alloc_context(meta_ac); + + *pagep = wc->w_target_page; + *fsdata = wc; + return 0; +out_commit: + ocfs2_commit_trans(osb, handle); + +out: + ocfs2_free_write_ctxt(wc); + + if (data_ac) + ocfs2_free_alloc_context(data_ac); + if (meta_ac) + ocfs2_free_alloc_context(meta_ac); + return ret; +} + +int ocfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + int ret; + struct buffer_head *di_bh = NULL; + struct inode *inode = mapping->host; + + ret = ocfs2_meta_lock(inode, &di_bh, 1); + if (ret) { mlog_errno(ret); - goto out_data; + return ret; } - written = ocfs2_write(file, phys, handle, di_bh, data_ac, - meta_ac, &wc); - if (written < 0) { - ret = written; + /* + * Take alloc sem here to prevent concurrent lookups. That way + * the mapping, zeroing and tree manipulation within + * ocfs2_write() will be safe against ->readpage(). This + * should also serve to lock out allocation from a shared + * writeable region. 
+ */ + down_write(&OCFS2_I(inode)->ip_alloc_sem); + + ret = ocfs2_data_lock(inode, 1); + if (ret) { mlog_errno(ret); - goto out_commit; + goto out_fail; } - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, + fsdata, di_bh, NULL); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_fail_data; + } + + brelse(di_bh); + + return 0; + +out_fail_data: + ocfs2_data_unlock(inode, 1); +out_fail: + up_write(&OCFS2_I(inode)->ip_alloc_sem); + + brelse(di_bh); + ocfs2_meta_unlock(inode, 1); + + return ret; +} + +int ocfs2_write_end_nolock(struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int i; + unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); + struct inode *inode = mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_write_ctxt *wc = fsdata; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; + handle_t *handle = wc->w_handle; + struct page *tmppage; + + if (unlikely(copied < len)) { + if (!PageUptodate(wc->w_target_page)) + copied = 0; + + ocfs2_zero_new_buffers(wc->w_target_page, start+copied, + start+len); + } + flush_dcache_page(wc->w_target_page); + + for(i = 0; i < wc->w_num_pages; i++) { + tmppage = wc->w_pages[i]; + + if (tmppage == wc->w_target_page) { + from = wc->w_target_from; + to = wc->w_target_to; + + BUG_ON(from > PAGE_CACHE_SIZE || + to > PAGE_CACHE_SIZE || + to < from); + } else { + /* + * Pages adjacent to the target (if any) imply + * a hole-filling write in which case we want + * to flush their entire range. 
+ */ + from = 0; + to = PAGE_CACHE_SIZE; + } + + if (ocfs2_should_order_data(inode)) + walk_page_buffers(wc->w_handle, page_buffers(tmppage), + from, to, NULL, + ocfs2_journal_dirty_data); + + block_commit_write(tmppage, from, to); } - pos += written; + pos += copied; if (pos > inode->i_size) { i_size_write(inode, pos); mark_inode_dirty(inode); @@ -1306,28 +1604,31 @@ di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ret = ocfs2_journal_dirty(handle, di_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, wc->w_di_bh); -out_commit: ocfs2_commit_trans(osb, handle); -out_data: - ocfs2_data_unlock(inode, 1); + ocfs2_run_deallocs(osb, &wc->w_dealloc); + + ocfs2_free_write_ctxt(wc); + + return copied; +} + +int ocfs2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int ret; + struct inode *inode = mapping->host; + + ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); -out_meta: + ocfs2_data_unlock(inode, 1); up_write(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_meta_unlock(inode, 1); -out: - brelse(di_bh); - if (data_ac) - ocfs2_free_alloc_context(data_ac); - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - - return written ? 
written : ret; + return ret; } const struct address_space_operations ocfs2_aops = { diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.h linux-2.6.22-591/fs/ocfs2/aops.h --- linux-2.6.22-570/fs/ocfs2/aops.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/aops.h 2007-12-21 15:36:12.000000000 -0500 @@ -42,57 +42,22 @@ int (*fn)( handle_t *handle, struct buffer_head *bh)); -struct ocfs2_write_ctxt; -typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, - u64 *, unsigned int *, unsigned int *); - -ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, - size_t count, ocfs2_page_writer *actor, - void *priv); - -struct ocfs2_write_ctxt { - size_t w_count; - loff_t w_pos; - u32 w_cpos; - unsigned int w_finished_copy; - - /* This is true if page_size > cluster_size */ - unsigned int w_large_pages; - - /* Filler callback and private data */ - ocfs2_page_writer *w_write_data_page; - void *w_private; - - /* Only valid for the filler callback */ - struct page *w_this_page; - unsigned int w_this_page_new; -}; - -struct ocfs2_buffered_write_priv { - char *b_src_buf; - const struct iovec *b_cur_iov; /* Current iovec */ - size_t b_cur_off; /* Offset in the - * current iovec */ -}; -int ocfs2_map_and_write_user_data(struct inode *inode, - struct ocfs2_write_ctxt *wc, - u64 *p_blkno, - unsigned int *ret_from, - unsigned int *ret_to); - -struct ocfs2_splice_write_priv { - struct splice_desc *s_sd; - struct pipe_buffer *s_buf; - struct pipe_inode_info *s_pipe; - /* Neither offset value is ever larger than one page */ - unsigned int s_offset; - unsigned int s_buf_offset; -}; -int ocfs2_map_and_write_splice_data(struct inode *inode, - struct ocfs2_write_ctxt *wc, - u64 *p_blkno, - unsigned int *ret_from, - unsigned int *ret_to); +int ocfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + +int ocfs2_write_end(struct file *file, struct address_space 
*mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + +int ocfs2_write_end_nolock(struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + +int ocfs2_write_begin_nolock(struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + struct buffer_head *di_bh, struct page *mmap_page); /* all ocfs2_dio_end_io()'s fault */ #define ocfs2_iocb_is_rw_locked(iocb) \ diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.c --- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.c 2007-12-21 15:36:12.000000000 -0500 @@ -1335,6 +1335,7 @@ ret = wait_event_interruptible(o2hb_steady_queue, atomic_read(&reg->hr_steady_iterations) == 0); if (ret) { + /* We got interrupted (hello ptrace!). Clean up */ spin_lock(&o2hb_live_lock); hb_task = reg->hr_task; reg->hr_task = NULL; @@ -1345,7 +1346,16 @@ goto out; } + /* Ok, we were woken. Make sure it wasn't by drop_item() */ + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; + spin_unlock(&o2hb_live_lock); + + if (hb_task) ret = count; + else + ret = -EIO; + out: if (filp) fput(filp); @@ -1523,6 +1533,15 @@ if (hb_task) kthread_stop(hb_task); + /* + * If we're racing a dev_write(), we need to wake them. 
They will + * check reg->hr_task + */ + if (atomic_read(&reg->hr_steady_iterations) != 0) { + atomic_set(&reg->hr_steady_iterations, 0); + wake_up(&o2hb_steady_queue); + } + config_item_put(item); } @@ -1665,7 +1684,67 @@ } EXPORT_SYMBOL_GPL(o2hb_setup_callback); -int o2hb_register_callback(struct o2hb_callback_func *hc) +static struct o2hb_region *o2hb_find_region(const char *region_uuid) +{ + struct o2hb_region *p, *reg = NULL; + + assert_spin_locked(&o2hb_live_lock); + + list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { + if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { + reg = p; + break; + } + } + + return reg; +} + +static int o2hb_region_get(const char *region_uuid) +{ + int ret = 0; + struct o2hb_region *reg; + + spin_lock(&o2hb_live_lock); + + reg = o2hb_find_region(region_uuid); + if (!reg) + ret = -ENOENT; + spin_unlock(&o2hb_live_lock); + + if (ret) + goto out; + + ret = o2nm_depend_this_node(); + if (ret) + goto out; + + ret = o2nm_depend_item(&reg->hr_item); + if (ret) + o2nm_undepend_this_node(); + +out: + return ret; +} + +static void o2hb_region_put(const char *region_uuid) +{ + struct o2hb_region *reg; + + spin_lock(&o2hb_live_lock); + + reg = o2hb_find_region(region_uuid); + + spin_unlock(&o2hb_live_lock); + + if (reg) { + o2nm_undepend_item(&reg->hr_item); + o2nm_undepend_this_node(); + } +} + +int o2hb_register_callback(const char *region_uuid, + struct o2hb_callback_func *hc) { struct o2hb_callback_func *tmp; struct list_head *iter; @@ -1681,6 +1760,12 @@ goto out; } + if (region_uuid) { + ret = o2hb_region_get(region_uuid); + if (ret) + goto out; + } + down_write(&o2hb_callback_sem); list_for_each(iter, &hbcall->list) { @@ -1702,16 +1787,21 @@ } EXPORT_SYMBOL_GPL(o2hb_register_callback); -void o2hb_unregister_callback(struct o2hb_callback_func *hc) +void o2hb_unregister_callback(const char *region_uuid, + struct o2hb_callback_func *hc) { BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", 
__builtin_return_address(0), hc); + /* XXX Can this happen _with_ a region reference? */ if (list_empty(&hc->hc_item)) return; + if (region_uuid) + o2hb_region_put(region_uuid); + down_write(&o2hb_callback_sem); list_del_init(&hc->hc_item); diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.h --- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.h 2007-12-21 15:36:12.000000000 -0500 @@ -69,8 +69,10 @@ o2hb_cb_func *func, void *data, int priority); -int o2hb_register_callback(struct o2hb_callback_func *hc); -void o2hb_unregister_callback(struct o2hb_callback_func *hc); +int o2hb_register_callback(const char *region_uuid, + struct o2hb_callback_func *hc); +void o2hb_unregister_callback(const char *region_uuid, + struct o2hb_callback_func *hc); void o2hb_fill_node_map(unsigned long *map, unsigned bytes); void o2hb_init(void); diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/masklog.c linux-2.6.22-591/fs/ocfs2/cluster/masklog.c --- linux-2.6.22-570/fs/ocfs2/cluster/masklog.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/cluster/masklog.c 2007-12-21 15:36:12.000000000 -0500 @@ -74,7 +74,6 @@ #define define_mask(_name) { \ .attr = { \ .name = #_name, \ - .owner = THIS_MODULE, \ .mode = S_IRUGO | S_IWUSR, \ }, \ .mask = ML_##_name, \ diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.c --- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.c 2007-12-21 15:36:12.000000000 -0500 @@ -900,6 +900,46 @@ }, }; +int o2nm_depend_item(struct config_item *item) +{ + return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); +} + +void o2nm_undepend_item(struct config_item *item) +{ + configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item); +} + +int o2nm_depend_this_node(void) +{ + int ret = 0; 
+ struct o2nm_node *local_node; + + local_node = o2nm_get_node_by_num(o2nm_this_node()); + if (!local_node) { + ret = -EINVAL; + goto out; + } + + ret = o2nm_depend_item(&local_node->nd_item); + o2nm_node_put(local_node); + +out: + return ret; +} + +void o2nm_undepend_this_node(void) +{ + struct o2nm_node *local_node; + + local_node = o2nm_get_node_by_num(o2nm_this_node()); + BUG_ON(!local_node); + + o2nm_undepend_item(&local_node->nd_item); + o2nm_node_put(local_node); +} + + static void __exit exit_o2nm(void) { if (ocfs2_table_header) diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.h --- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.h 2007-12-21 15:36:12.000000000 -0500 @@ -77,4 +77,9 @@ void o2nm_node_get(struct o2nm_node *node); void o2nm_node_put(struct o2nm_node *node); +int o2nm_depend_item(struct config_item *item); +void o2nm_undepend_item(struct config_item *item); +int o2nm_depend_this_node(void); +void o2nm_undepend_this_node(void); + #endif /* O2CLUSTER_NODEMANAGER_H */ diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/tcp.c linux-2.6.22-591/fs/ocfs2/cluster/tcp.c --- linux-2.6.22-570/fs/ocfs2/cluster/tcp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/cluster/tcp.c 2007-12-21 15:36:12.000000000 -0500 @@ -261,14 +261,12 @@ static void o2net_complete_nodes_nsw(struct o2net_node *nn) { - struct list_head *iter, *tmp; + struct o2net_status_wait *nsw, *tmp; unsigned int num_kills = 0; - struct o2net_status_wait *nsw; assert_spin_locked(&nn->nn_lock); - list_for_each_safe(iter, tmp, &nn->nn_status_list) { - nsw = list_entry(iter, struct o2net_status_wait, ns_node_item); + list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) { o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); num_kills++; } @@ -764,13 +762,10 @@ void o2net_unregister_handler_list(struct list_head *list) { - 
struct list_head *pos, *n; - struct o2net_msg_handler *nmh; + struct o2net_msg_handler *nmh, *n; write_lock(&o2net_handler_lock); - list_for_each_safe(pos, n, list) { - nmh = list_entry(pos, struct o2net_msg_handler, - nh_unregister_item); + list_for_each_entry_safe(nmh, n, list, nh_unregister_item) { mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); rb_erase(&nmh->nh_node, &o2net_handler_tree); @@ -1638,8 +1633,8 @@ void o2net_unregister_hb_callbacks(void) { - o2hb_unregister_callback(&o2net_hb_up); - o2hb_unregister_callback(&o2net_hb_down); + o2hb_unregister_callback(NULL, &o2net_hb_up); + o2hb_unregister_callback(NULL, &o2net_hb_down); } int o2net_register_hb_callbacks(void) @@ -1651,9 +1646,9 @@ o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); - ret = o2hb_register_callback(&o2net_hb_up); + ret = o2hb_register_callback(NULL, &o2net_hb_up); if (ret == 0) - ret = o2hb_register_callback(&o2net_hb_down); + ret = o2hb_register_callback(NULL, &o2net_hb_down); if (ret) o2net_unregister_hb_callbacks(); diff -Nurb linux-2.6.22-570/fs/ocfs2/dir.c linux-2.6.22-591/fs/ocfs2/dir.c --- linux-2.6.22-570/fs/ocfs2/dir.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/dir.c 2007-12-21 15:36:12.000000000 -0500 @@ -368,7 +368,7 @@ u32 offset = OCFS2_I(dir)->ip_clusters; status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, - 1, parent_fe_bh, handle, + 1, 0, parent_fe_bh, handle, data_ac, meta_ac, NULL); BUG_ON(status == -EAGAIN); if (status < 0) { diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c linux-2.6.22-591/fs/ocfs2/dlm/dlmdomain.c --- linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/dlm/dlmdomain.c 2007-12-21 15:36:12.000000000 -0500 @@ -1128,8 +1128,8 @@ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) { - o2hb_unregister_callback(&dlm->dlm_hb_up); - 
o2hb_unregister_callback(&dlm->dlm_hb_down); + o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); + o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); o2net_unregister_handler_list(&dlm->dlm_domain_handlers); } @@ -1141,13 +1141,13 @@ o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); - status = o2hb_register_callback(&dlm->dlm_hb_down); + status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); if (status) goto bail; o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); - status = o2hb_register_callback(&dlm->dlm_hb_up); + status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); if (status) goto bail; diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c linux-2.6.22-591/fs/ocfs2/dlm/dlmmaster.c --- linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/dlm/dlmmaster.c 2007-12-21 15:36:12.000000000 -0500 @@ -192,25 +192,20 @@ static void dlm_dump_mles(struct dlm_ctxt *dlm) { struct dlm_master_list_entry *mle; - struct list_head *iter; mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); spin_lock(&dlm->master_lock); - list_for_each(iter, &dlm->master_list) { - mle = list_entry(iter, struct dlm_master_list_entry, list); + list_for_each_entry(mle, &dlm->master_list, list) dlm_print_one_mle(mle); - } spin_unlock(&dlm->master_lock); } int dlm_dump_all_mles(const char __user *data, unsigned int len) { - struct list_head *iter; struct dlm_ctxt *dlm; spin_lock(&dlm_domain_lock); - list_for_each(iter, &dlm_domains) { - dlm = list_entry (iter, struct dlm_ctxt, list); + list_for_each_entry(dlm, &dlm_domains, list) { mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); dlm_dump_mles(dlm); } @@ -454,12 +449,10 @@ char *name, unsigned int namelen) { struct dlm_master_list_entry *tmpmle; - struct list_head *iter; assert_spin_locked(&dlm->master_lock); - list_for_each(iter, &dlm->master_list) { - tmpmle = 
list_entry(iter, struct dlm_master_list_entry, list); + list_for_each_entry(tmpmle, &dlm->master_list, list) { if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) continue; dlm_get_mle(tmpmle); @@ -472,13 +465,10 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) { struct dlm_master_list_entry *mle; - struct list_head *iter; assert_spin_locked(&dlm->spinlock); - list_for_each(iter, &dlm->mle_hb_events) { - mle = list_entry(iter, struct dlm_master_list_entry, - hb_events); + list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { if (node_up) dlm_mle_node_up(dlm, mle, NULL, idx); else @@ -2434,7 +2424,7 @@ int ret; int i; int count = 0; - struct list_head *queue, *iter; + struct list_head *queue; struct dlm_lock *lock; assert_spin_locked(&res->spinlock); @@ -2453,8 +2443,7 @@ ret = 0; queue = &res->granted; for (i = 0; i < 3; i++) { - list_for_each(iter, queue) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, queue, list) { ++count; if (lock->ml.node == dlm->node_num) { mlog(0, "found a lock owned by this node still " @@ -2923,18 +2912,16 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - struct list_head *iter, *iter2; struct list_head *queue = &res->granted; int i, bit; - struct dlm_lock *lock; + struct dlm_lock *lock, *next; assert_spin_locked(&res->spinlock); BUG_ON(res->owner == dlm->node_num); for (i=0; i<3; i++) { - list_for_each_safe(iter, iter2, queue) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry_safe(lock, next, queue, list) { if (lock->ml.node != dlm->node_num) { mlog(0, "putting lock for node %u\n", lock->ml.node); @@ -2976,7 +2963,6 @@ { int i; struct list_head *queue = &res->granted; - struct list_head *iter; struct dlm_lock *lock; int nodenum; @@ -2984,10 +2970,9 @@ spin_lock(&res->spinlock); for (i=0; i<3; i++) { - list_for_each(iter, queue) { + list_for_each_entry(lock, queue, list) { /* up to the caller to make 
sure this node * is alive */ - lock = list_entry (iter, struct dlm_lock, list); if (lock->ml.node != dlm->node_num) { spin_unlock(&res->spinlock); return lock->ml.node; @@ -3234,8 +3219,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) { - struct list_head *iter, *iter2; - struct dlm_master_list_entry *mle; + struct dlm_master_list_entry *mle, *next; struct dlm_lock_resource *res; unsigned int hash; @@ -3245,9 +3229,7 @@ /* clean the master list */ spin_lock(&dlm->master_lock); - list_for_each_safe(iter, iter2, &dlm->master_list) { - mle = list_entry(iter, struct dlm_master_list_entry, list); - + list_for_each_entry_safe(mle, next, &dlm->master_list, list) { BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && mle->type != DLM_MLE_MIGRATION); diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c linux-2.6.22-591/fs/ocfs2/dlm/dlmrecovery.c --- linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/dlm/dlmrecovery.c 2007-12-21 15:36:12.000000000 -0500 @@ -158,8 +158,7 @@ struct dlm_ctxt *dlm = container_of(work, struct dlm_ctxt, dispatched_work); LIST_HEAD(tmp_list); - struct list_head *iter, *iter2; - struct dlm_work_item *item; + struct dlm_work_item *item, *next; dlm_workfunc_t *workfunc; int tot=0; @@ -167,13 +166,12 @@ list_splice_init(&dlm->work_list, &tmp_list); spin_unlock(&dlm->work_lock); - list_for_each_safe(iter, iter2, &tmp_list) { + list_for_each_entry(item, &tmp_list, list) { tot++; } mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); - list_for_each_safe(iter, iter2, &tmp_list) { - item = list_entry(iter, struct dlm_work_item, list); + list_for_each_entry_safe(item, next, &tmp_list, list) { workfunc = item->func; list_del_init(&item->list); @@ -549,7 +547,6 @@ { int status = 0; struct dlm_reco_node_data *ndata; - struct list_head *iter; int all_nodes_done; int destroy = 0; int pass = 0; @@ -567,8 +564,7 @@ /* safe to access the node data list without 
a lock, since this * process is the only one to change the list */ - list_for_each(iter, &dlm->reco.node_data) { - ndata = list_entry (iter, struct dlm_reco_node_data, list); + list_for_each_entry(ndata, &dlm->reco.node_data, list) { BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); ndata->state = DLM_RECO_NODE_DATA_REQUESTING; @@ -655,9 +651,7 @@ * done, or if anyone died */ all_nodes_done = 1; spin_lock(&dlm_reco_state_lock); - list_for_each(iter, &dlm->reco.node_data) { - ndata = list_entry (iter, struct dlm_reco_node_data, list); - + list_for_each_entry(ndata, &dlm->reco.node_data, list) { mlog(0, "checking recovery state of node %u\n", ndata->node_num); switch (ndata->state) { @@ -774,16 +768,14 @@ static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) { - struct list_head *iter, *iter2; - struct dlm_reco_node_data *ndata; + struct dlm_reco_node_data *ndata, *next; LIST_HEAD(tmplist); spin_lock(&dlm_reco_state_lock); list_splice_init(&dlm->reco.node_data, &tmplist); spin_unlock(&dlm_reco_state_lock); - list_for_each_safe(iter, iter2, &tmplist) { - ndata = list_entry (iter, struct dlm_reco_node_data, list); + list_for_each_entry_safe(ndata, next, &tmplist, list) { list_del_init(&ndata->list); kfree(ndata); } @@ -876,7 +868,6 @@ struct dlm_lock_resource *res; struct dlm_ctxt *dlm; LIST_HEAD(resources); - struct list_head *iter; int ret; u8 dead_node, reco_master; int skip_all_done = 0; @@ -920,8 +911,7 @@ /* any errors returned will be due to the new_master dying, * the dlm_reco_thread should detect this */ - list_for_each(iter, &resources) { - res = list_entry (iter, struct dlm_lock_resource, recovering); + list_for_each_entry(res, &resources, recovering) { ret = dlm_send_one_lockres(dlm, res, mres, reco_master, DLM_MRES_RECOVERY); if (ret < 0) { @@ -983,7 +973,6 @@ { struct dlm_ctxt *dlm = data; struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; - struct list_head *iter; struct dlm_reco_node_data *ndata = NULL; int ret = 
-EINVAL; @@ -1000,8 +989,7 @@ dlm->reco.dead_node, done->node_idx, dlm->node_num); spin_lock(&dlm_reco_state_lock); - list_for_each(iter, &dlm->reco.node_data) { - ndata = list_entry (iter, struct dlm_reco_node_data, list); + list_for_each_entry(ndata, &dlm->reco.node_data, list) { if (ndata->node_num != done->node_idx) continue; @@ -1049,13 +1037,11 @@ struct list_head *list, u8 dead_node) { - struct dlm_lock_resource *res; - struct list_head *iter, *iter2; + struct dlm_lock_resource *res, *next; struct dlm_lock *lock; spin_lock(&dlm->spinlock); - list_for_each_safe(iter, iter2, &dlm->reco.resources) { - res = list_entry (iter, struct dlm_lock_resource, recovering); + list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { /* always prune any $RECOVERY entries for dead nodes, * otherwise hangs can occur during later recovery */ if (dlm_is_recovery_lock(res->lockname.name, @@ -1169,7 +1155,7 @@ u8 flags, u8 master) { /* mres here is one full page */ - memset(mres, 0, PAGE_SIZE); + clear_page(mres); mres->lockname_len = namelen; memcpy(mres->lockname, lockname, namelen); mres->num_locks = 0; @@ -1252,7 +1238,7 @@ struct dlm_migratable_lockres *mres, u8 send_to, u8 flags) { - struct list_head *queue, *iter; + struct list_head *queue; int total_locks, i; u64 mig_cookie = 0; struct dlm_lock *lock; @@ -1278,9 +1264,7 @@ total_locks = 0; for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { queue = dlm_list_idx_to_ptr(res, i); - list_for_each(iter, queue) { - lock = list_entry (iter, struct dlm_lock, list); - + list_for_each_entry(lock, queue, list) { /* add another lock. 
*/ total_locks++; if (!dlm_add_lock_to_array(lock, mres, i)) @@ -1930,8 +1914,8 @@ struct dlm_lock_resource *res) { int i; - struct list_head *queue, *iter, *iter2; - struct dlm_lock *lock; + struct list_head *queue; + struct dlm_lock *lock, *next; res->state |= DLM_LOCK_RES_RECOVERING; if (!list_empty(&res->recovering)) { @@ -1947,8 +1931,7 @@ /* find any pending locks and put them back on proper list */ for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { queue = dlm_list_idx_to_ptr(res, i); - list_for_each_safe(iter, iter2, queue) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry_safe(lock, next, queue, list) { dlm_lock_get(lock); if (lock->convert_pending) { /* move converting lock back to granted */ @@ -2013,18 +1996,15 @@ u8 dead_node, u8 new_master) { int i; - struct list_head *iter, *iter2; struct hlist_node *hash_iter; struct hlist_head *bucket; - - struct dlm_lock_resource *res; + struct dlm_lock_resource *res, *next; mlog_entry_void(); assert_spin_locked(&dlm->spinlock); - list_for_each_safe(iter, iter2, &dlm->reco.resources) { - res = list_entry (iter, struct dlm_lock_resource, recovering); + list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { if (res->owner == dead_node) { list_del_init(&res->recovering); spin_lock(&res->spinlock); @@ -2099,7 +2079,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 dead_node) { - struct list_head *iter, *queue; + struct list_head 
*queue; struct dlm_lock *lock; int blank_lvb = 0, local = 0; int i; @@ -2121,8 +2101,7 @@ for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { queue = dlm_list_idx_to_ptr(res, i); - list_for_each(iter, queue) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, queue, list) { if (lock->ml.node == search_node) { if (dlm_lvb_needs_invalidation(lock, local)) { /* zero the lksb lvb and lockres lvb */ @@ -2143,8 +2122,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 dead_node) { - struct list_head *iter, *tmpiter; - struct dlm_lock *lock; + struct dlm_lock *lock, *next; unsigned int freed = 0; /* this node is the lockres master: @@ -2155,24 +2133,21 @@ assert_spin_locked(&res->spinlock); /* TODO: check pending_asts, pending_basts here */ - list_for_each_safe(iter, tmpiter, &res->granted) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry_safe(lock, next, &res->granted, list) { if (lock->ml.node == dead_node) { list_del_init(&lock->list); dlm_lock_put(lock); freed++; } } - list_for_each_safe(iter, tmpiter, &res->converting) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry_safe(lock, next, &res->converting, list) { if (lock->ml.node == dead_node) { list_del_init(&lock->list); dlm_lock_put(lock); freed++; } } - list_for_each_safe(iter, tmpiter, &res->blocked) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry_safe(lock, next, &res->blocked, list) { if (lock->ml.node == dead_node) { list_del_init(&lock->list); dlm_lock_put(lock); diff -Nurb linux-2.6.22-570/fs/ocfs2/dlmglue.c linux-2.6.22-591/fs/ocfs2/dlmglue.c --- linux-2.6.22-570/fs/ocfs2/dlmglue.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/dlmglue.c 2007-12-21 15:36:12.000000000 -0500 @@ -600,15 +600,13 @@ static void lockres_set_flags(struct ocfs2_lock_res *lockres, unsigned long newflags) { - struct list_head *pos, *tmp; - struct ocfs2_mask_waiter 
*mw; + struct ocfs2_mask_waiter *mw, *tmp; assert_spin_locked(&lockres->l_lock); lockres->l_flags = newflags; - list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) { - mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item); + list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) continue; diff -Nurb linux-2.6.22-570/fs/ocfs2/endian.h linux-2.6.22-591/fs/ocfs2/endian.h --- linux-2.6.22-570/fs/ocfs2/endian.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/endian.h 2007-12-21 15:36:12.000000000 -0500 @@ -32,6 +32,11 @@ *var = cpu_to_le32(le32_to_cpu(*var) + val); } +static inline void le64_add_cpu(__le64 *var, u64 val) +{ + *var = cpu_to_le64(le64_to_cpu(*var) + val); +} + static inline void le32_and_cpu(__le32 *var, u32 val) { *var = cpu_to_le32(le32_to_cpu(*var) & val); diff -Nurb linux-2.6.22-570/fs/ocfs2/extent_map.c linux-2.6.22-591/fs/ocfs2/extent_map.c --- linux-2.6.22-570/fs/ocfs2/extent_map.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/extent_map.c 2007-12-21 15:36:12.000000000 -0500 @@ -109,17 +109,14 @@ */ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) { - struct list_head *p, *n; - struct ocfs2_extent_map_item *emi; + struct ocfs2_extent_map_item *emi, *n; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_extent_map *em = &oi->ip_extent_map; LIST_HEAD(tmp_list); unsigned int range; spin_lock(&oi->ip_lock); - list_for_each_safe(p, n, &em->em_list) { - emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); - + list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { if (emi->ei_cpos >= cpos) { /* Full truncate of this record. 
*/ list_move(&emi->ei_list, &tmp_list); @@ -136,8 +133,7 @@ } spin_unlock(&oi->ip_lock); - list_for_each_safe(p, n, &tmp_list) { - emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); + list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { list_del(&emi->ei_list); kfree(emi); } @@ -377,37 +373,6 @@ return ret; } -/* - * Return the index of the extent record which contains cluster #v_cluster. - * -1 is returned if it was not found. - * - * Should work fine on interior and exterior nodes. - */ -static int ocfs2_search_extent_list(struct ocfs2_extent_list *el, - u32 v_cluster) -{ - int ret = -1; - int i; - struct ocfs2_extent_rec *rec; - u32 rec_end, rec_start, clusters; - - for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { - rec = &el->l_recs[i]; - - rec_start = le32_to_cpu(rec->e_cpos); - clusters = ocfs2_rec_clusters(el, rec); - - rec_end = rec_start + clusters; - - if (v_cluster >= rec_start && v_cluster < rec_end) { - ret = i; - break; - } - } - - return ret; -} - int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, unsigned int *extent_flags) diff -Nurb linux-2.6.22-570/fs/ocfs2/file.c linux-2.6.22-591/fs/ocfs2/file.c --- linux-2.6.22-570/fs/ocfs2/file.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/file.c 2007-12-21 15:36:12.000000000 -0500 @@ -326,9 +326,6 @@ (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)new_i_size); - unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(inode->i_mapping, new_i_size); - fe = (struct ocfs2_dinode *) di_bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); @@ -363,16 +360,23 @@ if (new_i_size == le64_to_cpu(fe->i_size)) goto bail; + down_write(&OCFS2_I(inode)->ip_alloc_sem); + /* This forces other nodes to sync and drop their pages. 
Do * this even if we have a truncate without allocation change - * ocfs2 cluster sizes can be much greater than page size, so * we have to truncate them anyway. */ status = ocfs2_data_lock(inode, 1); if (status < 0) { + up_write(&OCFS2_I(inode)->ip_alloc_sem); + mlog_errno(status); goto bail; } + unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); + /* alright, we're going to need to do a full blown alloc size * change. Orphan the inode so that recovery can complete the * truncate if necessary. This does the task of marking @@ -399,6 +403,8 @@ bail_unlock_data: ocfs2_data_unlock(inode, 1); + up_write(&OCFS2_I(inode)->ip_alloc_sem); + bail: mlog_exit(status); @@ -419,6 +425,7 @@ struct inode *inode, u32 *logical_offset, u32 clusters_to_add, + int mark_unwritten, struct buffer_head *fe_bh, handle_t *handle, struct ocfs2_alloc_context *data_ac, @@ -431,9 +438,13 @@ enum ocfs2_alloc_restarted reason = RESTART_NONE; u32 bit_off, num_bits; u64 block; + u8 flags = 0; BUG_ON(!clusters_to_add); + if (mark_unwritten) + flags = OCFS2_EXT_UNWRITTEN; + free_extents = ocfs2_num_free_extents(osb, inode, fe); if (free_extents < 0) { status = free_extents; @@ -483,7 +494,7 @@ num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); status = ocfs2_insert_extent(osb, handle, inode, fe_bh, *logical_offset, block, num_bits, - meta_ac); + flags, meta_ac); if (status < 0) { mlog_errno(status); goto leave; @@ -516,25 +527,28 @@ * For a given allocation, determine which allocators will need to be * accessed, and lock them, reserving the appropriate number of bits. * - * Called from ocfs2_extend_allocation() for file systems which don't - * support holes, and from ocfs2_write() for file systems which - * understand sparse inodes. + * Sparse file systems call this from ocfs2_write_begin_nolock() + * and ocfs2_allocate_unwritten_extents(). 
+ * + * File systems which don't support holes call this from + * ocfs2_extend_allocation(). */ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, - u32 clusters_to_add, + u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **meta_ac) { int ret, num_free_extents; + unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); *meta_ac = NULL; *data_ac = NULL; mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " - "clusters_to_add = %u\n", + "clusters_to_add = %u, extents_to_split = %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), - le32_to_cpu(di->i_clusters), clusters_to_add); + le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); num_free_extents = ocfs2_num_free_extents(osb, inode, di); if (num_free_extents < 0) { @@ -552,9 +566,12 @@ * * Most of the time we'll only be seeing this 1 cluster at a time * anyway. + * + * Always lock for any unwritten extents - we might want to + * remove blocks for a merge. */ if (!num_free_extents || - (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); if (ret < 0) { if (ret != -ENOSPC) @@ -585,14 +602,13 @@ return ret; } -static int ocfs2_extend_allocation(struct inode *inode, - u32 clusters_to_add) +static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, + u32 clusters_to_add, int mark_unwritten) { int status = 0; int restart_func = 0; - int drop_alloc_sem = 0; int credits; - u32 prev_clusters, logical_start; + u32 prev_clusters; struct buffer_head *bh = NULL; struct ocfs2_dinode *fe = NULL; handle_t *handle = NULL; @@ -607,7 +623,7 @@ * This function only exists for file systems which don't * support holes. 
*/ - BUG_ON(ocfs2_sparse_alloc(osb)); + BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, OCFS2_BH_CACHED, inode); @@ -623,19 +639,10 @@ goto leave; } - logical_start = OCFS2_I(inode)->ip_clusters; - restart_all: BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); - /* blocks peope in read/write from reading our allocation - * until we're done changing it. We depend on i_mutex to block - * other extend/truncate calls while we're here. Ordering wrt - * start_trans is important here -- always do it before! */ - down_write(&OCFS2_I(inode)->ip_alloc_sem); - drop_alloc_sem = 1; - - status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, + status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, &meta_ac); if (status) { mlog_errno(status); @@ -668,6 +675,7 @@ inode, &logical_start, clusters_to_add, + mark_unwritten, bh, handle, data_ac, @@ -720,10 +728,6 @@ OCFS2_I(inode)->ip_clusters, i_size_read(inode)); leave: - if (drop_alloc_sem) { - up_write(&OCFS2_I(inode)->ip_alloc_sem); - drop_alloc_sem = 0; - } if (handle) { ocfs2_commit_trans(osb, handle); handle = NULL; @@ -749,6 +753,25 @@ return status; } +static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, + u32 clusters_to_add, int mark_unwritten) +{ + int ret; + + /* + * The alloc sem blocks people in read/write from reading our + * allocation until we're done changing it. We depend on + * i_mutex to block other extend/truncate calls while we're + * here. 
+ */ + down_write(&OCFS2_I(inode)->ip_alloc_sem); + ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add, + mark_unwritten); + up_write(&OCFS2_I(inode)->ip_alloc_sem); + + return ret; +} + /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to * worry about recursive locking in ->prepare_write() and @@ -890,7 +913,9 @@ } if (clusters_to_add) { - ret = ocfs2_extend_allocation(inode, clusters_to_add); + ret = ocfs2_extend_allocation(inode, + OCFS2_I(inode)->ip_clusters, + clusters_to_add, 0); if (ret < 0) { mlog_errno(ret); goto out_unlock; @@ -997,6 +1022,13 @@ goto bail_unlock; } + /* + * This will intentionally not wind up calling vmtruncate(), + * since all the work for a size change has been done above. + * Otherwise, we could get into problems with truncate as + * ip_alloc_sem is used there to protect against i_size + * changes. + */ status = inode_setattr(inode, attr); if (status < 0) { mlog_errno(status); @@ -1072,17 +1104,16 @@ return ret; } -static int ocfs2_write_remove_suid(struct inode *inode) +static int __ocfs2_write_remove_suid(struct inode *inode, + struct buffer_head *bh) { int ret; - struct buffer_head *bh = NULL; - struct ocfs2_inode_info *oi = OCFS2_I(inode); handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di; mlog_entry("(Inode %llu, mode 0%o)\n", - (unsigned long long)oi->ip_blkno, inode->i_mode); + (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (handle == NULL) { @@ -1091,17 +1122,11 @@ goto out; } - ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); - if (ret < 0) { - mlog_errno(ret); - goto out_trans; - } - ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); - goto out_bh; + goto out_trans; } inode->i_mode &= ~S_ISUID; @@ -1114,8 +1139,7 @@ ret = 
ocfs2_journal_dirty(handle, bh); if (ret < 0) mlog_errno(ret); -out_bh: - brelse(bh); + out_trans: ocfs2_commit_trans(osb, handle); out: @@ -1161,6 +1185,211 @@ return ret; } +static int ocfs2_write_remove_suid(struct inode *inode) +{ + int ret; + struct buffer_head *bh = NULL; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), + oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = __ocfs2_write_remove_suid(inode, bh); +out: + brelse(bh); + return ret; +} + +/* + * Allocate enough extents to cover the region starting at byte offset + * start for len bytes. Existing extents are skipped, any extents + * added are marked as "unwritten". + */ +static int ocfs2_allocate_unwritten_extents(struct inode *inode, + u64 start, u64 len) +{ + int ret; + u32 cpos, phys_cpos, clusters, alloc_size; + + /* + * We consider both start and len to be inclusive. + */ + cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; + clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len); + clusters -= cpos; + + while (clusters) { + ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, + &alloc_size, NULL); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * Hole or existing extent len can be arbitrary, so + * cap it to our own allocation request. + */ + if (alloc_size > clusters) + alloc_size = clusters; + + if (phys_cpos) { + /* + * We already have an allocation at this + * region so we can safely skip it. 
+ */ + goto next; + } + + ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); + if (ret) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + +next: + cpos += alloc_size; + clusters -= alloc_size; + } + + ret = 0; +out: + return ret; +} + +/* + * Parts of this function taken from xfs_change_file_space() + */ +int ocfs2_change_file_space(struct file *file, unsigned int cmd, + struct ocfs2_space_resv *sr) +{ + int ret; + s64 llen; + struct inode *inode = file->f_path.dentry->d_inode; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *di_bh = NULL; + handle_t *handle; + unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits); + + if (!ocfs2_writes_unwritten_extents(osb)) + return -ENOTTY; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; + + mutex_lock(&inode->i_mutex); + + /* + * This prevents concurrent writes on other nodes + */ + ret = ocfs2_rw_lock(inode, 1); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_meta_lock(inode, &di_bh, 1); + if (ret) { + mlog_errno(ret); + goto out_rw_unlock; + } + + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { + ret = -EPERM; + goto out_meta_unlock; + } + + switch (sr->l_whence) { + case 0: /*SEEK_SET*/ + break; + case 1: /*SEEK_CUR*/ + sr->l_start += file->f_pos; + break; + case 2: /*SEEK_END*/ + sr->l_start += i_size_read(inode); + break; + default: + ret = -EINVAL; + goto out_meta_unlock; + } + sr->l_whence = 0; + + llen = sr->l_len > 0 ? 
sr->l_len - 1 : sr->l_len; + + if (sr->l_start < 0 + || sr->l_start > max_off + || (sr->l_start + llen) < 0 + || (sr->l_start + llen) > max_off) { + ret = -EINVAL; + goto out_meta_unlock; + } + + if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { + if (sr->l_len <= 0) { + ret = -EINVAL; + goto out_meta_unlock; + } + } + + if (should_remove_suid(file->f_path.dentry)) { + ret = __ocfs2_write_remove_suid(inode, di_bh); + if (ret) { + mlog_errno(ret); + goto out_meta_unlock; + } + } + + down_write(&OCFS2_I(inode)->ip_alloc_sem); + /* + * This takes unsigned offsets, but the signed ones we pass + * have been checked against overflow above. + */ + ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, sr->l_len); + up_write(&OCFS2_I(inode)->ip_alloc_sem); + if (ret) { + mlog_errno(ret); + goto out_meta_unlock; + } + + /* + * We update c/mtime for these changes + */ + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_meta_unlock; + } + + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); + if (ret < 0) + mlog_errno(ret); + + ocfs2_commit_trans(osb, handle); + +out_meta_unlock: + brelse(di_bh); + ocfs2_meta_unlock(inode, 1); +out_rw_unlock: + ocfs2_rw_unlock(inode, 1); + +out: + mutex_unlock(&inode->i_mutex); /* every goto above runs with i_mutex held */ + return ret; +} + static int ocfs2_prepare_inode_for_write(struct dentry *dentry, loff_t *ppos, size_t count, @@ -1331,15 +1560,16 @@ *basep = base; } -static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, +static struct page * ocfs2_get_write_source(char **ret_src_buf, const struct iovec *cur_iov, size_t iov_offset) { int ret; - char *buf; + char *buf = cur_iov->iov_base + iov_offset; struct page *src_page = NULL; + unsigned long off; - buf = cur_iov->iov_base + iov_offset; + off = (unsigned long)(buf) & ~PAGE_CACHE_MASK; if (!segment_eq(get_fs(), KERNEL_DS)) 
{ /* @@ -1378,10 +1608,12 @@ 
{ int ret = 0; ssize_t copied, total = 0; - size_t iov_offset = 0; + size_t iov_offset = 0, bytes; + loff_t pos; const struct iovec *cur_iov = iov; - struct ocfs2_buffered_write_priv bp; - struct page *page; + struct page *user_page, *page; + char *buf, *dst; + void *fsdata; /* * handle partial DIO write. Adjust cur_iov if needed. @@ -1389,21 +1621,38 @@ ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); do { - bp.b_cur_off = iov_offset; - bp.b_cur_iov = cur_iov; + pos = *ppos; - page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset); + if (IS_ERR(user_page)) { + ret = PTR_ERR(user_page); goto out; } - copied = ocfs2_buffered_write_cluster(file, *ppos, count, - ocfs2_map_and_write_user_data, - &bp); + /* Stay within our page boundaries */ + bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)), + (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK))); + /* Stay within the vector boundary */ + bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset); + /* Stay within count */ + bytes = min(bytes, count); + + page = NULL; + ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0, + &page, &fsdata); + if (ret) { + mlog_errno(ret); + goto out; + } - ocfs2_put_write_source(&bp, page); + dst = kmap_atomic(page, KM_USER0); + memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes); + kunmap_atomic(dst, KM_USER0); + flush_dcache_page(page); + ocfs2_put_write_source(user_page); + copied = ocfs2_write_end(file, file->f_mapping, pos, bytes, + bytes, page, fsdata); if (copied < 0) { mlog_errno(copied); ret = copied; @@ -1411,7 +1660,7 @@ } total += copied; - *ppos = *ppos + copied; + *ppos = pos + copied; count -= copied; ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); @@ -1581,52 +1830,46 @@ struct pipe_buffer *buf, struct splice_desc *sd) { - int ret, count, total = 0; + int ret, count; ssize_t copied = 0; - struct 
ocfs2_splice_write_priv sp; + struct file *file = sd->file; + unsigned int offset; + struct page *page = NULL; + void *fsdata; + char *src, *dst; ret = buf->ops->pin(pipe, buf); if (ret) goto out; - sp.s_sd = sd; - sp.s_buf = buf; - sp.s_pipe = pipe; - sp.s_offset = sd->pos & ~PAGE_CACHE_MASK; - sp.s_buf_offset = buf->offset; - + offset = sd->pos & ~PAGE_CACHE_MASK; count = sd->len; - if (count + sp.s_offset > PAGE_CACHE_SIZE) - count = PAGE_CACHE_SIZE - sp.s_offset; + if (count + offset > PAGE_CACHE_SIZE) + count = PAGE_CACHE_SIZE - offset; - do { - /* - * splice wants us to copy up to one page at a - * time. For pagesize > cluster size, this means we - * might enter ocfs2_buffered_write_cluster() more - * than once, so keep track of our progress here. - */ - copied = ocfs2_buffered_write_cluster(sd->file, - (loff_t)sd->pos + total, - count, - ocfs2_map_and_write_splice_data, - &sp); + ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, + &page, &fsdata); + if (ret) { + mlog_errno(ret); + goto out; + } + + src = buf->ops->map(pipe, buf, 1); + dst = kmap_atomic(page, KM_USER1); + memcpy(dst + offset, src + buf->offset, count); + kunmap_atomic(dst, KM_USER1); /* unmap by the address kmap_atomic() returned, not the page */ + buf->ops->unmap(pipe, buf, src); + + copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, + page, fsdata); if (copied < 0) { mlog_errno(copied); ret = copied; goto out; } - - count -= copied; - sp.s_offset += copied; - sp.s_buf_offset += copied; - total += copied; - } while (count); - - ret = 0; out: - return total ? total : ret; + return copied ? 
copied : ret; } static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, diff -Nurb linux-2.6.22-570/fs/ocfs2/file.h linux-2.6.22-591/fs/ocfs2/file.h --- linux-2.6.22-570/fs/ocfs2/file.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/file.h 2007-12-21 15:36:12.000000000 -0500 @@ -39,15 +39,16 @@ }; int ocfs2_do_extend_allocation(struct ocfs2_super *osb, struct inode *inode, - u32 *cluster_start, + u32 *logical_offset, u32 clusters_to_add, + int mark_unwritten, struct buffer_head *fe_bh, handle_t *handle, struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *meta_ac, - enum ocfs2_alloc_restarted *reason); + enum ocfs2_alloc_restarted *reason_ret); int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, - u32 clusters_to_add, + u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **meta_ac); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); @@ -61,4 +62,7 @@ int ocfs2_update_inode_atime(struct inode *inode, struct buffer_head *bh); +int ocfs2_change_file_space(struct file *file, unsigned int cmd, + struct ocfs2_space_resv *sr); + #endif /* OCFS2_FILE_H */ diff -Nurb linux-2.6.22-570/fs/ocfs2/heartbeat.c linux-2.6.22-591/fs/ocfs2/heartbeat.c --- linux-2.6.22-570/fs/ocfs2/heartbeat.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/heartbeat.c 2007-12-21 15:36:12.000000000 -0500 @@ -157,16 +157,16 @@ if (ocfs2_mount_local(osb)) return 0; - status = o2hb_register_callback(&osb->osb_hb_down); + status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); if (status < 0) { mlog_errno(status); goto bail; } - status = o2hb_register_callback(&osb->osb_hb_up); + status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); if (status < 0) { mlog_errno(status); - o2hb_unregister_callback(&osb->osb_hb_down); + o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); } bail: @@ -178,8 +178,8 @@ if (ocfs2_mount_local(osb)) 
return; - o2hb_unregister_callback(&osb->osb_hb_down); - o2hb_unregister_callback(&osb->osb_hb_up); + o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); + o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); } void ocfs2_stop_heartbeat(struct ocfs2_super *osb) @@ -209,7 +209,7 @@ envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[2] = NULL; - ret = call_usermodehelper(argv[0], argv, envp, 1); + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (ret < 0) mlog_errno(ret); } diff -Nurb linux-2.6.22-570/fs/ocfs2/ioctl.c linux-2.6.22-591/fs/ocfs2/ioctl.c --- linux-2.6.22-570/fs/ocfs2/ioctl.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/ioctl.c 2007-12-21 15:36:12.000000000 -0500 @@ -14,6 +14,7 @@ #include "ocfs2.h" #include "alloc.h" #include "dlmglue.h" +#include "file.h" #include "inode.h" #include "journal.h" @@ -115,6 +116,7 @@ { unsigned int flags; int status; + struct ocfs2_space_resv sr; switch (cmd) { case OCFS2_IOC_GETFLAGS: @@ -130,6 +132,12 @@ return ocfs2_set_inode_attr(inode, flags, OCFS2_FL_MODIFIABLE); + case OCFS2_IOC_RESVSP: + case OCFS2_IOC_RESVSP64: + if (copy_from_user(&sr, (int __user *) arg, sizeof(sr))) + return -EFAULT; + + return ocfs2_change_file_space(filp, cmd, &sr); default: return -ENOTTY; } @@ -148,6 +156,9 @@ case OCFS2_IOC32_SETFLAGS: cmd = OCFS2_IOC_SETFLAGS; break; + case OCFS2_IOC_RESVSP: + case OCFS2_IOC_RESVSP64: + break; default: return -ENOIOCTLCMD; } diff -Nurb linux-2.6.22-570/fs/ocfs2/journal.c linux-2.6.22-591/fs/ocfs2/journal.c --- linux-2.6.22-570/fs/ocfs2/journal.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/journal.c 2007-12-21 15:36:12.000000000 -0500 @@ -722,8 +722,7 @@ container_of(work, struct ocfs2_journal, j_recovery_work); struct ocfs2_super *osb = journal->j_osb; struct ocfs2_dinode *la_dinode, *tl_dinode; - struct ocfs2_la_recovery_item *item; - struct list_head *p, *n; + struct ocfs2_la_recovery_item *item, *n; LIST_HEAD(tmp_la_list); 
mlog_entry_void(); @@ -734,8 +733,7 @@ list_splice_init(&journal->j_la_cleanups, &tmp_la_list); spin_unlock(&journal->j_lock); - list_for_each_safe(p, n, &tmp_la_list) { - item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); + list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { list_del_init(&item->lri_list); mlog(0, "Complete recovery for slot %d\n", item->lri_slot); diff -Nurb linux-2.6.22-570/fs/ocfs2/mmap.c linux-2.6.22-591/fs/ocfs2/mmap.c --- linux-2.6.22-570/fs/ocfs2/mmap.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/mmap.c 2007-12-21 15:36:12.000000000 -0500 @@ -37,38 +37,48 @@ #include "ocfs2.h" +#include "aops.h" #include "dlmglue.h" #include "file.h" #include "inode.h" #include "mmap.h" -static struct page *ocfs2_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) { - struct page *page = NOPAGE_SIGBUS; - sigset_t blocked, oldset; - int ret; - - mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, - type); - - /* The best way to deal with signals in this path is + /* The best way to deal with signals in the vm path is * to block them upfront, rather than allowing the * locking paths to return -ERESTARTSYS. 
*/ - sigfillset(&blocked); + sigfillset(blocked); - /* We should technically never get a bad ret return + /* We should technically never get a bad return value * from sigprocmask */ - ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); + return sigprocmask(SIG_BLOCK, blocked, oldset); +} + +static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) +{ + return sigprocmask(SIG_SETMASK, oldset, NULL); +} + +static struct page *ocfs2_fault(struct vm_area_struct *area, + struct fault_data *fdata) +{ + struct page *page = NULL; + sigset_t blocked, oldset; + int ret; + + mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff); + + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); if (ret < 0) { + fdata->type = VM_FAULT_SIGBUS; mlog_errno(ret); goto out; } - page = filemap_nopage(area, address, type); + page = filemap_fault(area, fdata); - ret = sigprocmask(SIG_SETMASK, &oldset, NULL); + ret = ocfs2_vm_op_unblock_sigs(&oldset); if (ret < 0) mlog_errno(ret); out: @@ -76,28 +86,136 @@ return page; } -static struct vm_operations_struct ocfs2_file_vm_ops = { - .nopage = ocfs2_nopage, -}; +static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, + struct page *page) +{ + int ret; + struct address_space *mapping = inode->i_mapping; + loff_t pos = (loff_t)page->index << PAGE_CACHE_SHIFT; /* widen first so the shift is done in loff_t */ + unsigned int len = PAGE_CACHE_SIZE; + pgoff_t last_index; + struct page *locked_page = NULL; + void *fsdata; + loff_t size = i_size_read(inode); -int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) + /* + * Another node might have truncated while we were waiting on + * cluster locks. + */ + last_index = size >> PAGE_CACHE_SHIFT; + if (page->index > last_index) { + ret = -EINVAL; + goto out; + } + + /* + * The i_size check above doesn't catch the case where nodes + * truncated and then re-extended the file. 
We'll re-check the + * page mapping after taking the page lock inside of + * ocfs2_write_begin_nolock(). 
+ */ + if (!PageUptodate(page) || page->mapping != inode->i_mapping) { + ret = -EINVAL; + goto out; + } + + /* + * Call ocfs2_write_begin() and ocfs2_write_end() to take + * advantage of the allocation code there. We pass a write + * length of the whole page (chopped to i_size) to make sure + * the whole thing is allocated. + * + * Since we know the page is up to date, we don't have to + * worry about ocfs2_write_begin() skipping some buffer reads + * because the "write" would invalidate their data. + */ + if (page->index == last_index) + len = size & ~PAGE_CACHE_MASK; + + ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, + &fsdata, di_bh, page); + if (ret) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, + fsdata); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + BUG_ON(ret != len); + ret = 0; +out: + return ret; +} + +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) { - int ret = 0, lock_level = 0; - struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct buffer_head *di_bh = NULL; + sigset_t blocked, oldset; + int ret, ret2; + + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); + if (ret < 0) { + mlog_errno(ret); + return ret; + } /* - * Only support shared writeable mmap for local mounts which - * don't know about holes. + * The cluster locks taken will block a truncate from another + * node. Taking the data lock will also ensure that we don't + * attempt page truncation as part of a downconvert. */ - if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && - ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && - ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { - mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); - /* This is -EINVAL because generic_file_readonly_mmap - * returns it in a similar situation. 
*/ - return -EINVAL; + ret = ocfs2_meta_lock(inode, &di_bh, 1); + if (ret < 0) { + mlog_errno(ret); + goto out; } + /* + * The alloc sem should be enough to serialize with + * ocfs2_truncate_file() changing i_size as well as any thread + * modifying the inode btree. + */ + down_write(&OCFS2_I(inode)->ip_alloc_sem); + + ret = ocfs2_data_lock(inode, 1); + if (ret < 0) { + mlog_errno(ret); + goto out_meta_unlock; + } + + ret = __ocfs2_page_mkwrite(inode, di_bh, page); + + ocfs2_data_unlock(inode, 1); + +out_meta_unlock: + up_write(&OCFS2_I(inode)->ip_alloc_sem); + + brelse(di_bh); + ocfs2_meta_unlock(inode, 1); + +out: + ret2 = ocfs2_vm_op_unblock_sigs(&oldset); + if (ret2 < 0) + mlog_errno(ret2); + + return ret; +} + +static struct vm_operations_struct ocfs2_file_vm_ops = { + .fault = ocfs2_fault, /* ocfs2_fault() takes struct fault_data, not the nopage arguments */ + .page_mkwrite = ocfs2_page_mkwrite, +}; + +int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret = 0, lock_level = 0; + ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, file->f_vfsmnt, &lock_level); if (ret < 0) { @@ -107,6 +225,7 @@ ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; return 0; } diff -Nurb linux-2.6.22-570/fs/ocfs2/namei.c linux-2.6.22-591/fs/ocfs2/namei.c --- linux-2.6.22-570/fs/ocfs2/namei.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/namei.c 2007-12-21 15:36:12.000000000 -0500 @@ -1684,7 +1684,7 @@ u32 offset = 0; inode->i_op = &ocfs2_symlink_inode_operations; - status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, + status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, new_fe_bh, handle, data_ac, NULL, NULL); diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2.h linux-2.6.22-591/fs/ocfs2/ocfs2.h --- linux-2.6.22-570/fs/ocfs2/ocfs2.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/ocfs2.h 2007-12-21 15:36:12.000000000 -0500 @@ 
-220,6 +220,7 @@ u16 max_slots; s16 node_num; s16 
slot_num; + s16 preferred_slot; int s_sectsize_bits; int s_clustersize; int s_clustersize_bits; @@ -306,6 +307,19 @@ return 0; } +static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) +{ + /* + * Support for sparse files is a pre-requisite + */ + if (!ocfs2_sparse_alloc(osb)) + return 0; + + if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) + return 1; + return 0; +} + /* set / clear functions because cluster events can make these happen * in parallel so we want the transitions to be atomic. this also * means that any future flags osb_flags must be protected by spinlock diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h linux-2.6.22-591/fs/ocfs2/ocfs2_fs.h --- linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/ocfs2_fs.h 2007-12-21 15:36:12.000000000 -0500 @@ -88,7 +88,7 @@ #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) -#define OCFS2_FEATURE_RO_COMPAT_SUPP 0 +#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN /* * Heartbeat-only devices are missing journals and other files. The @@ -116,6 +116,11 @@ */ #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 +/* + * Unwritten extents support. + */ +#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 + /* The byte offset of the first backup block will be 1G. * The following will be 4G, 16G, 64G, 256G and 1T. */ @@ -174,6 +179,32 @@ #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) /* + * Space reservation / allocation / free ioctls and argument structure + * are designed to be compatible with XFS. + * + * ALLOCSP* and FREESP* are not and will never be supported, but are + * included here for completeness. 
+ */ +struct ocfs2_space_resv { + __s16 l_type; + __s16 l_whence; + __s64 l_start; + __s64 l_len; /* len == 0 means until end of file */ + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +}; + +#define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) +#define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) +#define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) +#define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) +#define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) +#define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) +#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) +#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) + +/* * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) */ #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ diff -Nurb linux-2.6.22-570/fs/ocfs2/slot_map.c linux-2.6.22-591/fs/ocfs2/slot_map.c --- linux-2.6.22-570/fs/ocfs2/slot_map.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/slot_map.c 2007-12-21 15:36:12.000000000 -0500 @@ -121,17 +121,25 @@ return ret; } -static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si) +static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) { int i; s16 ret = OCFS2_INVALID_SLOT; + if (preferred >= 0 && preferred < si->si_num_slots) { + if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { + ret = preferred; + goto out; + } + } + for(i = 0; i < si->si_num_slots; i++) { if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { ret = (s16) i; break; } } +out: return ret; } @@ -248,7 +256,7 @@ if (slot == OCFS2_INVALID_SLOT) { /* if no slot yet, then just take 1st available * one. 
*/ - slot = __ocfs2_find_empty_slot(si); + slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); if (slot == OCFS2_INVALID_SLOT) { spin_unlock(&si->si_lock); mlog(ML_ERROR, "no free slots available!\n"); diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.c linux-2.6.22-591/fs/ocfs2/suballoc.c --- linux-2.6.22-570/fs/ocfs2/suballoc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/suballoc.c 2007-12-21 15:36:12.000000000 -0500 @@ -98,14 +98,6 @@ u16 chain); static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, u32 wanted); -static int ocfs2_free_suballoc_bits(handle_t *handle, - struct inode *alloc_inode, - struct buffer_head *alloc_bh, - unsigned int start_bit, - u64 bg_blkno, - unsigned int count); -static inline u64 ocfs2_which_suballoc_group(u64 block, - unsigned int bit); static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, u64 bg_blkno, u16 bg_bit_off); @@ -496,13 +488,7 @@ (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); (*ac)->ac_which = OCFS2_AC_USE_META; - -#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS - slot = 0; -#else slot = osb->slot_num; -#endif - (*ac)->ac_group_search = ocfs2_block_group_search; status = ocfs2_reserve_suballoc_bits(osb, (*ac), @@ -1626,7 +1612,7 @@ /* * expects the suballoc inode to already be locked. 
*/ -static int ocfs2_free_suballoc_bits(handle_t *handle, +int ocfs2_free_suballoc_bits(handle_t *handle, struct inode *alloc_inode, struct buffer_head *alloc_bh, unsigned int start_bit, @@ -1703,13 +1689,6 @@ return status; } -static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) -{ - u64 group = block - (u64) bit; - - return group; -} - int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, @@ -1723,19 +1702,6 @@ inode_alloc_bh, bit, bg_blkno, 1); } -int ocfs2_free_extent_block(handle_t *handle, - struct inode *eb_alloc_inode, - struct buffer_head *eb_alloc_bh, - struct ocfs2_extent_block *eb) -{ - u64 blk = le64_to_cpu(eb->h_blkno); - u16 bit = le16_to_cpu(eb->h_suballoc_bit); - u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); - - return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, - bit, bg_blkno, 1); -} - int ocfs2_free_clusters(handle_t *handle, struct inode *bitmap_inode, struct buffer_head *bitmap_bh, diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.h linux-2.6.22-591/fs/ocfs2/suballoc.h --- linux-2.6.22-570/fs/ocfs2/suballoc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/suballoc.h 2007-12-21 15:36:12.000000000 -0500 @@ -86,20 +86,29 @@ u32 *cluster_start, u32 *num_clusters); +int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count); int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, struct ocfs2_dinode *di); -int ocfs2_free_extent_block(handle_t *handle, - struct inode *eb_alloc_inode, - struct buffer_head *eb_alloc_bh, - struct ocfs2_extent_block *eb); int ocfs2_free_clusters(handle_t *handle, struct inode *bitmap_inode, struct buffer_head *bitmap_bh, u64 start_blk, unsigned int num_clusters); +static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) +{ + 
u64 group = block - (u64) bit; + + return group; +} + static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, u64 bg_blkno) { diff -Nurb linux-2.6.22-570/fs/ocfs2/super.c linux-2.6.22-591/fs/ocfs2/super.c --- linux-2.6.22-570/fs/ocfs2/super.c 2007-12-21 15:36:07.000000000 -0500 +++ linux-2.6.22-591/fs/ocfs2/super.c 2007-12-21 15:36:12.000000000 -0500 @@ -82,7 +82,8 @@ MODULE_LICENSE("GPL"); static int ocfs2_parse_options(struct super_block *sb, char *options, - unsigned long *mount_opt, int is_remount); + unsigned long *mount_opt, s16 *slot, + int is_remount); static void ocfs2_put_super(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb); static int ocfs2_remount(struct super_block *sb, int *flags, char *data); @@ -114,8 +115,6 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb); static void ocfs2_destroy_inode(struct inode *inode); -static unsigned long long ocfs2_max_file_offset(unsigned int blockshift); - static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, .alloc_inode = ocfs2_alloc_inode, @@ -323,7 +322,7 @@ /* From xfs_super.c:xfs_max_file_offset * Copyright (c) 2000-2004 Silicon Graphics, Inc. 
*/ -static unsigned long long ocfs2_max_file_offset(unsigned int blockshift) +unsigned long long ocfs2_max_file_offset(unsigned int blockshift) { unsigned int pagefactor = 1; unsigned int bitshift = BITS_PER_LONG - 1; @@ -360,9 +359,10 @@ int incompat_features; int ret = 0; unsigned long parsed_options; + s16 slot; struct ocfs2_super *osb = OCFS2_SB(sb); - if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { + if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) { ret = -EINVAL; goto out; } @@ -546,6 +546,7 @@ struct dentry *root; int status, sector_size; unsigned long parsed_opt; + s16 slot; struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; @@ -553,7 +554,7 @@ mlog_entry("%p, %p, %i", sb, data, silent); - if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { + if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) { status = -EINVAL; goto read_super_error; } @@ -583,6 +584,7 @@ brelse(bh); bh = NULL; osb->s_mount_opt = parsed_opt; + osb->preferred_slot = slot; sb->s_magic = OCFS2_SUPER_MAGIC; @@ -728,6 +730,7 @@ static int ocfs2_parse_options(struct super_block *sb, char *options, unsigned long *mount_opt, + s16 *slot, int is_remount) { int status; @@ -737,6 +740,7 @@ options ? options : "(none)"); *mount_opt = 0; + *slot = OCFS2_INVALID_SLOT; if (!options) { status = 1; diff -Nurb linux-2.6.22-570/fs/ocfs2/super.h linux-2.6.22-591/fs/ocfs2/super.h --- linux-2.6.22-570/fs/ocfs2/super.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ocfs2/super.h 2007-12-21 15:36:12.000000000 -0500 @@ -45,4 +45,6 @@ #define ocfs2_abort(sb, fmt, args...) 
__ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) +unsigned long long ocfs2_max_file_offset(unsigned int blockshift); + #endif /* OCFS2_SUPER_H */ diff -Nurb linux-2.6.22-570/fs/open.c linux-2.6.22-591/fs/open.c --- linux-2.6.22-570/fs/open.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/open.c 2007-12-21 15:36:12.000000000 -0500 @@ -362,6 +362,92 @@ #endif /* + * sys_fallocate - preallocate blocks or free preallocated blocks + * @fd: the file descriptor + * @mode: mode specifies if fallocate should preallocate blocks OR free + * (unallocate) preallocated blocks. Currently only FA_ALLOCATE and + * FA_DEALLOCATE modes are supported. + * @offset: The offset within file, from where (un)allocation is being + * requested. It should not have a negative value. + * @len: The amount (in bytes) of space to be (un)allocated, from the offset. + * + * This system call, depending on the mode, preallocates or unallocates blocks + * for a file. The range of blocks depends on the value of offset and len + * arguments provided by the user/application. For FA_ALLOCATE mode, if this + * system call succeeds, subsequent writes to the file in the given range + * (specified by offset & len) should not fail - even if the file system + * later becomes full. Hence the preallocation done is persistent (valid + * even after reopen of the file and remount/reboot). + * + * It is expected that the ->fallocate() inode operation implemented by the + * individual file systems will update the file size and/or ctime/mtime + * depending on the mode and also on the success of the operation. + * + * Note: In case the file system does not support preallocation, + * posix_fallocate() should fall back to the library implementation (i.e. + * allocating zero-filled new blocks to the file). + * + * Return Values + * 0 : On SUCCESS a value of zero is returned. + * error : On Failure, an error code will be returned. 
+ * An error code of -ENOSYS or -EOPNOTSUPP should make posix_fallocate() + * fall back on the library implementation of fallocate. + * + * Generic fallocate to be added for file systems that do not + * support it. + */ +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) +{ + struct file *file; + struct inode *inode; + long ret = -EINVAL; + + if (offset < 0 || len <= 0) + goto out; + + /* Return error if mode is not supported */ + ret = -EOPNOTSUPP; + if (mode != FA_ALLOCATE && mode !=FA_DEALLOCATE) + goto out; + + ret = -EBADF; + file = fget(fd); + if (!file) + goto out; + if (!(file->f_mode & FMODE_WRITE)) + goto out_fput; + + inode = file->f_path.dentry->d_inode; + + ret = -ESPIPE; + if (S_ISFIFO(inode->i_mode)) + goto out_fput; + + ret = -ENODEV; + /* + * Let individual file system decide if it supports preallocation + * for directories or not. + */ + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + goto out_fput; + + ret = -EFBIG; + /* Check for wrap through zero too */ + if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) + goto out_fput; + + if (inode->i_op && inode->i_op->fallocate) + ret = inode->i_op->fallocate(inode, mode, offset, len); + else + ret = -ENOSYS; + +out_fput: + fput(file); +out: + return ret; +} + +/* + * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. 
diff -Nurb linux-2.6.22-570/fs/partitions/check.c linux-2.6.22-591/fs/partitions/check.c --- linux-2.6.22-570/fs/partitions/check.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/partitions/check.c 2007-12-21 15:36:12.000000000 -0500 @@ -397,7 +397,6 @@ static struct attribute addpartattr = { .name = "whole_disk", .mode = S_IRUSR | S_IRGRP | S_IROTH, - .owner = THIS_MODULE, }; sysfs_create_file(&p->kobj, &addpartattr); diff -Nurb linux-2.6.22-570/fs/proc/Makefile linux-2.6.22-591/fs/proc/Makefile --- linux-2.6.22-570/fs/proc/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/proc/Makefile 2007-12-21 15:36:14.000000000 -0500 @@ -11,6 +11,7 @@ proc_tty.o proc_misc.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o +proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff -Nurb linux-2.6.22-570/fs/proc/array.c linux-2.6.22-591/fs/proc/array.c --- linux-2.6.22-570/fs/proc/array.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/proc/array.c 2007-12-21 15:36:12.000000000 -0500 @@ -291,6 +291,15 @@ return buffer; } +static inline char *task_context_switch_counts(struct task_struct *p, + char *buffer) +{ + return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n" + "nonvoluntary_ctxt_switches:\t%lu\n", + p->nvcsw, + p->nivcsw); +} + static inline char *task_cap(struct task_struct *p, char *buffer) { struct vx_info *vxi = p->vx_info; @@ -328,6 +337,7 @@ #if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif + buffer = task_context_switch_counts(task, buffer); return buffer - orig; } @@ -426,8 +436,9 @@ /* Temporary variable needed for gcc-2.96 */ /* convert timespec -> nsec*/ - start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC - + task->start_time.tv_nsec; + start_time = + (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC + + task->real_start_time.tv_nsec; /* convert nsec 
-> ticks */ start_time = nsec_to_clock_t(start_time); diff -Nurb linux-2.6.22-570/fs/proc/base.c linux-2.6.22-591/fs/proc/base.c --- linux-2.6.22-570/fs/proc/base.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/proc/base.c 2007-12-21 15:36:12.000000000 -0500 @@ -67,7 +67,7 @@ #include #include #include -#include +#include #include #include #include @@ -490,7 +490,7 @@ count = PROC_BLOCK_SIZE; length = -ENOMEM; - if (!(page = __get_free_page(GFP_KERNEL))) + if (!(page = __get_free_page(GFP_TEMPORARY))) goto out; length = PROC_I(inode)->op.proc_read(task, (char*)page); @@ -530,7 +530,7 @@ goto out; ret = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -600,7 +600,7 @@ goto out; copied = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -633,7 +633,7 @@ } #endif -static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +loff_t mem_lseek(struct file * file, loff_t offset, int orig) { switch (orig) { case 0: @@ -711,42 +711,6 @@ .write = oom_adjust_write, }; -#ifdef CONFIG_MMU -static ssize_t clear_refs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - struct mm_struct *mm; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) - return -ESRCH; - mm = get_task_mm(task); - if (mm) { - clear_refs_smap(mm); - mmput(mm); - } - put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; -} - -static struct file_operations proc_clear_refs_operations = { - .write = clear_refs_write, -}; -#endif - #ifdef CONFIG_AUDITSYSCALL 
#define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -786,7 +750,7 @@ /* No partial writes. */ return -EINVAL; } - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) return -ENOMEM; length = -EFAULT; @@ -815,71 +779,6 @@ }; #endif -#ifdef CONFIG_SECCOMP -static ssize_t seccomp_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); - char __buf[20]; - size_t len; - - if (!tsk) - return -ESRCH; - /* no need to print the trailing zero, so use only len */ - len = sprintf(__buf, "%u\n", tsk->seccomp.mode); - put_task_struct(tsk); - - return simple_read_from_buffer(buf, count, ppos, __buf, len); -} - -static ssize_t seccomp_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); - char __buf[20], *end; - unsigned int seccomp_mode; - ssize_t result; - - result = -ESRCH; - if (!tsk) - goto out_no_task; - - /* can set it only once to be even more secure */ - result = -EPERM; - if (unlikely(tsk->seccomp.mode)) - goto out; - - result = -EFAULT; - memset(__buf, 0, sizeof(__buf)); - count = min(count, sizeof(__buf) - 1); - if (copy_from_user(__buf, buf, count)) - goto out; - - seccomp_mode = simple_strtoul(__buf, &end, 0); - if (*end == '\n') - end++; - result = -EINVAL; - if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { - tsk->seccomp.mode = seccomp_mode; - set_tsk_thread_flag(tsk, TIF_SECCOMP); - } else - goto out; - result = -EIO; - if (unlikely(!(end - __buf))) - goto out; - result = end - __buf; -out: - put_task_struct(tsk); -out_no_task: - return result; -} - -static const struct file_operations proc_seccomp_operations = { - .read = seccomp_read, - .write = seccomp_write, -}; -#endif /* CONFIG_SECCOMP */ - #ifdef CONFIG_FAULT_INJECTION static ssize_t proc_fault_inject_read(struct file * file, 
char __user * buf, size_t count, loff_t *ppos) @@ -954,7 +853,8 @@ char __user *buffer, int buflen) { struct inode * inode; - char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; + char *tmp = (char*)__get_free_page(GFP_TEMPORARY); + char *path; int len; if (!tmp) @@ -1015,7 +915,7 @@ task_lock(task); mm = task->mm; if (mm) - dumpable = mm->dumpable; + dumpable = get_dumpable(mm); task_unlock(task); if(dumpable == 1) return 1; @@ -1744,7 +1644,7 @@ goto out; length = -ENOMEM; - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -1804,6 +1704,91 @@ #endif +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + size_t len; + int ret; + + if (!task) + return -ESRCH; + + ret = 0; + mm = get_task_mm(task); + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%08lx\n", + ((mm->flags & MMF_DUMP_FILTER_MASK) >> + MMF_DUMP_FILTER_SHIFT)); + mmput(mm); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + } + + put_task_struct(task); + + return ret; +} + +static ssize_t proc_coredump_filter_write(struct file *file, + const char __user *buf, + size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF], *end; + unsigned int val; + int ret; + int i; + unsigned long mask; + + ret = -EFAULT; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + goto out_no_task; + + ret = -EINVAL; + val = (unsigned int)simple_strtoul(buffer, &end, 0); + if (*end == '\n') + end++; + if (end - buffer == 0) + goto out_no_task; + + ret = -ESRCH; + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + goto out_no_task; + + ret = end - buffer; 
+ mm = get_task_mm(task); + if (!mm) + goto out_no_mm; + + for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { + if (val & mask) + set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); + else + clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); + } + + mmput(mm); + out_no_mm: + put_task_struct(task); + out_no_task: + return ret; +} + +static const struct file_operations proc_coredump_filter_operations = { + .read = proc_coredump_filter_read, + .write = proc_coredump_filter_write, +}; +#endif + /* * /proc/self: */ @@ -1995,18 +1980,22 @@ REG("numa_maps", S_IRUGO, numa_maps), #endif REG("mem", S_IRUSR|S_IWUSR, mem), -#ifdef CONFIG_SECCOMP - REG("seccomp", S_IRUSR|S_IWUSR, seccomp), -#endif LNK("cwd", cwd), LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), REG("mountstats", S_IRUSR, mountstats), #ifdef CONFIG_MMU +#ifdef CONFIG_PROC_CLEAR_REFS REG("clear_refs", S_IWUSR, clear_refs), +#endif +#ifdef CONFIG_PROC_SMAPS REG("smaps", S_IRUGO, smaps), #endif +#ifdef CONFIG_PROC_PAGEMAP + REG("pagemap", S_IRUSR, pagemap), +#endif +#endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), #endif @@ -2016,7 +2005,7 @@ #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif INF("vinfo", S_IRUGO, pid_vx_info), @@ -2029,6 +2018,9 @@ #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) + REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), +#endif #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, pid_io_accounting), #endif @@ -2285,17 +2277,21 @@ REG("numa_maps", S_IRUGO, numa_maps), #endif REG("mem", S_IRUSR|S_IWUSR, mem), -#ifdef CONFIG_SECCOMP - REG("seccomp", S_IRUSR|S_IWUSR, seccomp), -#endif LNK("cwd", cwd), LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), #ifdef CONFIG_MMU +#ifdef CONFIG_PROC_CLEAR_REFS 
REG("clear_refs", S_IWUSR, clear_refs), +#endif +#ifdef CONFIG_PROC_SMAPS REG("smaps", S_IRUGO, smaps), #endif +#ifdef CONFIG_PROC_PAGEMAP + REG("pagemap", S_IRUSR, pagemap), +#endif +#endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), #endif @@ -2305,9 +2301,12 @@ #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CONTAINERS + REG("container", S_IRUGO, container), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL diff -Nurb linux-2.6.22-570/fs/proc/generic.c linux-2.6.22-591/fs/proc/generic.c --- linux-2.6.22-570/fs/proc/generic.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/proc/generic.c 2007-12-21 15:36:12.000000000 -0500 @@ -74,7 +74,7 @@ nbytes = MAX_NON_LFS - pos; dp = PDE(inode); - if (!(page = (char*) __get_free_page(GFP_KERNEL))) + if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) return -ENOMEM; while ((nbytes > 0) && !eof) { diff -Nurb linux-2.6.22-570/fs/proc/internal.h linux-2.6.22-591/fs/proc/internal.h --- linux-2.6.22-570/fs/proc/internal.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/proc/internal.h 2007-12-21 15:36:14.000000000 -0500 @@ -17,6 +17,11 @@ #else static inline void proc_sys_init(void) { } #endif +#ifdef CONFIG_NET +extern int proc_net_init(void); +#else +static inline int proc_net_init(void) { return 0; } +#endif struct vmalloc_info { unsigned long used; @@ -46,15 +51,13 @@ extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +extern loff_t mem_lseek(struct file * file, loff_t offset, int orig); extern const struct file_operations proc_maps_operations; extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; 
- -extern const struct file_operations proc_maps_operations; -extern const struct file_operations proc_numa_maps_operations; -extern const struct file_operations proc_smaps_operations; - +extern const struct file_operations proc_clear_refs_operations; +extern const struct file_operations proc_pagemap_operations; void free_proc_entry(struct proc_dir_entry *de); diff -Nurb linux-2.6.22-570/fs/proc/proc_misc.c linux-2.6.22-591/fs/proc/proc_misc.c --- linux-2.6.22-570/fs/proc/proc_misc.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/proc/proc_misc.c 2007-12-21 15:36:14.000000000 -0500 @@ -122,6 +122,7 @@ cputime_t idletime = cputime_add(init_task.utime, init_task.stime); do_posix_clock_monotonic_gettime(&uptime); + monotonic_to_bootbased(&uptime); cputime_to_timespec(idletime, &idle); if (vx_flags(VXF_VIRT_UPTIME, 0)) vx_vsi_uptime(&uptime, &idle); @@ -463,12 +464,14 @@ unsigned long jif; cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; u64 sum = 0; + struct timespec boottime; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; - jif = - wall_to_monotonic.tv_sec; - if (wall_to_monotonic.tv_nsec) - --jif; + getboottime(&boottime); + jif = boottime.tv_sec; + if (boottime.tv_nsec) + ++jif; for_each_possible_cpu(i) { int j; diff -Nurb linux-2.6.22-570/fs/proc/proc_net.c linux-2.6.22-591/fs/proc/proc_net.c --- linux-2.6.22-570/fs/proc/proc_net.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/proc/proc_net.c 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,154 @@ +/* + * linux/fs/proc/net.c + * + * Copyright (C) 2007 + * + * Author: Eric Biederman + * + * proc net directory handling functions + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +static struct proc_dir_entry *proc_net_shadow; + +static struct dentry *proc_net_shadow_dentry(struct dentry *parent, + struct proc_dir_entry *de) +{ + 
struct dentry *shadow = NULL; + struct inode *inode; + if (!de) + goto out; + de_get(de); + inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de); + if (!inode) + goto out_de_put; + shadow = d_alloc_name(parent, de->name); + if (!shadow) + goto out_iput; + shadow->d_op = parent->d_op; /* proc_dentry_operations */ + d_instantiate(shadow, inode); +out: + return shadow; +out_iput: + iput(inode); +out_de_put: + de_put(de); + goto out; +} + +static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd) +{ + struct net *net = current->nsproxy->net_ns; + struct dentry *shadow; + shadow = proc_net_shadow_dentry(parent, net->proc_net); + if (!shadow) + return ERR_PTR(-ENOENT); + + dput(nd->dentry); + /* My dentry count is 1 and that should be enough as the + * shadow dentry is thrown away immediately. + */ + nd->dentry = shadow; + return NULL; +} + +static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct net *net = current->nsproxy->net_ns; + struct dentry *shadow; + + shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net); + if (!shadow) + return ERR_PTR(-ENOENT); + + dput(nd->dentry); + nd->dentry = shadow; + + return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd); +} + +static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct net *net = current->nsproxy->net_ns; + struct dentry *shadow; + int ret; + + shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net); + if (!shadow) + return -ENOENT; + ret = shadow->d_inode->i_op->setattr(shadow, iattr); + dput(shadow); + return ret; +} + +static const struct file_operations proc_net_dir_operations = { + .read = generic_read_dir, +}; + +static struct inode_operations proc_net_dir_inode_operations = { + .follow_link = proc_net_follow_link, + .lookup = proc_net_lookup, + .setattr = proc_net_setattr, +}; + + +static int proc_net_ns_init(struct net *net) +{ + struct proc_dir_entry *netd, *net_statd; 
+ + netd = proc_mkdir("net", &net->proc_net_root); + if (!netd) + return -EEXIST; + + net_statd = proc_mkdir("stat", netd); + if (!net_statd) { + remove_proc_entry("net", &net->proc_net_root); + return -EEXIST; + } + + netd->data = net; + net_statd->data = net; + net->proc_net_root.data = net; + net->proc_net = netd; + net->proc_net_stat = net_statd; + + return 0; +} + +static void proc_net_ns_exit(struct net *net) +{ + remove_proc_entry("stat", net->proc_net); + remove_proc_entry("net", &net->proc_net_root); +} + +struct pernet_operations proc_net_ns_ops = { + .init = proc_net_ns_init, + .exit = proc_net_ns_exit, +}; + +int proc_net_init(void) +{ + proc_net_shadow = proc_mkdir("net", NULL); + if (!proc_net_shadow) /* proc_mkdir() failed: do not dereference NULL below */ + return -ENOMEM; + proc_net_shadow->proc_iops = &proc_net_dir_inode_operations; + proc_net_shadow->proc_fops = &proc_net_dir_operations; + return register_pernet_subsys(&proc_net_ns_ops); +} diff -Nurb linux-2.6.22-570/fs/proc/root.c linux-2.6.22-591/fs/proc/root.c --- linux-2.6.22-570/fs/proc/root.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/proc/root.c 2007-12-21 15:36:14.000000000 -0500 @@ -21,11 +21,11 @@ #include "internal.h" -struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; struct proc_dir_entry *proc_virtual; extern void proc_vx_init(void); +struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -64,8 +64,8 @@ return; } proc_misc_init(); - proc_net = proc_mkdir("net", NULL); - proc_net_stat = proc_mkdir("net/stat", NULL); + + proc_net_init(); #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); @@ -163,7 +163,5 @@ EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_net); -EXPORT_SYMBOL(proc_net_stat); EXPORT_SYMBOL(proc_bus); EXPORT_SYMBOL(proc_root_driver); diff -Nurb linux-2.6.22-570/fs/proc/task_mmu.c 
linux-2.6.22-591/fs/proc/task_mmu.c --- linux-2.6.22-570/fs/proc/task_mmu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/proc/task_mmu.c 2007-12-21 15:36:12.000000000 -0500 @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -114,24 +115,123 @@ seq_printf(m, "%*c", len, ' '); } -struct mem_size_stats +static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { - unsigned long resident; - unsigned long shared_clean; - unsigned long shared_dirty; - unsigned long private_clean; - unsigned long private_dirty; - unsigned long referenced; -}; + if (vma && vma != priv->tail_vma) { + struct mm_struct *mm = vma->vm_mm; + up_read(&mm->mmap_sem); + mmput(mm); + } +} -struct pmd_walker { - struct vm_area_struct *vma; - void *private; - void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, - unsigned long, void *); -}; +static void *m_start(struct seq_file *m, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + unsigned long last_addr = m->version; + struct mm_struct *mm; + struct vm_area_struct *vma, *tail_vma = NULL; + loff_t l = *pos; + + /* Clear the per syscall fields in priv */ + priv->task = NULL; + priv->tail_vma = NULL; + + /* + * We remember last_addr rather than next_addr to hit with + * mmap_cache most of the time. We have zero last_addr at + * the beginning and also after lseek. We will have -1 last_addr + * after the end of the vmas. + */ + + if (last_addr == -1UL) + return NULL; + + priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + if (!priv->task) + return NULL; + + mm = get_task_mm(priv->task); + if (!mm) + return NULL; + + priv->tail_vma = tail_vma = get_gate_vma(priv->task); + down_read(&mm->mmap_sem); + + /* Start with last addr hint */ + if (last_addr && (vma = find_vma(mm, last_addr))) { + vma = vma->vm_next; + goto out; + } + + /* + * Check the vma index is within the range and do + * sequential scan until m_index. 
+ */ + vma = NULL; + if ((unsigned long)l < mm->map_count) { + vma = mm->mmap; + while (l-- && vma) + vma = vma->vm_next; + goto out; + } + + if (l != mm->map_count) + tail_vma = NULL; /* After gate vma */ + +out: + if (vma) + return vma; + + /* End of vmas has been reached */ + m->version = (tail_vma != NULL)? 0: -1UL; + up_read(&mm->mmap_sem); + mmput(mm); + return tail_vma; +} + +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + struct vm_area_struct *tail_vma = priv->tail_vma; + + (*pos)++; + if (vma && (vma != tail_vma) && vma->vm_next) + return vma->vm_next; + vma_stop(priv, vma); + return (vma != tail_vma)? tail_vma: NULL; +} + +static void m_stop(struct seq_file *m, void *v) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + + vma_stop(priv, vma); + if (priv->task) + put_task_struct(priv->task); +} + +static int do_maps_open(struct inode *inode, struct file *file, + struct seq_operations *ops) +{ + struct proc_maps_private *priv; + int ret = -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->pid = proc_pid(inode); + ret = seq_open(file, ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } + } + return ret; +} -static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) +static int show_map(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct task_struct *task = priv->task; @@ -191,38 +291,47 @@ } seq_putc(m, '\n'); - if (mss) - seq_printf(m, - "Size: %8lu kB\n" - "Rss: %8lu kB\n" - "Shared_Clean: %8lu kB\n" - "Shared_Dirty: %8lu kB\n" - "Private_Clean: %8lu kB\n" - "Private_Dirty: %8lu kB\n" - "Referenced: %8lu kB\n", - (vma->vm_end - vma->vm_start) >> 10, - mss->resident >> 10, - mss->shared_clean >> 10, - mss->shared_dirty >> 10, - mss->private_clean >> 10, - mss->private_dirty >> 10, 
- mss->referenced >> 10); - if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; return 0; } -static int show_map(struct seq_file *m, void *v) +static struct seq_operations proc_pid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_map +}; + +static int maps_open(struct inode *inode, struct file *file) { - return show_map_internal(m, v, NULL); + return do_maps_open(inode, file, &proc_pid_maps_op); } -static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, +const struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#ifdef CONFIG_PROC_SMAPS +struct mem_size_stats +{ + struct vm_area_struct *vma; + unsigned long resident; + unsigned long shared_clean; + unsigned long shared_dirty; + unsigned long private_clean; + unsigned long private_dirty; + unsigned long referenced; +}; + +static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, void *private) { struct mem_size_stats *mss = private; + struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -256,12 +365,71 @@ } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; + +static int show_smap(struct seq_file *m, void *v) { + struct vm_area_struct *vma = v; + struct mem_size_stats mss; + int ret; + + memset(&mss, 0, sizeof mss); + mss.vma = vma; + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) + walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, + &smaps_walk, &mss); + + ret = show_map(m, v); + if (ret) + return ret; + + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + 
"Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n" + "Referenced: %8lu kB\n", + (vma->vm_end - vma->vm_start) >> 10, + mss.resident >> 10, + mss.shared_clean >> 10, + mss.shared_dirty >> 10, + mss.private_clean >> 10, + mss.private_dirty >> 10, + mss.referenced >> 10); + + return ret; +} + +static struct seq_operations proc_pid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_smap +}; + +static int smaps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_smaps_op); +} + +const struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; +#endif + +#ifdef CONFIG_PROC_CLEAR_REFS +static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, void *private) +{ + struct vm_area_struct *vma = private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -282,236 +450,52 @@ } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, - unsigned long addr, unsigned long end) -{ - pmd_t *pmd; - unsigned long next; - - for (pmd = pmd_offset(pud, addr); addr != end; - pmd++, addr = next) { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - walker->action(walker->vma, pmd, addr, next, walker->private); - } -} - -static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - pud_t *pud; - unsigned long next; - - for (pud = pud_offset(pgd, addr); addr != end; - pud++, addr = next) { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - walk_pmd_range(walker, pud, addr, next); - } -} - -/* - * walk_page_range - walk the page tables of a VMA with a callback - * @vma - VMA to walk - * @action - callback invoked for every bottom-level (PTE) page table - * @private - 
private data passed to the callback function - * - * Recursively walk the page table for the memory area in a VMA, calling - * a callback for every bottom-level (PTE) page table. - */ -static inline void walk_page_range(struct vm_area_struct *vma, - void (*action)(struct vm_area_struct *, - pmd_t *, unsigned long, - unsigned long, void *), - void *private) -{ - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - struct pmd_walker walker = { - .vma = vma, - .private = private, - .action = action, - }; - pgd_t *pgd; - unsigned long next; - - for (pgd = pgd_offset(vma->vm_mm, addr); addr != end; - pgd++, addr = next) { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - walk_pud_range(&walker, pgd, addr, next); - } -} - -static int show_smap(struct seq_file *m, void *v) -{ - struct vm_area_struct *vma = v; - struct mem_size_stats mss; +static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; - memset(&mss, 0, sizeof mss); - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, smaps_pte_range, &mss); - return show_map_internal(m, v, &mss); -} - -void clear_refs_smap(struct mm_struct *mm) +static ssize_t clear_refs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { + struct task_struct *task; + char buffer[13], *end; + struct mm_struct *mm; struct vm_area_struct *vma; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + if (!simple_strtol(buffer, &end, 0)) + return -EINVAL; + if (*end == '\n') + end++; + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + mm = get_task_mm(task); + if (mm) { down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, clear_refs_pte_range, NULL); + if (!is_vm_hugetlb_page(vma)) + walk_page_range(mm, 
vma->vm_start, vma->vm_end, + &clear_refs_walk, vma); flush_tlb_mm(mm); up_read(&mm->mmap_sem); -} - -static void *m_start(struct seq_file *m, loff_t *pos) -{ - struct proc_maps_private *priv = m->private; - unsigned long last_addr = m->version; - struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma = NULL; - loff_t l = *pos; - - /* Clear the per syscall fields in priv */ - priv->task = NULL; - priv->tail_vma = NULL; - - /* - * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at - * the beginning and also after lseek. We will have -1 last_addr - * after the end of the vmas. - */ - - if (last_addr == -1UL) - return NULL; - - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); - if (!priv->task) - return NULL; - - mm = get_task_mm(priv->task); - if (!mm) - return NULL; - - priv->tail_vma = tail_vma = get_gate_vma(priv->task); - down_read(&mm->mmap_sem); - - /* Start with last addr hint */ - if (last_addr && (vma = find_vma(mm, last_addr))) { - vma = vma->vm_next; - goto out; - } - - /* - * Check the vma index is within the range and do - * sequential scan until m_index. - */ - vma = NULL; - if ((unsigned long)l < mm->map_count) { - vma = mm->mmap; - while (l-- && vma) - vma = vma->vm_next; - goto out; - } - - if (l != mm->map_count) - tail_vma = NULL; /* After gate vma */ - -out: - if (vma) - return vma; - - /* End of vmas has been reached */ - m->version = (tail_vma != NULL)? 
0: -1UL; - up_read(&mm->mmap_sem); - mmput(mm); - return tail_vma; -} - -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) -{ - if (vma && vma != priv->tail_vma) { - struct mm_struct *mm = vma->vm_mm; - up_read(&mm->mmap_sem); mmput(mm); } + put_task_struct(task); + if (end - buffer == 0) + return -EIO; + return end - buffer; } -static void *m_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = priv->tail_vma; - - (*pos)++; - if (vma && (vma != tail_vma) && vma->vm_next) - return vma->vm_next; - vma_stop(priv, vma); - return (vma != tail_vma)? tail_vma: NULL; -} - -static void m_stop(struct seq_file *m, void *v) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - - vma_stop(priv, vma); - if (priv->task) - put_task_struct(priv->task); -} - -static struct seq_operations proc_pid_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_map -}; - -static struct seq_operations proc_pid_smaps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_smap -}; - -static int do_maps_open(struct inode *inode, struct file *file, - struct seq_operations *ops) -{ - struct proc_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } - } - return ret; -} - -static int maps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_maps_op); -} - -const struct file_operations proc_maps_operations = { - .open = maps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, +const struct file_operations proc_clear_refs_operations = { + .write = clear_refs_write, }; +#endif #ifdef CONFIG_NUMA 
extern int show_numa_map(struct seq_file *m, void *v); @@ -547,14 +531,211 @@ }; #endif -static int smaps_open(struct inode *inode, struct file *file) +#ifdef CONFIG_PROC_PAGEMAP +struct pagemapread { + struct mm_struct *mm; + unsigned long next; + unsigned long *buf; + pte_t *ptebuf; + unsigned long pos; + size_t count; + int index; + char __user *out; +}; + +static int flush_pagemap(struct pagemapread *pm) { - return do_maps_open(inode, file, &proc_pid_smaps_op); + int n = min(pm->count, pm->index * sizeof(unsigned long)); + if (copy_to_user(pm->out, pm->buf, n)) + return -EFAULT; + pm->out += n; + pm->pos += n; + pm->count -= n; + pm->index = 0; + cond_resched(); + return 0; } -const struct file_operations proc_smaps_operations = { - .open = smaps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, +static int add_to_pagemap(unsigned long addr, unsigned long pfn, + struct pagemapread *pm) +{ + pm->buf[pm->index++] = pfn; + pm->next = addr + PAGE_SIZE; + if (pm->index * sizeof(unsigned long) >= PAGE_SIZE || + pm->index * sizeof(unsigned long) >= pm->count) + return flush_pagemap(pm); + return 0; +} + +static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) +{ + struct pagemapread *pm = private; + pte_t *pte; + int err; + + pte = pte_offset_map(pmd, addr); + +#ifdef CONFIG_HIGHPTE + /* copy PTE directory to temporary buffer and unmap it */ + memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte); + pte_unmap(pte); + pte = pm->ptebuf; +#endif + + for (; addr != end; pte++, addr += PAGE_SIZE) { + if (addr < pm->next) + continue; + if (!pte_present(*pte)) + err = add_to_pagemap(addr, -1, pm); + else + err = add_to_pagemap(addr, pte_pfn(*pte), pm); + if (err) + return err; + } + +#ifndef CONFIG_HIGHPTE + pte_unmap(pte - 1); +#endif + + return 0; +} + +static int pagemap_fill(struct pagemapread *pm, unsigned long end) +{ + int ret; + + while (pm->next != end) { + ret = 
add_to_pagemap(pm->next, -1UL, pm); + if (ret) + return ret; + } + return 0; +} + +static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range }; + +/* + * /proc/pid/pagemap - an array mapping virtual pages to pfns + * + * For each page in the address space, this file contains one long + * representing the corresponding physical page frame number (PFN) or + * -1 if the page isn't present. This allows determining precisely + * which pages are mapped and comparing mapped pages between + * processes. + * + * Efficient users of this interface will use /proc/pid/maps to + * determine which areas of memory are actually mapped and llseek to + * skip over unmapped regions. + * + * The first 4 bytes of this file form a simple header: + * + * first byte: 0 for big endian, 1 for little + * second byte: page shift (eg 12 for 4096 byte pages) + * third byte: entry size in bytes (currently either 4 or 8) + * fourth byte: header size + */ +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + unsigned long src = *ppos; + unsigned long *page; + unsigned long addr, end, vend, svpfn, evpfn; + struct mm_struct *mm; + struct vm_area_struct *vma; + struct pagemapread pm; + int ret = -ESRCH; + + if (!task) + goto out_no_task; + + ret = -EACCES; + if (!ptrace_may_attach(task)) + goto out; + + ret = -EIO; + svpfn = src / sizeof(unsigned long) - 1; + addr = PAGE_SIZE * svpfn; + if ((svpfn + 1) * sizeof(unsigned long) != src) + goto out; + evpfn = min((src + count) / sizeof(unsigned long), + ((~0UL) >> PAGE_SHIFT) + 1); + count = (evpfn - svpfn) * sizeof(unsigned long); + end = PAGE_SIZE * evpfn; + + ret = -ENOMEM; + page = kzalloc(PAGE_SIZE, GFP_USER); + if (!page) + goto out; + +#ifdef CONFIG_HIGHPTE + pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER); + if (!pm.ptebuf) + goto out_free; +#endif + + ret = 0; + mm = get_task_mm(task); + if (!mm) + goto out_freepte; + + 
pm.mm = mm; + pm.next = addr; + pm.buf = page; + pm.pos = src; + pm.count = count; + pm.index = 0; + pm.out = buf; + + if (svpfn == -1) { + add_to_pagemap(pm.next, 0, &pm); + ((char *)page)[0] = (ntohl(1) != 1); + ((char *)page)[1] = PAGE_SHIFT; + ((char *)page)[2] = sizeof(unsigned long); + ((char *)page)[3] = sizeof(unsigned long); + } + + down_read(&mm->mmap_sem); + vma = find_vma(mm, pm.next); + while (pm.count > 0 && vma) { + if (!ptrace_may_attach(task)) { + ret = -EIO; + goto out_mm; + } + vend = min(vma->vm_start - 1, end - 1) + 1; + ret = pagemap_fill(&pm, vend); + if (ret || !pm.count) + break; + vend = min(vma->vm_end - 1, end - 1) + 1; + ret = walk_page_range(mm, vma->vm_start, vend, + &pagemap_walk, &pm); + vma = vma->vm_next; + } + up_read(&mm->mmap_sem); + + ret = pagemap_fill(&pm, end); + + *ppos = pm.pos; + if (!ret) + ret = pm.pos - src; + +out_mm: + mmput(mm); +out_freepte: +#ifdef CONFIG_HIGHPTE + kfree(pm.ptebuf); +out_free: +#endif + kfree(page); +out: + put_task_struct(task); +out_no_task: + return ret; +} + +const struct file_operations proc_pagemap_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap_read, }; +#endif diff -Nurb linux-2.6.22-570/fs/ramfs/inode.c linux-2.6.22-591/fs/ramfs/inode.c --- linux-2.6.22-570/fs/ramfs/inode.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/ramfs/inode.c 2007-12-21 15:36:12.000000000 -0500 @@ -60,6 +60,7 @@ inode->i_blocks = 0; inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { default: diff -Nurb linux-2.6.22-570/fs/revoke.c linux-2.6.22-591/fs/revoke.c --- linux-2.6.22-570/fs/revoke.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/revoke.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,777 @@ +/* + * fs/revoke.c - Invalidate all current open file descriptors of 
an inode. + * + * Copyright (C) 2006-2007 Pekka Enberg + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * fileset - an array of file pointers. + * @files: the array of file pointers + * @nr: number of elements in the array + * @end: index to next unused file pointer + */ +struct fileset { + struct file **files; + unsigned long nr; + unsigned long end; +}; + +/** + * revoke_details - details of the revoke operation + * @inode: invalidate open file descriptors of this inode + * @fset: set of files that point to a revoked inode + * @restore_start: index to the first file pointer that is currently in + * use by a file descriptor but the real file has not + * been revoked + */ +struct revoke_details { + struct fileset *fset; + unsigned long restore_start; +}; + +static struct kmem_cache *revokefs_inode_cache; + +static inline bool fset_is_full(struct fileset *set) +{ + return set->nr == set->end; +} + +static inline struct file *fset_get_filp(struct fileset *set) +{ + return set->files[set->end++]; +} + +static struct fileset *alloc_fset(unsigned long size) +{ + struct fileset *fset; + + fset = kzalloc(sizeof *fset, GFP_KERNEL); + if (!fset) + return NULL; + + fset->files = kcalloc(size, sizeof(struct file *), GFP_KERNEL); + if (!fset->files) { + kfree(fset); + return NULL; + } + fset->nr = size; + return fset; +} + +static void free_fset(struct fileset *fset) +{ + int i; + + for (i = fset->end; i < fset->nr; i++) + fput(fset->files[i]); + + kfree(fset->files); + kfree(fset); +} + +/* + * Revoked file descriptors point to inodes in the revokefs filesystem. 
+ */ +static struct vfsmount *revokefs_mnt; + +static struct file *get_revoked_file(void) +{ + struct dentry *dentry; + struct inode *inode; + struct file *filp; + struct qstr name; + + filp = get_empty_filp(); + if (!filp) + goto err; + + inode = new_inode(revokefs_mnt->mnt_sb); + if (!inode) + goto err_inode; + + name.name = "revoked_file"; + name.len = strlen(name.name); + dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name); + if (!dentry) + goto err_dentry; + + d_instantiate(dentry, inode); + + filp->f_mapping = inode->i_mapping; + filp->f_dentry = dget(dentry); + filp->f_vfsmnt = mntget(revokefs_mnt); + filp->f_op = fops_get(inode->i_fop); + filp->f_pos = 0; + + return filp; + + err_dentry: + iput(inode); + err_inode: + fput(filp); + err: + return NULL; +} + +static inline bool can_revoke_file(struct file *file, struct inode *inode, + struct file *to_exclude) +{ + if (!file || file == to_exclude) + return false; + + return file->f_dentry->d_inode == inode; +} + +/* + * LOCKING: task_lock(owner) + */ +static int revoke_fds(struct task_struct *owner, + struct inode *inode, + struct file *to_exclude, struct fileset *fset) +{ + struct files_struct *files; + struct fdtable *fdt; + unsigned int fd; + int err = 0; + + files = get_files_struct(owner); + if (!files) + goto out; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + + for (fd = 0; fd < fdt->max_fds; fd++) { + struct revokefs_inode_info *info; + struct file *filp, *new_filp; + struct inode *new_inode; + + filp = fcheck_files(files, fd); + if (!can_revoke_file(filp, inode, to_exclude)) + continue; + + if (!filp->f_op->revoke) { + err = -EOPNOTSUPP; + goto failed; + } + + if (fset_is_full(fset)) { + err = -ENOMEM; + goto failed; + } + + new_filp = fset_get_filp(fset); + + /* + * Replace original struct file pointer with a pointer to + * a 'revoked file.' After this point, we don't need to worry + * about racing with sys_close or sys_dup. 
+ */ + rcu_assign_pointer(fdt->fd[fd], new_filp); + + /* + * Hold on to task until we can take down the file and its + * mmap. + */ + get_task_struct(owner); + + new_inode = new_filp->f_dentry->d_inode; + make_revoked_inode(new_inode, inode->i_mode & S_IFMT); + + info = revokefs_i(new_inode); + info->fd = fd; + info->file = filp; + info->owner = owner; + } + failed: + spin_unlock(&files->file_lock); + put_files_struct(files); + out: + return err; +} + +static inline bool can_revoke_vma(struct vm_area_struct *vma, + struct inode *inode, struct file *to_exclude) +{ + struct file *file = vma->vm_file; + + if (vma->vm_flags & VM_REVOKED) + return false; + + if (!file || file == to_exclude) + return false; + + return file->f_path.dentry->d_inode == inode; +} + +static int __revoke_break_cow(struct task_struct *tsk, struct inode *inode, + struct file *to_exclude) +{ + struct mm_struct *mm = tsk->mm; + struct vm_area_struct *vma; + int err = 0; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + int ret; + + if (vma->vm_flags & VM_SHARED) + continue; + + if (!can_revoke_vma(vma, inode, to_exclude)) + continue; + + ret = get_user_pages(tsk, tsk->mm, vma->vm_start, + vma_pages(vma), 1, 1, NULL, NULL); + if (ret < 0) { + err = ret; + break; + } + + unlink_file_vma(vma); + fput(vma->vm_file); + vma->vm_file = NULL; + } + up_read(&mm->mmap_sem); + return err; +} + +static int revoke_break_cow(struct fileset *fset, struct inode *inode, + struct file *to_exclude) +{ + unsigned long i; + int err = 0; + + for (i = 0; i < fset->end; i++) { + struct revokefs_inode_info *info; + struct file *this; + + this = fset->files[i]; + info = revokefs_i(this->f_dentry->d_inode); + + err = __revoke_break_cow(info->owner, inode, to_exclude); + if (err) + break; + } + return err; +} + +/* + * LOCKING: down_write(&mm->mmap_sem) + * -> spin_lock(&mapping->i_mmap_lock) + */ +static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details) +{ + 
unsigned long restart_addr, start_addr, end_addr; + int need_break; + + start_addr = vma->vm_start; + end_addr = vma->vm_end; + + again: + restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr, + details); + + need_break = need_resched() || need_lockbreak(details->i_mmap_lock); + if (need_break) + goto out_need_break; + + if (restart_addr < end_addr) { + start_addr = restart_addr; + goto again; + } + vma->vm_flags |= VM_REVOKED; + return 0; + + out_need_break: + spin_unlock(details->i_mmap_lock); + cond_resched(); + spin_lock(details->i_mmap_lock); + return -EINTR; +} + +/* + * LOCKING: spin_lock(&mapping->i_mmap_lock) + */ +static int revoke_mm(struct mm_struct *mm, struct address_space *mapping, + struct file *to_exclude) +{ + struct vm_area_struct *vma; + struct zap_details details; + int err = 0; + + details.i_mmap_lock = &mapping->i_mmap_lock; + + /* + * If ->mmap_sem is under contention, we continue scanning other + * mms and try again later. + */ + if (!down_write_trylock(&mm->mmap_sem)) { + err = -EAGAIN; + goto out; + } + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + if (!(vma->vm_flags & VM_SHARED)) + continue; + + if (!can_revoke_vma(vma, mapping->host, to_exclude)) + continue; + + err = revoke_vma(vma, &details); + if (err) + break; + + __unlink_file_vma(vma); + fput(vma->vm_file); + vma->vm_file = NULL; + } + up_write(&mm->mmap_sem); + out: + return err; +} + +/* + * LOCKING: spin_lock(&mapping->i_mmap_lock) + */ +static void revoke_mapping_tree(struct address_space *mapping, + struct file *to_exclude) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int try_again = 0; + + restart: + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) { + int err; + + if (!(vma->vm_flags & VM_SHARED)) + continue; + + if (likely(!can_revoke_vma(vma, mapping->host, to_exclude))) + continue; + + err = revoke_mm(vma->vm_mm, mapping, to_exclude); + if (err == -EAGAIN) + try_again = 1; + + goto restart; + } + if 
(try_again) { + cond_resched(); + goto restart; + } +} + +/* + * LOCKING: spin_lock(&mapping->i_mmap_lock) + */ +static void revoke_mapping_list(struct address_space *mapping, + struct file *to_exclude) +{ + struct vm_area_struct *vma; + int try_again = 0; + + restart: + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) { + int err; + + if (likely(!can_revoke_vma(vma, mapping->host, to_exclude))) + continue; + + err = revoke_mm(vma->vm_mm, mapping, to_exclude); + if (err == -EAGAIN) { + try_again = 1; + continue; + } + if (err == -EINTR) + goto restart; + } + if (try_again) { + cond_resched(); + goto restart; + } +} + +static void revoke_mapping(struct address_space *mapping, struct file *to_exclude) +{ + spin_lock(&mapping->i_mmap_lock); + if (unlikely(!prio_tree_empty(&mapping->i_mmap))) + revoke_mapping_tree(mapping, to_exclude); + if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) + revoke_mapping_list(mapping, to_exclude); + spin_unlock(&mapping->i_mmap_lock); +} + +static void restore_file(struct revokefs_inode_info *info) +{ + struct files_struct *files; + + files = get_files_struct(info->owner); + if (files) { + struct fdtable *fdt; + struct file *filp; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + + filp = fdt->fd[info->fd]; + if (filp) + fput(filp); + + rcu_assign_pointer(fdt->fd[info->fd], info->file); + FD_SET(info->fd, fdt->close_on_exec); + spin_unlock(&files->file_lock); + put_files_struct(files); + } + put_task_struct(info->owner); + info->owner = NULL; /* To avoid double-restore. 
*/ +} + +static void restore_files(struct revoke_details *details) +{ + unsigned long i; + + for (i = details->restore_start; i < details->fset->end; i++) { + struct revokefs_inode_info *info; + struct file *filp; + + filp = details->fset->files[i]; + info = revokefs_i(filp->f_dentry->d_inode); + + restore_file(info); + } +} + +static int revoke_files(struct revoke_details *details) +{ + unsigned long i; + int err = 0; + + for (i = 0; i < details->fset->end; i++) { + struct revokefs_inode_info *info; + struct file *this, *filp; + struct inode *inode; + + this = details->fset->files[i]; + inode = this->f_dentry->d_inode; + info = revokefs_i(inode); + + /* + * Increase count before attempting to close file as + * an partially closed file can no longer be restored. + */ + details->restore_start++; + filp = info->file; + err = filp->f_op->revoke(filp, inode->i_mapping); + put_task_struct(info->owner); + info->owner = NULL; /* To avoid restoring closed file. */ + if (err) + goto out; + } + out: + return err; +} + +/* + * Returns the maximum number of file descriptors pointing to an inode. 
+ * + * LOCKING: read_lock(&tasklist_lock) + */ +static unsigned long inode_fds(struct inode *inode, struct file *to_exclude) +{ + struct task_struct *g, *p; + unsigned long nr_fds = 0; + + do_each_thread(g, p) { + struct files_struct *files; + struct fdtable *fdt; + unsigned int fd; + + files = get_files_struct(p); + if (!files) + continue; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + for (fd = 0; fd < fdt->max_fds; fd++) { + struct file *file; + + file = fcheck_files(files, fd); + if (can_revoke_file(file, inode, to_exclude)) { + nr_fds += fdt->max_fds; + break; + } + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + while_each_thread(g, p); + return nr_fds; +} + +static struct fileset *__alloc_revoke_fset(unsigned long size) +{ + struct fileset *fset; + int i; + + fset = alloc_fset(size); + if (!fset) + return NULL; + + for (i = 0; i < fset->nr; i++) { + struct file *filp; + + filp = get_revoked_file(); + if (!filp) + goto err; + + fset->files[i] = filp; + } + return fset; + err: + free_fset(fset); + return NULL; +} + +static struct fileset *alloc_revoke_fset(struct inode *inode, struct file *to_exclude) +{ + unsigned long nr_fds; + + read_lock(&tasklist_lock); + nr_fds = inode_fds(inode, to_exclude); + read_unlock(&tasklist_lock); + + return __alloc_revoke_fset(nr_fds); +} + +static int do_revoke(struct inode *inode, struct file *to_exclude) +{ + struct revoke_details details; + struct fileset *fset = NULL; + struct task_struct *g, *p; + int err = 0; + + if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) { + err = -EPERM; + goto out; + } + + retry: + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } + + /* + * Pre-allocate memory because the first pass is done under + * tasklist_lock. + */ + fset = alloc_revoke_fset(inode, to_exclude); + if (!fset) { + err = -ENOMEM; + goto out; + } + + read_lock(&tasklist_lock); + + /* + * If someone forked while we were allocating memory, try again. 
+ */ + if (inode_fds(inode, to_exclude) > fset->nr) { + read_unlock(&tasklist_lock); + free_fset(fset); + goto retry; + } + + details.fset = fset; + details.restore_start = 0; + + /* + * First revoke the descriptors. After we are done, no one can start + * new operations on them. + */ + do_each_thread(g, p) { + err = revoke_fds(p, inode, to_exclude, fset); + if (err) + goto exit_loop; + } + while_each_thread(g, p); + exit_loop: + read_unlock(&tasklist_lock); + + if (err) + goto out_restore; + + /* + * Take down shared memory mappings. + */ + revoke_mapping(inode->i_mapping, to_exclude); + + /* + * Break COW for private mappings. + */ + err = revoke_break_cow(fset, inode, to_exclude); + if (err) + goto out_restore; + + /* + * Now, revoke the files for good. + */ + err = revoke_files(&details); + if (err) + goto out_restore; + + out_free_table: + free_fset(fset); + out: + return err; + + out_restore: + restore_files(&details); + goto out_free_table; +} + +asmlinkage long sys_revokeat(int dfd, const char __user * filename) +{ + struct nameidata nd; + int err; + + err = __user_walk_fd(dfd, filename, 0, &nd); + if (!err) { + err = do_revoke(nd.dentry->d_inode, NULL); + path_release(&nd); + } + return err; +} + +asmlinkage long sys_frevoke(unsigned int fd) +{ + struct file *file = fget(fd); + int err = -EBADF; + + if (file) { + err = do_revoke(file->f_dentry->d_inode, file); + fput(file); + } + return err; +} + +int generic_file_revoke(struct file *file, struct address_space *new_mapping) +{ + struct address_space *mapping = file->f_mapping; + int err; + + /* + * Flush pending writes. + */ + err = do_fsync(file, 1); + if (err) + goto out; + + file->f_mapping = new_mapping; + + /* + * Make pending reads fail. + */ + err = invalidate_inode_pages2(mapping); + + out: + return err; +} +EXPORT_SYMBOL(generic_file_revoke); + +/* + * Filesystem for revoked files. 
+ */ + +static struct inode *revokefs_alloc_inode(struct super_block *sb) +{ + struct revokefs_inode_info *info; + + info = kmem_cache_alloc(revokefs_inode_cache, GFP_KERNEL); + if (!info) + return NULL; + + return &info->vfs_inode; +} + +static void revokefs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(revokefs_inode_cache, revokefs_i(inode)); +} + +static struct super_operations revokefs_super_ops = { + .alloc_inode = revokefs_alloc_inode, + .destroy_inode = revokefs_destroy_inode, + .drop_inode = generic_delete_inode, +}; + +static int revokefs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops, + REVOKEFS_MAGIC, mnt); +} + +static struct file_system_type revokefs_fs_type = { + .name = "revokefs", + .get_sb = revokefs_get_sb, + .kill_sb = kill_anon_super +}; + +static void revokefs_init_inode(void *obj, struct kmem_cache *cache, + unsigned long flags) +{ + struct revokefs_inode_info *info = obj; + + info->owner = NULL; + inode_init_once(&info->vfs_inode); +} + +static int __init revokefs_init(void) +{ + int err = -ENOMEM; + + revokefs_inode_cache = + kmem_cache_create("revokefs_inode_cache", + sizeof(struct revokefs_inode_info), + 0, + (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD), revokefs_init_inode, NULL); + if (!revokefs_inode_cache) + goto out; + + err = register_filesystem(&revokefs_fs_type); + if (err) + goto err_register; + + revokefs_mnt = kern_mount(&revokefs_fs_type); + if (IS_ERR(revokefs_mnt)) { + err = PTR_ERR(revokefs_mnt); + goto err_mnt; + } + out: + return err; + err_mnt: + unregister_filesystem(&revokefs_fs_type); + err_register: + kmem_cache_destroy(revokefs_inode_cache); + return err; +} + +late_initcall(revokefs_init); diff -Nurb linux-2.6.22-570/fs/revoked_inode.c linux-2.6.22-591/fs/revoked_inode.c --- linux-2.6.22-570/fs/revoked_inode.c 1969-12-31 19:00:00.000000000 -0500 +++ 
linux-2.6.22-591/fs/revoked_inode.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,417 @@ +/* + * fs/revoked_inode.c + * + * Copyright (C) 2007 Pekka Enberg + * + * Provide stub functions for revoked inodes. Based on fs/bad_inode.c which is + * + * Copyright (C) 1997 Stephen Tweedie + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin) +{ + return -EBADF; +} + +static ssize_t revoked_file_read(struct file *filp, char __user * buf, + size_t size, loff_t * ppos) +{ + return -EBADF; +} + +static ssize_t revoked_special_file_read(struct file *filp, char __user * buf, + size_t size, loff_t * ppos) +{ + return 0; +} + +static ssize_t revoked_file_write(struct file *filp, const char __user * buf, + size_t siz, loff_t * ppos) +{ + return -EBADF; +} + +static ssize_t revoked_file_aio_read(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return -EBADF; +} + +static ssize_t revoked_file_aio_write(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return -EBADF; +} + +static int revoked_file_readdir(struct file *filp, void *dirent, + filldir_t filldir) +{ + return -EBADF; +} + +static unsigned int revoked_file_poll(struct file *filp, poll_table * wait) +{ + return POLLERR; +} + +static int revoked_file_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return -EBADF; +} + +static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd, + unsigned long arg) +{ + return -EBADF; +} + +static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return -EBADF; +} + +static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + return -EBADF; +} + +static int revoked_file_open(struct inode *inode, struct file *filp) +{ + return -EBADF; +} + 
+static int revoked_file_flush(struct file *file, fl_owner_t id) +{ + return filp_close(file, id); +} + +static int revoked_file_release(struct inode *inode, struct file *filp) +{ + return -EBADF; +} + +static int revoked_file_fsync(struct file *file, struct dentry *dentry, + int datasync) +{ + return -EBADF; +} + +static int revoked_file_aio_fsync(struct kiocb *iocb, int datasync) +{ + return -EBADF; +} + +static int revoked_file_fasync(int fd, struct file *filp, int on) +{ + return -EBADF; +} + +static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl) +{ + return -EBADF; +} + +static ssize_t revoked_file_sendfile(struct file *in_file, loff_t * ppos, + size_t count, read_actor_t actor, + void *target) +{ + return -EBADF; +} + +static ssize_t revoked_file_sendpage(struct file *file, struct page *page, + int off, size_t len, loff_t * pos, + int more) +{ + return -EBADF; +} + +static unsigned long revoked_file_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + return -EBADF; +} + +static int revoked_file_check_flags(int flags) +{ + return -EBADF; +} + +static int revoked_file_dir_notify(struct file *file, unsigned long arg) +{ + return -EBADF; +} + +static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl) +{ + return -EBADF; +} + +static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t * ppos, + size_t len, unsigned int flags) +{ + return -EBADF; +} + +static ssize_t revoked_file_splice_read(struct file *in, loff_t * ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + return -EBADF; +} + +static const struct file_operations revoked_file_ops = { + .llseek = revoked_file_llseek, + .read = revoked_file_read, + .write = revoked_file_write, + .aio_read = revoked_file_aio_read, + .aio_write = revoked_file_aio_write, + .readdir = revoked_file_readdir, + .poll = revoked_file_poll, + 
.ioctl = revoked_file_ioctl, + .unlocked_ioctl = revoked_file_unlocked_ioctl, + .compat_ioctl = revoked_file_compat_ioctl, + .mmap = revoked_file_mmap, + .open = revoked_file_open, + .flush = revoked_file_flush, + .release = revoked_file_release, + .fsync = revoked_file_fsync, + .aio_fsync = revoked_file_aio_fsync, + .fasync = revoked_file_fasync, + .lock = revoked_file_lock, + .sendfile = revoked_file_sendfile, + .sendpage = revoked_file_sendpage, + .get_unmapped_area = revoked_file_get_unmapped_area, + .check_flags = revoked_file_check_flags, + .dir_notify = revoked_file_dir_notify, + .flock = revoked_file_flock, + .splice_write = revoked_file_splice_write, + .splice_read = revoked_file_splice_read, +}; + +static const struct file_operations revoked_special_file_ops = { + .llseek = revoked_file_llseek, + .read = revoked_special_file_read, + .write = revoked_file_write, + .aio_read = revoked_file_aio_read, + .aio_write = revoked_file_aio_write, + .readdir = revoked_file_readdir, + .poll = revoked_file_poll, + .ioctl = revoked_file_ioctl, + .unlocked_ioctl = revoked_file_unlocked_ioctl, + .compat_ioctl = revoked_file_compat_ioctl, + .mmap = revoked_file_mmap, + .open = revoked_file_open, + .flush = revoked_file_flush, + .release = revoked_file_release, + .fsync = revoked_file_fsync, + .aio_fsync = revoked_file_aio_fsync, + .fasync = revoked_file_fasync, + .lock = revoked_file_lock, + .sendfile = revoked_file_sendfile, + .sendpage = revoked_file_sendpage, + .get_unmapped_area = revoked_file_get_unmapped_area, + .check_flags = revoked_file_check_flags, + .dir_notify = revoked_file_dir_notify, + .flock = revoked_file_flock, + .splice_write = revoked_file_splice_write, + .splice_read = revoked_file_splice_read, +}; + +static int revoked_inode_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + return -EBADF; +} + +static struct dentry *revoked_inode_lookup(struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + 
return ERR_PTR(-EBADF); +} + +static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + return -EBADF; +} + +static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry) +{ + return -EBADF; +} + +static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + return -EBADF; +} + +static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry, + int mode) +{ + return -EBADF; +} + +static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry) +{ + return -EBADF; +} + +static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + return -EBADF; +} + +static int revoked_inode_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry) +{ + return -EBADF; +} + +static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer, + int buflen) +{ + return -EBADF; +} + +static int revoked_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + return -EBADF; +} + +static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + return -EBADF; +} + +static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs) +{ + return -EBADF; +} + +static int revoked_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EBADF; +} + +static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + return -EBADF; +} + +static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EBADF; +} + +static int revoked_inode_removexattr(struct dentry *dentry, const char *name) +{ + return -EBADF; +} + +static struct inode_operations revoked_inode_ops = { + .create = revoked_inode_create, + .lookup = revoked_inode_lookup, + .link = 
revoked_inode_link, + .unlink = revoked_inode_unlink, + .symlink = revoked_inode_symlink, + .mkdir = revoked_inode_mkdir, + .rmdir = revoked_inode_rmdir, + .mknod = revoked_inode_mknod, + .rename = revoked_inode_rename, + .readlink = revoked_inode_readlink, + /* follow_link must be no-op, otherwise unmounting this inode + won't work */ + /* put_link returns void */ + /* truncate returns void */ + .permission = revoked_inode_permission, + .getattr = revoked_inode_getattr, + .setattr = revoked_inode_setattr, + .setxattr = revoked_inode_setxattr, + .getxattr = revoked_inode_getxattr, + .listxattr = revoked_inode_listxattr, + .removexattr = revoked_inode_removexattr, + /* truncate_range returns void */ +}; + +static int revoked_readpage(struct file *file, struct page *page) +{ + return -EIO; +} + +static int revoked_writepage(struct page *page, struct writeback_control *wbc) +{ + return -EIO; +} + +static int revoked_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return -EIO; +} + +static int revoked_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return -EIO; +} + +static ssize_t revoked_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + return -EIO; +} + +static const struct address_space_operations revoked_aops = { + .readpage = revoked_readpage, + .writepage = revoked_writepage, + .prepare_write = revoked_prepare_write, + .commit_write = revoked_commit_write, + .direct_IO = revoked_direct_IO, +}; + +void make_revoked_inode(struct inode *inode, int mode) +{ + remove_inode_hash(inode); + + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = + current_fs_time(inode->i_sb); + inode->i_op = &revoked_inode_ops; + + if (special_file(mode)) + inode->i_fop = &revoked_special_file_ops; + else + inode->i_fop = &revoked_file_ops; + + inode->i_mapping->a_ops = &revoked_aops; +} diff -Nurb linux-2.6.22-570/fs/splice.c 
linux-2.6.22-591/fs/splice.c --- linux-2.6.22-570/fs/splice.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/fs/splice.c 2007-12-21 15:36:14.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include struct partial_page { unsigned int offset; @@ -932,6 +933,10 @@ if (unlikely(ret < 0)) return ret; + ret = security_file_permission(out, MAY_WRITE); + if (unlikely(ret < 0)) + return ret; + return out->f_op->splice_write(pipe, out, ppos, len, flags); } @@ -954,6 +959,10 @@ if (unlikely(ret < 0)) return ret; + ret = security_file_permission(in, MAY_READ); + if (unlikely(ret < 0)) + return ret; + return in->f_op->splice_read(in, ppos, pipe, len, flags); } @@ -1272,6 +1281,7 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { + long err; struct pipe_inode_info *pipe; struct page *pages[PIPE_BUFFERS]; struct partial_page partial[PIPE_BUFFERS]; @@ -1290,6 +1300,10 @@ else if (unlikely(!nr_segs)) return 0; + err = security_file_permission(file, MAY_WRITE); + if (unlikely(err < 0)) + return err; + spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, flags & SPLICE_F_GIFT); if (spd.nr_pages <= 0) diff -Nurb linux-2.6.22-570/fs/stack.c linux-2.6.22-591/fs/stack.c --- linux-2.6.22-570/fs/stack.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/stack.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,20 @@ +/* + * Copyright (c) 2006-2007 Erez Zadok + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek + * Copyright (c) 2006-2007 Stony Brook University + * Copyright (c) 2006-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + #include #include #include -/* does _NOT_ require i_mutex to be held. +/* + * does _NOT_ require i_mutex to be held. 
* * This function cannot be inlined since i_size_{read,write} is rather * heavy-weight on 32-bit systems @@ -14,7 +26,8 @@ } EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); -/* copy all attributes; get_nlinks is optional way to override the i_nlink +/* + * copy all attributes; get_nlinks is optional way to override the i_nlink * copying */ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, diff -Nurb linux-2.6.22-570/fs/sync.c linux-2.6.22-591/fs/sync.c --- linux-2.6.22-570/fs/sync.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sync.c 2007-12-21 15:36:14.000000000 -0500 @@ -174,6 +174,9 @@ * already-instantiated disk blocks, there are no guarantees here that the data * will be available after a crash. */ +/* It would be nice if people remember that not all the world's an i386 + when they introduce new system calls */ + asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, unsigned int flags) { Files linux-2.6.22-570/fs/sysfs/.symlink.c.swp and linux-2.6.22-591/fs/sysfs/.symlink.c.swp differ diff -Nurb linux-2.6.22-570/fs/sysfs/bin.c linux-2.6.22-591/fs/sysfs/bin.c --- linux-2.6.22-570/fs/sysfs/bin.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sysfs/bin.c 2007-12-22 02:12:47.000000000 -0500 @@ -20,29 +20,41 @@ #include "sysfs.h" +struct bin_buffer { + struct mutex mutex; + void *buffer; + int mmapped; +}; + static int fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) { - struct bin_attribute * attr = to_bin_attr(dentry); - struct kobject * kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + int rc; + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + rc = -EIO; + if (attr->read) + rc = attr->read(kobj, attr, buffer, off, count); - if (!attr->read) - return 
-EIO; + sysfs_put_active_two(attr_sd); - return attr->read(kobj, buffer, off, count); + return rc; } static ssize_t -read(struct file * file, char __user * userbuf, size_t count, loff_t * off) +read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) { - char *buffer = file->private_data; + struct bin_buffer *bb = file->private_data; struct dentry *dentry = file->f_path.dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; - int ret; - - if (count > PAGE_SIZE) - count = PAGE_SIZE; + int count = min_t(size_t, bytes, PAGE_SIZE); if (size) { if (offs > size) @@ -51,43 +63,56 @@ count = size - offs; } - ret = fill_read(dentry, buffer, offs, count); - if (ret < 0) - return ret; - count = ret; + mutex_lock(&bb->mutex); - if (copy_to_user(userbuf, buffer, count)) - return -EFAULT; + count = fill_read(dentry, bb->buffer, offs, count); + if (count < 0) + goto out_unlock; + + if (copy_to_user(userbuf, bb->buffer, count)) { + count = -EFAULT; + goto out_unlock; + } - pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); + pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); *off = offs + count; + out_unlock: + mutex_unlock(&bb->mutex); return count; } static int flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) { - struct bin_attribute *attr = to_bin_attr(dentry); - struct kobject *kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + int rc; - if (!attr->write) - return -EIO; + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; - return attr->write(kobj, buffer, offset, count); + rc = -EIO; + if (attr->write) + rc = attr->write(kobj, attr, buffer, offset, count); + + sysfs_put_active_two(attr_sd); + + return rc; } -static ssize_t write(struct file * file, const char __user * 
userbuf, - size_t count, loff_t * off) +static ssize_t write(struct file *file, const char __user *userbuf, + size_t bytes, loff_t *off) { - char *buffer = file->private_data; + struct bin_buffer *bb = file->private_data; struct dentry *dentry = file->f_path.dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; + int count = min_t(size_t, bytes, PAGE_SIZE); - if (count > PAGE_SIZE) - count = PAGE_SIZE; if (size) { if (offs > size) return 0; @@ -95,72 +120,100 @@ count = size - offs; } - if (copy_from_user(buffer, userbuf, count)) - return -EFAULT; + mutex_lock(&bb->mutex); - count = flush_write(dentry, buffer, offs, count); + if (copy_from_user(bb->buffer, userbuf, count)) { + count = -EFAULT; + goto out_unlock; + } + + count = flush_write(dentry, bb->buffer, offs, count); if (count > 0) *off = offs + count; + + out_unlock: + mutex_unlock(&bb->mutex); return count; } static int mmap(struct file *file, struct vm_area_struct *vma) { - struct dentry *dentry = file->f_path.dentry; - struct bin_attribute *attr = to_bin_attr(dentry); - struct kobject *kobj = to_kobj(dentry->d_parent); + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + int rc; + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + /* lock after the check: its early return must not leak bb->mutex */ + mutex_lock(&bb->mutex); + rc = -EINVAL; + if (attr->mmap) + rc = attr->mmap(kobj, attr, vma); + + if (rc == 0 && !bb->mmapped) + bb->mmapped = 1; + else + sysfs_put_active_two(attr_sd); - if (!attr->mmap) - return -EINVAL; + mutex_unlock(&bb->mutex); - return attr->mmap(kobj, attr, vma); + return rc; } static int open(struct inode * inode, struct file * file) { - struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); - struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); - int error = -EINVAL; - - 
if (!kobj || !attr) - goto Done; - - /* Grab the module reference for this attribute if we have one */ - error = -ENODEV; - if (!try_module_get(attr->attr.owner)) - goto Done; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; + struct bin_buffer *bb = NULL; + int error; + + /* need attr_sd for attr */ + if (!sysfs_get_active(attr_sd)) + return -ENODEV; error = -EACCES; if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) - goto Error; + goto err_out; if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) - goto Error; + goto err_out; error = -ENOMEM; - file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!file->private_data) - goto Error; - - error = 0; - goto Done; - - Error: - module_put(attr->attr.owner); - Done: - if (error) - kobject_put(kobj); + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) + goto err_out; + + bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!bb->buffer) + goto err_out; + + mutex_init(&bb->mutex); + file->private_data = bb; + + /* open succeeded, put active reference and pin attr_sd */ + sysfs_put_active(attr_sd); + sysfs_get(attr_sd); + return 0; + + err_out: + sysfs_put_active(attr_sd); + kfree(bb); return error; } static int release(struct inode * inode, struct file * file) { - struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent); - struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); - u8 * buffer = file->private_data; - - kobject_put(kobj); - module_put(attr->attr.owner); - kfree(buffer); + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_buffer *bb = file->private_data; + + if (bb->mmapped) + sysfs_put_active_two(attr_sd); + sysfs_put(attr_sd); + kfree(bb->buffer); + kfree(bb); return 0; } @@ -181,9 +234,9 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - BUG_ON(!kobj || !kobj->dentry || !attr); + BUG_ON(!kobj || !kobj->sd || !attr); - return 
sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR); + return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); } @@ -195,7 +248,7 @@ void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { + if (sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name) < 0) { printk(KERN_ERR "%s: " "bad dentry or inode or no such file: \"%s\"\n", __FUNCTION__, attr->attr.name); diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c linux-2.6.22-591/fs/sysfs/dir.c --- linux-2.6.22-570/fs/sysfs/dir.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sysfs/dir.c 2007-12-23 01:58:30.000000000 -0500 @@ -9,21 +9,442 @@ #include #include #include +#include +#include #include #include "sysfs.h" -DECLARE_RWSEM(sysfs_rename_sem); -spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED; +static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd); + +DEFINE_MUTEX(sysfs_mutex); +spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; + +static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_IDA(sysfs_ino_ida); + +static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target) +{ + /* Find the shadow directory for the specified tag */ + struct sysfs_dirent *sd; + + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { + if (sd->s_name != target) + continue; + break; + } + return sd; +} + +static const void *find_shadow_tag(struct kobject *kobj) +{ + /* Find the tag the current kobj is cached with */ + return kobj->sd->s_parent->s_name; +} + +/** + * sysfs_link_sibling - link sysfs_dirent into sibling list + * @sd: sysfs_dirent of interest + * + * Link @sd into its sibling list which starts from + * sd->s_parent->s_children. 
+ * + * Locking: + * mutex_lock(sysfs_mutex) + */ + +/** + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * @sd: sysfs_dirent of interest + * + * Unlink @sd from its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sysfs_mutex) + */ + +void sysfs_link_sibling(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *parent_sd = sd->s_parent; + + BUG_ON(sd->s_sibling); + sd->s_sibling = parent_sd->s_children; + parent_sd->s_children = sd; +} +/** + * sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sd: sysfs_dirent of interest + * + * Get dentry for @sd. Dentry is looked up if currently not + * present. This function climbs sysfs_dirent tree till it + * reaches a sysfs_dirent with valid dentry attached and descends + * down from there looking up dentry for each step. + * + * LOCKING: + * Kernel thread context (may sleep) + * + * RETURNS: + * Pointer to found dentry on success, ERR_PTR() value on error. + */ +struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *cur; + struct dentry *parent_dentry, *dentry; + int i, depth; + + /* Find the first parent which has valid s_dentry and get the + * dentry. + */ + mutex_lock(&sysfs_mutex); + restart0: + spin_lock(&sysfs_assoc_lock); + restart1: + spin_lock(&dcache_lock); + + dentry = NULL; + depth = 0; + cur = sd; + while (!cur->s_dentry || !cur->s_dentry->d_inode) { + if (cur->s_flags & SYSFS_FLAG_REMOVED) { + dentry = ERR_PTR(-ENOENT); + depth = 0; + break; + } + cur = cur->s_parent; + depth++; + } + if (!IS_ERR(dentry)) + dentry = dget_locked(cur->s_dentry); + + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_assoc_lock); + + /* from the found dentry, look up depth times */ + while (depth--) { + /* find and get depth'th ancestor */ + for (cur = sd, i = 0; cur && i < depth; i++) + cur = cur->s_parent; + + /* This can happen if tree structure was modified due + * to move/rename. Restart. 
+ */ + if (i != depth) { + dput(dentry); + goto restart0; + } + + sysfs_get(cur); + + mutex_unlock(&sysfs_mutex); + + /* look it up */ + parent_dentry = dentry; + dentry = lookup_one_len_kern(cur->s_name, parent_dentry, + strlen(cur->s_name)); + dput(parent_dentry); + + if (IS_ERR(dentry)) { + sysfs_put(cur); + return dentry; + } + + mutex_lock(&sysfs_mutex); + spin_lock(&sysfs_assoc_lock); + + /* This, again, can happen if tree structure has + * changed and we looked up the wrong thing. Restart. + */ + if (cur->s_dentry != dentry) { + dput(dentry); + sysfs_put(cur); + goto restart1; + } + + spin_unlock(&sysfs_assoc_lock); + + sysfs_put(cur); + } + + mutex_unlock(&sysfs_mutex); + return dentry; +} + +/** + * sysfs_link_sibling - link sysfs_dirent into sibling list + * @sd: sysfs_dirent of interest + * + * Link @sd into its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) + */ + +/** + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * @sd: sysfs_dirent of interest + * + * Unlink @sd from its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) + */ +void sysfs_unlink_sibling(struct sysfs_dirent *sd) +{ + struct sysfs_dirent **pos; + + for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) { + if (*pos == sd) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + break; + } + } +} + +/** + * sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sd: sysfs_dirent of interest + * + * Get dentry for @sd. Dentry is looked up if currently not + * present. This function climbs sysfs_dirent tree till it + * reaches a sysfs_dirent with valid dentry attached and descends + * down from there looking up dentry for each step. + * + * LOCKING: + * Kernel thread context (may sleep) + * + * RETURNS: + * Pointer to found dentry on success, ERR_PTR() value on error. 
+ */ + +/** + * sysfs_get_active - get an active reference to sysfs_dirent + * @sd: sysfs_dirent to get an active reference to + * + * Get an active reference of @sd. This function is noop if @sd + * is NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +/** + * sysfs_put_active - put an active reference to sysfs_dirent + * @sd: sysfs_dirent to put an active reference to + * + * Put an active reference to @sd. This function is noop if @sd + * is NULL. + */ +void sysfs_put_active(struct sysfs_dirent *sd) +{ + struct completion *cmpl; + int v; + + if (unlikely(!sd)) + return; + + v = atomic_dec_return(&sd->s_active); + if (likely(v != SD_DEACTIVATED_BIAS)) + return; + + /* atomic_dec_return() is a mb(), we'll always see the updated + * sd->s_sibling. + */ + cmpl = (void *)sd->s_sibling; + complete(cmpl); +} + +/** + * sysfs_get_active_two - get active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Get active reference to @sd and its parent. Parent's active + * reference is grabbed first. This function is noop if @sd is + * NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd) +{ + if (sd) { + if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent))) + return NULL; + if (unlikely(!sysfs_get_active(sd))) { + sysfs_put_active(sd->s_parent); + return NULL; + } + } + return sd; +} + +/** + * sysfs_put_active_two - put active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Put active references to @sd and its parent. This function is + * noop if @sd is NULL. + */ +void sysfs_put_active_two(struct sysfs_dirent *sd) +{ + if (sd) { + sysfs_put_active(sd); + sysfs_put_active(sd->s_parent); + } +} + +/** + * sysfs_deactivate - deactivate sysfs_dirent + * @sd: sysfs_dirent to deactivate + * + * Deny new active references and drain existing ones. 
+ */ +static void sysfs_deactivate(struct sysfs_dirent *sd) +{ + DECLARE_COMPLETION_ONSTACK(wait); + int v; + + BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); + sd->s_sibling = (void *)&wait; + + /* atomic_add_return() is a mb(), put_active() will always see + * the updated sd->s_sibling. + */ + v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); + + if (v != SD_DEACTIVATED_BIAS) + wait_for_completion(&wait); + + sd->s_sibling = NULL; +} + +/** + * sysfs_get_active - get an active reference to sysfs_dirent + * @sd: sysfs_dirent to get an active reference to + * + * Get an active reference of @sd. This function is noop if @sd + * is NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) +{ + if (unlikely(!sd)) + return NULL; + + while (1) { + int v, t; + + v = atomic_read(&sd->s_active); + if (unlikely(v < 0)) + return NULL; + + t = atomic_cmpxchg(&sd->s_active, v, v + 1); + if (likely(t == v)) + return sd; + if (t < 0) + return NULL; + + cpu_relax(); + } +} +/** + * sysfs_put_active - put an active reference to sysfs_dirent + * @sd: sysfs_dirent to put an active reference to + * + * Put an active reference to @sd. This function is noop if @sd + * is NULL. + */ + +/** + * sysfs_get_active_two - get active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Get active reference to @sd and its parent. Parent's active + * reference is grabbed first. This function is noop if @sd is + * NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ + +/** + * sysfs_put_active_two - put active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Put active references to @sd and its parent. This function is + * noop if @sd is NULL. + */ + +/** + * sysfs_deactivate - deactivate sysfs_dirent + * @sd: sysfs_dirent to deactivate + * + * Deny new active references and drain existing ones. 
s_active + * will be unlocked when the sysfs_dirent is released. + */ + +static int sysfs_alloc_ino(ino_t *pino) +{ + int ino, rc; + + retry: + spin_lock(&sysfs_ino_lock); + rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); + spin_unlock(&sysfs_ino_lock); + + if (rc == -EAGAIN) { + if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) + goto retry; + rc = -ENOMEM; + } + + *pino = ino; + return rc; +} + +static void sysfs_free_ino(ino_t ino) +{ + spin_lock(&sysfs_ino_lock); + ida_remove(&sysfs_ino_ida, ino); + spin_unlock(&sysfs_ino_lock); +} + +void release_sysfs_dirent(struct sysfs_dirent * sd) +{ + struct sysfs_dirent *parent_sd; + + repeat: + /* Moving/renaming is always done while holding reference. + * sd->s_parent won't change beneath us. + */ + parent_sd = sd->s_parent; + + if (sysfs_type(sd) == SYSFS_KOBJ_LINK) + sysfs_put(sd->s_elem.symlink.target_sd); + if (sysfs_type(sd) & SYSFS_COPY_NAME) + kfree(sd->s_name); + kfree(sd->s_iattr); + if (sysfs_type(sd) != SYSFS_SHADOW_DIR) + sysfs_free_ino(sd->s_ino); + kmem_cache_free(sysfs_dir_cachep, sd); + + sd = parent_sd; + if (sd && atomic_dec_and_test(&sd->s_count)) + goto repeat; +} static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) { struct sysfs_dirent * sd = dentry->d_fsdata; if (sd) { - /* sd->s_dentry is protected with sysfs_lock. This - * allows sysfs_drop_dentry() to dereference it. + /* sd->s_dentry is protected with sysfs_assoc_lock. + * This allows sysfs_drop_dentry() to dereference it. 
*/ - spin_lock(&sysfs_lock); + spin_lock(&sysfs_assoc_lock); /* The dentry might have been deleted or another * lookup could have happened updating sd->s_dentry to @@ -32,7 +453,7 @@ */ if (sd->s_dentry == dentry) sd->s_dentry = NULL; - spin_unlock(&sysfs_lock); + spin_unlock(&sysfs_assoc_lock); sysfs_put(sd); } iput(inode); @@ -42,344 +463,595 @@ .d_iput = sysfs_d_iput, }; -static unsigned int sysfs_inode_counter; -ino_t sysfs_get_inum(void) +struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) { - if (unlikely(sysfs_inode_counter < 3)) - sysfs_inode_counter = 3; - return sysfs_inode_counter++; -} + char *dup_name = NULL; + struct sysfs_dirent *sd = NULL; -/* - * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent - */ -static struct sysfs_dirent * __sysfs_new_dirent(void * element) -{ - struct sysfs_dirent * sd; + if (type & SYSFS_COPY_NAME) { + name = dup_name = kstrdup(name, GFP_KERNEL); + if (!name) + goto err_out; + } sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); if (!sd) - return NULL; + goto err_out; + + if (sysfs_alloc_ino(&sd->s_ino)) + goto err_out; - sd->s_ino = sysfs_get_inum(); atomic_set(&sd->s_count, 1); + atomic_set(&sd->s_active, 0); atomic_set(&sd->s_event, 1); - INIT_LIST_HEAD(&sd->s_children); - INIT_LIST_HEAD(&sd->s_sibling); - sd->s_element = element; + + sd->s_name = name; + sd->s_mode = mode; + sd->s_flags = type; return sd; + + err_out: + kfree(dup_name); + if (sd) /* sd is still NULL if kstrdup or the cache alloc failed */ + kmem_cache_free(sysfs_dir_cachep, sd); + return NULL; } -static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, - struct sysfs_dirent *sd) +/** + * sysfs_attach_dentry - associate sysfs_dirent with dentry + * @sd: target sysfs_dirent + * @dentry: dentry to associate + * + * Associate @sd with @dentry. This is protected by + * sysfs_assoc_lock to avoid race with sysfs_d_iput(). 
+ * + * LOCKING: + * mutex_lock(sysfs_mutex) + */ +static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry) { - if (sd) - list_add(&sd->s_sibling, &parent_sd->s_children); + dentry->d_op = &sysfs_dentry_ops; + dentry->d_fsdata = sysfs_get(sd); + + /* protect sd->s_dentry against sysfs_d_iput */ + spin_lock(&sysfs_assoc_lock); + sd->s_dentry = dentry; + spin_unlock(&sysfs_assoc_lock); + + if (dentry->d_flags & DCACHE_UNHASHED) + d_rehash(dentry); } -static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, - void * element) +static int sysfs_ilookup_test(struct inode *inode, void *arg) { - struct sysfs_dirent *sd; - sd = __sysfs_new_dirent(element); - __sysfs_list_dirent(parent_sd, sd); - return sd; + struct sysfs_dirent *sd = arg; + return inode->i_ino == sd->s_ino; } -/* +/** + * sysfs_addrm_start - prepare for sysfs_dirent add/remove + * @acxt: pointer to sysfs_addrm_cxt to be used + * @parent_sd: parent sysfs_dirent * - * Return -EEXIST if there is already a sysfs element with the same name for - * the same parent. + * This function is called when the caller is about to add or + * remove sysfs_dirent under @parent_sd. This function acquires + * sysfs_mutex, grabs inode for @parent_sd if available and lock + * i_mutex of it. @acxt is used to keep and pass context to + * other addrm functions. * - * called with parent inode's i_mutex held + * LOCKING: + * Kernel thread context (may sleep). sysfs_mutex is locked on + * return. i_mutex of parent inode is locked on return if + * available. 
*/ -int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, - const unsigned char *new) +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd) { - struct sysfs_dirent * sd; + struct inode *inode; - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_element) { - const unsigned char *existing = sysfs_get_name(sd); - if (strcmp(existing, new)) - continue; - else - return -EEXIST; - } + memset(acxt, 0, sizeof(*acxt)); + acxt->parent_sd = parent_sd; + + /* Lookup parent inode. inode initialization and I_NEW + * clearing are protected by sysfs_mutex. By grabbing it and + * looking up with _nowait variant, inode state can be + * determined reliably. + */ + mutex_lock(&sysfs_mutex); + + inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, + parent_sd); + + if (inode && !(inode->i_state & I_NEW)) { + /* parent inode available */ + acxt->parent_inode = inode; + + /* sysfs_mutex is below i_mutex in lock hierarchy. + * First, trylock i_mutex. If fails, unlock + * sysfs_mutex and lock them in order. + */ + if (!mutex_trylock(&inode->i_mutex)) { + mutex_unlock(&sysfs_mutex); + mutex_lock(&inode->i_mutex); + mutex_lock(&sysfs_mutex); } + } else + iput(inode); +} - return 0; +/** + * sysfs_add_one - add sysfs_dirent to parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * + * Get @acxt->parent_sd and set sd->s_parent to it and increment + * nlink of parent inode if @sd is a directory. @sd is NOT + * linked into the children list of the parent. The caller + * should invoke sysfs_link_sibling() after this function + * completes if @sd needs to be on the children list. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). 
+ */ +void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + sd->s_parent = sysfs_get(acxt->parent_sd); + + if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) + inc_nlink(acxt->parent_inode); + + acxt->cnt++; } +/** + * sysfs_remove_one - remove sysfs_dirent from parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be removed + * + * Mark @sd removed and drop nlink of parent inode if @sd is a + * directory. @sd is NOT unlinked from the children list of the + * parent. The caller is responsible for removing @sd from the + * children list before calling this function. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). + */ +void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED)); + + sd->s_flags |= SYSFS_FLAG_REMOVED; + sd->s_sibling = acxt->removed; + acxt->removed = sd; + + if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) + drop_nlink(acxt->parent_inode); + + acxt->cnt++; +} -static struct sysfs_dirent * -__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) +/** + * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent + * @sd: target sysfs_dirent + * + * Drop dentry for @sd. @sd must have been unlinked from its + * parent on entry to this function such that it can't be looked + * up anymore. + * + * @sd->s_dentry which is protected with sysfs_assoc_lock points + * to the currently associated dentry but we're not holding a + * reference to it and racing with dput(). Grab dcache_lock and + * verify dentry before dropping it. If @sd->s_dentry is NULL or + * dput() beats us, no need to bother. 
+ */ +static void sysfs_drop_dentry(struct sysfs_dirent *sd) { - struct sysfs_dirent * sd; + struct dentry *dentry = NULL; + struct inode *inode; - sd = __sysfs_new_dirent(element); - if (!sd) - goto out; + /* We're not holding a reference to ->s_dentry dentry but the + * field will stay valid as long as sysfs_assoc_lock is held. + */ + spin_lock(&sysfs_assoc_lock); + spin_lock(&dcache_lock); - sd->s_mode = mode; - sd->s_type = type; - sd->s_dentry = dentry; - if (dentry) { - dentry->d_fsdata = sysfs_get(sd); - dentry->d_op = &sysfs_dentry_ops; - } + /* drop dentry if it's there and dput() didn't kill it yet */ + if (sd->s_dentry && sd->s_dentry->d_inode) { + dentry = dget_locked(sd->s_dentry); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + } + + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_assoc_lock); + + /* dentries for shadowed directories are pinned, unpin */ + if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) || + (sd->s_flags & SYSFS_FLAG_SHADOWED)) + dput(dentry); + dput(dentry); -out: - return sd; + /* adjust nlink and update timestamp */ + inode = ilookup(sysfs_sb, sd->s_ino); + if (inode) { + mutex_lock(&inode->i_mutex); + + inode->i_ctime = CURRENT_TIME; + drop_nlink(inode); + if (sysfs_type(sd) == SYSFS_DIR) + drop_nlink(inode); + + mutex_unlock(&inode->i_mutex); + iput(inode); + } } -int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, - void * element, umode_t mode, int type) +/** + * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent + * @sd: target sysfs_dirent + * + * Drop dentry for @sd. @sd must have been unlinked from its + * parent on entry to this function such that it can't be looked + * up anymore. + * + * @sd->s_dentry which is protected with sysfs_assoc_lock points + * to the currently associated dentry but we're not holding a + * reference to it and racing with dput(). Grab dcache_lock and + * verify dentry before dropping it. 
If @sd->s_dentry is NULL or + * dput() beats us, no need to bother. + */ + + +/** + * sysfs_addrm_finish - finish up sysfs_dirent add/remove + * @acxt: addrm context to finish up + * + * Finish up sysfs_dirent add/remove. Resources acquired by + * sysfs_addrm_start() are released and removed sysfs_dirents are + * cleaned up. Timestamps on the parent inode are updated. + * + * LOCKING: + * All mutexes acquired by sysfs_addrm_start() are released. + * + * RETURNS: + * Number of added/removed sysfs_dirents since sysfs_addrm_start(). + */ +int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) { - struct sysfs_dirent *sd; + /* release resources acquired by sysfs_addrm_start() */ + mutex_unlock(&sysfs_mutex); + if (acxt->parent_inode) { + struct inode *inode = acxt->parent_inode; + + /* if added/removed, update timestamps on the parent */ + if (acxt->cnt) + inode->i_ctime = inode->i_mtime = CURRENT_TIME; - sd = __sysfs_make_dirent(dentry, element, mode, type); - __sysfs_list_dirent(parent_sd, sd); + mutex_unlock(&inode->i_mutex); + iput(inode); + } - return sd ? 0 : -ENOMEM; + /* kill removed sysfs_dirents */ + while (acxt->removed) { + struct sysfs_dirent *sd = acxt->removed; + + acxt->removed = sd->s_sibling; + sd->s_sibling = NULL; + + sysfs_prune_shadow_sd(sd->s_parent); + sysfs_drop_dentry(sd); + sysfs_deactivate(sd); + sysfs_put(sd); + } + + return acxt->cnt; } -static int init_dir(struct inode * inode) +/** + * sysfs_find_dirent - find sysfs_dirent with the given name + * @parent_sd: sysfs_dirent to search under + * @name: name to look for + * + * Look for sysfs_dirent with name @name under @parent_sd. + * + * LOCKING: + * mutex_lock(sysfs_mutex) + * + * RETURNS: + * Pointer to sysfs_dirent if found, NULL if not. 
+ */ +struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) { - inode->i_op = &sysfs_dir_inode_operations; - inode->i_fop = &sysfs_dir_operations; + struct sysfs_dirent *sd; - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); - return 0; + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) + if (sysfs_type(sd) && !strcmp(sd->s_name, name)) + return sd; + return NULL; } -static int init_file(struct inode * inode) +/** + * sysfs_get_dirent - find and get sysfs_dirent with the given name + * @parent_sd: sysfs_dirent to search under + * @name: name to look for + * + * Look for sysfs_dirent with name @name under @parent_sd and get + * it if found. + * + * LOCKING: + * Kernel thread context (may sleep). Grabs sysfs_mutex. + * + * RETURNS: + * Pointer to sysfs_dirent if found, NULL if not. + */ +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) { - inode->i_size = PAGE_SIZE; - inode->i_fop = &sysfs_file_operations; - return 0; -} + struct sysfs_dirent *sd; -static int init_symlink(struct inode * inode) -{ - inode->i_op = &sysfs_symlink_inode_operations; - return 0; + mutex_lock(&sysfs_mutex); + sd = sysfs_find_dirent(parent_sd, name); + sysfs_get(sd); + mutex_unlock(&sysfs_mutex); + + return sd; } -static int create_dir(struct kobject * k, struct dentry * p, - const char * n, struct dentry ** d) +static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, + const char *name, struct sysfs_dirent **p_sd) { - int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + int err; - mutex_lock(&p->d_inode->i_mutex); - *d = lookup_one_len(n, p, strlen(n)); - if (!IS_ERR(*d)) { - if (sysfs_dirent_exist(p->d_fsdata, n)) - error = -EEXIST; - else - error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, - SYSFS_DIR); - if (!error) { - error = sysfs_create(*d, mode, 
init_dir); - if (!error) { - inc_nlink(p->d_inode); - (*d)->d_op = &sysfs_dentry_ops; - d_rehash(*d); - } + /* allocate */ + sd = sysfs_new_dirent(name, mode, SYSFS_DIR); + if (!sd) + return -ENOMEM; + sd->s_elem.dir.kobj = kobj; + + /* link in */ + sysfs_addrm_start(&acxt, parent_sd); + err = -ENOENT; + if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) + goto addrm_finish; + + err = -EEXIST; + if (!sysfs_find_dirent(acxt.parent_sd, name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + err = 0; + } +addrm_finish: + if (sysfs_addrm_finish(&acxt)) { + *p_sd = sd; + return 0; } - if (error && (error != -EEXIST)) { - struct sysfs_dirent *sd = (*d)->d_fsdata; - if (sd) { - list_del_init(&sd->s_sibling); + sysfs_put(sd); - } - d_drop(*d); - } - dput(*d); - } else - error = PTR_ERR(*d); - mutex_unlock(&p->d_inode->i_mutex); - return error; + return err; } - -int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) +int sysfs_create_subdir(struct kobject *kobj, const char *name, + struct sysfs_dirent **p_sd) { - return create_dir(k,k->dentry,n,d); + return create_dir(kobj, kobj->sd, name, p_sd); } /** * sysfs_create_dir - create a directory for an object. * @kobj: object we're creating directory for. - * @shadow_parent: parent parent object. 
*/ - -int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) +int sysfs_create_dir(struct kobject * kobj) { - struct dentry * dentry = NULL; - struct dentry * parent; + struct sysfs_dirent *parent_sd, *sd; int error = 0; BUG_ON(!kobj); - if (shadow_parent) - parent = shadow_parent; - else if (kobj->parent) - parent = kobj->parent->dentry; + if (kobj->parent) + parent_sd = kobj->parent->sd; else if (sysfs_mount && sysfs_mount->mnt_sb) - parent = sysfs_mount->mnt_sb->s_root; + parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; else return -EFAULT; - error = create_dir(kobj,parent,kobject_name(kobj),&dentry); + error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); if (!error) - kobj->dentry = dentry; + kobj->sd = sd; return error; } -/* attaches attribute's sysfs_dirent to the dentry corresponding to the - * attribute file - */ -static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) +static int sysfs_count_nlink(struct sysfs_dirent *sd) { - struct attribute * attr = NULL; - struct bin_attribute * bin_attr = NULL; - int (* init) (struct inode *) = NULL; - int error = 0; + struct sysfs_dirent *child; + int nr = 0; - if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) { - bin_attr = sd->s_element; - attr = &bin_attr->attr; - } else { - attr = sd->s_element; - init = init_file; - } + for (child = sd->s_children; child; child = child->s_sibling) + if (sysfs_type(child) == SYSFS_DIR) + nr++; + return nr + 2; +} - dentry->d_fsdata = sysfs_get(sd); - /* protect sd->s_dentry against sysfs_d_iput */ - spin_lock(&sysfs_lock); - sd->s_dentry = dentry; - spin_unlock(&sysfs_lock); - error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); - if (error) { - sysfs_put(sd); - return error; - } +static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd; + struct bin_attribute *bin_attr; + struct 
inode *inode; + int found = 0; - if (bin_attr) { - dentry->d_inode->i_size = bin_attr->size; - dentry->d_inode->i_fop = &bin_fops; + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { + if (sysfs_type(sd) && + !strcmp(sd->s_name, dentry->d_name.name)) { + found = 1; + break; + } } - dentry->d_op = &sysfs_dentry_ops; - d_rehash(dentry); - return 0; -} + /* no such entry */ + if (!found) + return NULL; -static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) -{ - int err = 0; + /* attach dentry and inode */ + inode = sysfs_get_inode(sd); + if (!inode) + return ERR_PTR(-ENOMEM); + + mutex_lock(&sysfs_mutex); + + if (inode->i_state & I_NEW) { + /* initialize inode according to type */ + switch (sysfs_type(sd)) { + case SYSFS_DIR: + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; + inode->i_nlink = sysfs_count_nlink(sd); + break; + case SYSFS_KOBJ_ATTR: + inode->i_size = PAGE_SIZE; + inode->i_fop = &sysfs_file_operations; + break; + case SYSFS_KOBJ_BIN_ATTR: + bin_attr = sd->s_elem.bin_attr.bin_attr; + inode->i_size = bin_attr->size; + inode->i_fop = &bin_fops; + break; + case SYSFS_KOBJ_LINK: + inode->i_op = &sysfs_symlink_inode_operations; + break; + default: + BUG(); + } + } - dentry->d_fsdata = sysfs_get(sd); - /* protect sd->s_dentry against sysfs_d_iput */ - spin_lock(&sysfs_lock); - sd->s_dentry = dentry; - spin_unlock(&sysfs_lock); - err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink); - if (!err) { - dentry->d_op = &sysfs_dentry_ops; - d_rehash(dentry); - } else - sysfs_put(sd); + sysfs_instantiate(dentry, inode); + sysfs_attach_dentry(sd, dentry); - return err; + mutex_unlock(&sysfs_mutex); + + return NULL; } -static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) +static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; - struct sysfs_dirent * sd; - 
int err = 0; + struct sysfs_dirent *sd; + struct dentry *dest; - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_type & SYSFS_NOT_PINNED) { - const unsigned char * name = sysfs_get_name(sd); + sd = dentry->d_fsdata; + dest = NULL; + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const struct shadow_dir_operations *shadow_ops; + const void *tag; - if (strcmp(name, dentry->d_name.name)) - continue; + mutex_lock(&sysfs_mutex); - if (sd->s_type & SYSFS_KOBJ_LINK) - err = sysfs_attach_link(sd, dentry); - else - err = sysfs_attach_attr(sd, dentry); - break; - } + shadow_ops = dentry->d_inode->i_private; + tag = shadow_ops->current_tag(); + + sd = find_shadow_sd(sd, tag); + if (sd) + dest = sd->s_dentry; + dget(dest); + + mutex_unlock(&sysfs_mutex); } + if (!dest) + dest = dget(dentry); + dput(nd->dentry); + nd->dentry = dest; - return ERR_PTR(err); + return NULL; } + const struct inode_operations sysfs_dir_inode_operations = { .lookup = sysfs_lookup, .setattr = sysfs_setattr, + .follow_link = sysfs_shadow_follow_link, }; -static void remove_dir(struct dentry * d) +static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { - struct dentry * parent = dget(d->d_parent); - struct sysfs_dirent * sd; + sysfs_unlink_sibling(sd); + sysfs_remove_one(acxt, sd); +} - mutex_lock(&parent->d_inode->i_mutex); - d_delete(d); - sd = d->d_fsdata; - list_del_init(&sd->s_sibling); - sysfs_put(sd); - if (d->d_inode) - simple_rmdir(parent->d_inode,d); +static void remove_dir(struct sysfs_dirent *sd) +{ + struct sysfs_addrm_cxt acxt; - pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); + sysfs_addrm_start(&acxt, sd->s_parent); + __remove_dir(&acxt, sd); + sysfs_addrm_finish(&acxt); +} - mutex_unlock(&parent->d_inode->i_mutex); - dput(parent); +void sysfs_remove_subdir(struct sysfs_dirent *sd) +{ + remove_dir(sd); } -void sysfs_remove_subdir(struct dentry * d) +static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt, + 
struct sysfs_dirent *dir_sd) { - remove_dir(d); + struct sysfs_dirent **pos; + + pos = &dir_sd->s_children; + while (*pos) { + struct sysfs_dirent *sd = *pos; + + if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + sysfs_remove_one(acxt, sd); + } else + pos = &(*pos)->s_sibling; + } } +static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt, + struct sysfs_dirent *dir_sd) +{ + struct sysfs_dirent **pos; + + pos = &dir_sd->s_children; + while (*pos) { + struct sysfs_dirent *sd = *pos; + + sysfs_empty_dir(acxt, sd); + __remove_dir(acxt, sd); + } +} -static void __sysfs_remove_dir(struct dentry *dentry) +static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) { - struct sysfs_dirent * parent_sd; - struct sysfs_dirent * sd, * tmp; + struct sysfs_addrm_cxt acxt; - dget(dentry); - if (!dentry) + if (!dir_sd) return; - pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); - mutex_lock(&dentry->d_inode->i_mutex); - parent_sd = dentry->d_fsdata; - list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { - if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) - continue; - list_del_init(&sd->s_sibling); - sysfs_drop_dentry(sd, dentry); - sysfs_put(sd); - } - mutex_unlock(&dentry->d_inode->i_mutex); + pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); + sysfs_addrm_start(&acxt, dir_sd); + if (sysfs_type(dir_sd) == SYSFS_DIR) + sysfs_empty_dir(&acxt, dir_sd); + else + sysfs_remove_shadows(&acxt, dir_sd); + sysfs_addrm_finish(&acxt); - remove_dir(dentry); - /** - * Drop reference from dget() on entrance. 
- */ - dput(dentry); + remove_dir(dir_sd); } /** @@ -393,102 +1064,154 @@ void sysfs_remove_dir(struct kobject * kobj) { - __sysfs_remove_dir(kobj->dentry); - kobj->dentry = NULL; + struct sysfs_dirent *sd = kobj->sd; + + spin_lock(&sysfs_assoc_lock); + kobj->sd = NULL; + spin_unlock(&sysfs_assoc_lock); + + __sysfs_remove_dir(sd); } -int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, - const char *new_name) +int sysfs_rename_dir(struct kobject * kobj, const char *new_name) { - int error = 0; - struct dentry * new_dentry; + struct dentry *old_dentry, *new_dentry, *parent; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + const char *dup_name; + int error; - if (!new_parent) - return -EFAULT; + dup_name = NULL; + new_dentry = NULL; - down_write(&sysfs_rename_sem); - mutex_lock(&new_parent->d_inode->i_mutex); + sd = kobj->sd; + sysfs_addrm_start(&acxt, sd->s_parent); + error = -ENOENT; + if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) + goto addrm_finish; + + error = -EEXIST; + if (sysfs_find_dirent(acxt.parent_sd, new_name)) + goto addrm_finish; - new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); - if (!IS_ERR(new_dentry)) { - /* By allowing two different directories with the - * same d_parent we allow this routine to move - * between different shadows of the same directory - */ - if (kobj->dentry->d_parent->d_inode != new_parent->d_inode) - return -EINVAL; - else if (new_dentry->d_parent->d_inode != new_parent->d_inode) error = -EINVAL; - else if (new_dentry == kobj->dentry) + if ((sd->s_parent == acxt.parent_sd) && + (strcmp(new_name, sd->s_name) == 0)) + goto addrm_finish; + + old_dentry = sd->s_dentry; + parent = acxt.parent_sd->s_dentry; + if (old_dentry) { + old_dentry = sd->s_dentry; + parent = acxt.parent_sd->s_dentry; + new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); + if (IS_ERR(new_dentry)) { + error = PTR_ERR(new_dentry); + goto addrm_finish; + } + error = -EINVAL; - else if 
(!new_dentry->d_inode) { + if (old_dentry == new_dentry) + goto addrm_finish; + } + + /* rename kobject and sysfs_dirent */ + error = -ENOMEM; + new_name = dup_name = kstrdup(new_name, GFP_KERNEL); + if (!new_name) + goto addrm_finish; + error = kobject_set_name(kobj, "%s", new_name); - if (!error) { - struct sysfs_dirent *sd, *parent_sd; + if (error) + goto addrm_finish; - d_add(new_dentry, NULL); - d_move(kobj->dentry, new_dentry); + dup_name = sd->s_name; + sd->s_name = new_name; - sd = kobj->dentry->d_fsdata; - parent_sd = new_parent->d_fsdata; + /* move under the new parent */ + sysfs_unlink_sibling(sd); + sysfs_get(acxt.parent_sd); + sysfs_put(sd->s_parent); + sd->s_parent = acxt.parent_sd; + sysfs_link_sibling(sd); - list_del_init(&sd->s_sibling); - list_add(&sd->s_sibling, &parent_sd->s_children); - } - else - d_drop(new_dentry); - } else - error = -EEXIST; - dput(new_dentry); + if (new_dentry) { + d_add(new_dentry, NULL); + d_move(old_dentry, new_dentry); } - mutex_unlock(&new_parent->d_inode->i_mutex); - up_write(&sysfs_rename_sem); + error = 0; +addrm_finish: + sysfs_addrm_finish(&acxt); + kfree(dup_name); + dput(new_dentry); return error; } -int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent) +int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) { - struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry; - struct sysfs_dirent *new_parent_sd, *sd; + struct sysfs_dirent *sd = kobj->sd; + struct sysfs_dirent *new_parent_sd; + struct dentry *old_parent, *new_parent = NULL; + struct dentry *old_dentry = NULL, *new_dentry = NULL; int error; - old_parent_dentry = kobj->parent ? - kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; - new_parent_dentry = new_parent ? - new_parent->dentry : sysfs_mount->mnt_sb->s_root; + BUG_ON(!sd->s_parent); + new_parent_sd = new_parent_kobj->sd ? 
new_parent_kobj->sd : &sysfs_root; + + /* get dentries */ + old_dentry = sysfs_get_dentry(sd); + if (IS_ERR(old_dentry)) { + error = PTR_ERR(old_dentry); + goto out_dput; + } + old_parent = sd->s_parent->s_dentry; + + new_parent = sysfs_get_dentry(new_parent_sd); + if (IS_ERR(new_parent)) { + error = PTR_ERR(new_parent); + goto out_dput; + } - if (old_parent_dentry->d_inode == new_parent_dentry->d_inode) - return 0; /* nothing to move */ + if (old_parent->d_inode == new_parent->d_inode) { + error = 0; + goto out_dput; /* nothing to move */ + } again: - mutex_lock(&old_parent_dentry->d_inode->i_mutex); - if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) { - mutex_unlock(&old_parent_dentry->d_inode->i_mutex); + mutex_lock(&old_parent->d_inode->i_mutex); + if (!mutex_trylock(&new_parent->d_inode->i_mutex)) { + mutex_unlock(&old_parent->d_inode->i_mutex); goto again; } - new_parent_sd = new_parent_dentry->d_fsdata; - sd = kobj->dentry->d_fsdata; - - new_dentry = lookup_one_len(kobj->name, new_parent_dentry, - strlen(kobj->name)); + new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name)); if (IS_ERR(new_dentry)) { error = PTR_ERR(new_dentry); - goto out; + goto out_unlock; } else error = 0; d_add(new_dentry, NULL); - d_move(kobj->dentry, new_dentry); + d_move(sd->s_dentry, new_dentry); dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. 
*/ - list_del_init(&sd->s_sibling); - list_add(&sd->s_sibling, &new_parent_sd->s_children); + mutex_lock(&sysfs_mutex); -out: - mutex_unlock(&new_parent_dentry->d_inode->i_mutex); - mutex_unlock(&old_parent_dentry->d_inode->i_mutex); + sysfs_unlink_sibling(sd); + sysfs_get(new_parent_sd); + sysfs_put(sd->s_parent); + sd->s_parent = new_parent_sd; + sysfs_link_sibling(sd); + + mutex_unlock(&sysfs_mutex); + out_unlock: + mutex_unlock(&new_parent->d_inode->i_mutex); + mutex_unlock(&old_parent->d_inode->i_mutex); + out_dput: + dput(new_parent); + dput(old_dentry); + dput(new_dentry); return error; } @@ -496,23 +1219,27 @@ { struct dentry * dentry = file->f_path.dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; + struct sysfs_dirent * sd; - mutex_lock(&dentry->d_inode->i_mutex); - file->private_data = sysfs_new_dirent(parent_sd, NULL); - mutex_unlock(&dentry->d_inode->i_mutex); - - return file->private_data ? 0 : -ENOMEM; + sd = sysfs_new_dirent("_DIR_", 0, 0); + if (sd) { + mutex_lock(&sysfs_mutex); + sd->s_parent = sysfs_get(parent_sd); + sysfs_link_sibling(sd); + mutex_unlock(&sysfs_mutex); + } + file->private_data = sd; + return sd ? 
0 : -ENOMEM; } static int sysfs_dir_close(struct inode *inode, struct file *file) { - struct dentry * dentry = file->f_path.dentry; struct sysfs_dirent * cursor = file->private_data; - mutex_lock(&dentry->d_inode->i_mutex); - list_del_init(&cursor->s_sibling); - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_lock(&sysfs_mutex); + sysfs_unlink_sibling(cursor); + mutex_unlock(&sysfs_mutex); release_sysfs_dirent(cursor); @@ -530,7 +1257,7 @@ struct dentry *dentry = filp->f_path.dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; struct sysfs_dirent *cursor = filp->private_data; - struct list_head *p, *q = &cursor->s_sibling; + struct sysfs_dirent **pos; ino_t ino; int i = filp->f_pos; @@ -543,38 +1270,55 @@ i++; /* fallthrough */ case 1: - ino = parent_ino(dentry); + if (parent_sd->s_parent) + ino = parent_sd->s_parent->s_ino; + else + ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) break; filp->f_pos++; i++; /* fallthrough */ default: + /* If I am the shadow master return nothing. 
*/ + if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED) + return 0; + + mutex_lock(&sysfs_mutex); + pos = &parent_sd->s_children; + while (*pos != cursor) + pos = &(*pos)->s_sibling; + + /* unlink cursor */ + *pos = cursor->s_sibling; + if (filp->f_pos == 2) - list_move(q, &parent_sd->s_children); + pos = &parent_sd->s_children; - for (p=q->next; p!= &parent_sd->s_children; p=p->next) { - struct sysfs_dirent *next; + for ( ; *pos; pos = &(*pos)->s_sibling) { + struct sysfs_dirent *next = *pos; const char * name; int len; - next = list_entry(p, struct sysfs_dirent, - s_sibling); - if (!next->s_element) + if (!sysfs_type(next)) continue; - name = sysfs_get_name(next); + name = next->s_name; len = strlen(name); ino = next->s_ino; if (filldir(dirent, name, len, filp->f_pos, ino, dt_type(next)) < 0) - return 0; + break; - list_move(q, p); - p = q; filp->f_pos++; } + + /* put cursor back in */ + cursor->s_sibling = *pos; + *pos = cursor; + + mutex_unlock(&sysfs_mutex); } return 0; } @@ -583,7 +1327,6 @@ { struct dentry * dentry = file->f_path.dentry; - mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -591,127 +1334,224 @@ if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { + mutex_lock(&sysfs_mutex); + file->f_pos = offset; if (file->f_pos >= 2) { struct sysfs_dirent *sd = dentry->d_fsdata; struct sysfs_dirent *cursor = file->private_data; - struct list_head *p; + struct sysfs_dirent **pos; loff_t n = file->f_pos - 2; - list_del(&cursor->s_sibling); - p = sd->s_children.next; - while (n && p != &sd->s_children) { - struct sysfs_dirent *next; - next = list_entry(p, struct sysfs_dirent, - s_sibling); - if (next->s_element) + sysfs_unlink_sibling(cursor); + + pos = &sd->s_children; + while (n && *pos) { + struct sysfs_dirent *next = *pos; + if (sysfs_type(next)) n--; - p = p->next; + pos = &(*pos)->s_sibling; } - list_add_tail(&cursor->s_sibling, p); + + 
cursor->s_sibling = *pos; + *pos = cursor; } + + mutex_unlock(&sysfs_mutex); } - mutex_unlock(&dentry->d_inode->i_mutex); + return offset; } +const struct file_operations sysfs_dir_operations = { + .open = sysfs_dir_open, + .release = sysfs_dir_close, + .llseek = sysfs_dir_lseek, + .read = generic_read_dir, + .readdir = sysfs_readdir, +}; -/** - * sysfs_make_shadowed_dir - Setup so a directory can be shadowed - * @kobj: object we're creating shadow of. - */ -int sysfs_make_shadowed_dir(struct kobject *kobj, - void * (*follow_link)(struct dentry *, struct nameidata *)) +static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd) { - struct inode *inode; - struct inode_operations *i_op; + struct sysfs_addrm_cxt acxt; - inode = kobj->dentry->d_inode; - if (inode->i_op != &sysfs_dir_inode_operations) - return -EINVAL; + /* If a shadow directory goes empty remove it. */ + if (sysfs_type(sd) != SYSFS_SHADOW_DIR) + return; - i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); - if (!i_op) - return -ENOMEM; + if (sd->s_children) + return; - memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op)); - i_op->follow_link = follow_link; + sysfs_addrm_start(&acxt, sd->s_parent); - /* Locking of inode->i_op? - * Since setting i_op is a single word write and they - * are atomic we should be ok here. - */ - inode->i_op = i_op; - return 0; -} + if (sd->s_flags & SYSFS_FLAG_REMOVED) + goto addrm_finish; -/** - * sysfs_create_shadow_dir - create a shadow directory for an object. - * @kobj: object we're creating directory for. - * - * sysfs_make_shadowed_dir must already have been called on this - * directory. 
- */ + if (sd->s_children) + goto addrm_finish; -struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) + __remove_dir(&acxt, sd); +addrm_finish: + sysfs_addrm_finish(&acxt); +} + +static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag) { - struct sysfs_dirent *sd; - struct dentry *parent, *dir, *shadow; + struct sysfs_dirent *sd = NULL; + struct dentry *dir, *shadow; struct inode *inode; - dir = kobj->dentry; + dir = parent_sd->s_dentry; inode = dir->d_inode; - parent = dir->d_parent; - shadow = ERR_PTR(-EINVAL); - if (!sysfs_is_shadowed_inode(inode)) - goto out; - shadow = d_alloc(parent, &dir->d_name); + shadow = d_alloc(dir->d_parent, &dir->d_name); if (!shadow) - goto nomem; + goto out; + + /* Since the shadow directory is reachable make it look + * like it is actually hashed. + */ + shadow->d_hash.pprev = &shadow->d_hash.next; + shadow->d_hash.next = NULL; + shadow->d_flags &= ~DCACHE_UNHASHED; - sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); + sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR); if (!sd) - goto nomem; + goto error; - d_instantiate(shadow, igrab(inode)); - inc_nlink(inode); - inc_nlink(parent->d_inode); - shadow->d_op = &sysfs_dentry_ops; + sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj; + sd->s_parent = sysfs_get(parent_sd); - dget(shadow); /* Extra count - pin the dentry in core */ + /* Use the inode number of the parent we are shadowing */ + sysfs_free_ino(sd->s_ino); + sd->s_ino = parent_sd->s_ino; + inc_nlink(inode); + inc_nlink(dir->d_parent->d_inode); + + sysfs_link_sibling(sd); + __iget(inode); + sysfs_instantiate(shadow, inode); + sysfs_attach_dentry(sd, shadow); out: - return shadow; -nomem: + return sd; +error: dput(shadow); - shadow = ERR_PTR(-ENOMEM); goto out; } +int sysfs_resolve_for_create(struct kobject *kobj, + struct sysfs_dirent **parent_sd) +{ + const struct shadow_dir_operations *shadow_ops; + struct sysfs_dirent *sd, *shadow_sd; + + sd = 
*parent_sd; + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + sd = sd->s_parent; + + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const void *tag; + + shadow_ops = sd->s_dentry->d_inode->i_private; + tag = shadow_ops->kobject_tag(kobj); + + shadow_sd = find_shadow_sd(sd, tag); + if (!shadow_sd) + shadow_sd = add_shadow_sd(sd, tag); + sd = shadow_sd; + } + if (sd) { + *parent_sd = sd; + return 1; + } + return 0; +} + +int sysfs_resolve_for_remove(struct kobject *kobj, + struct sysfs_dirent **parent_sd) +{ + struct sysfs_dirent *sd; + /* If dentry is a shadow directory find the shadow that is + * stored under the same tag as kobj. This allows removal + * of dirents to function properly even if the value of + * kobject_tag() has changed since we initially created + * the dirents associated with kobj. + */ + + sd = *parent_sd; + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + sd = sd->s_parent; + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const void *tag; + + tag = find_shadow_tag(kobj); + sd = find_shadow_sd(sd, tag); + } + if (sd) { + *parent_sd = sd; + return 1; + } + return 0; +} + /** - * sysfs_remove_shadow_dir - remove an object's directory. - * @shadow: dentry of shadow directory + * sysfs_enable_shadowing - Automatically create shadows of a directory + * @kobj: object to automatically shadow * - * The only thing special about this is that we remove any files in - * the directory before we remove the directory, and we've inlined - * what used to be sysfs_rmdir() below, instead of calling separately. + * Once shadowing has been enabled on a directory the contents + * of the directory become dependent upon context. + * + * shadow_ops->current_tag() returns the context for the current + * process. + * + * shadow_ops->kobject_tag() returns the context that a given kobj + * resides in. + * + * Using those methods the sysfs code on shadowed directories + * carefully stores the files so that when we lookup files + * we get the proper answer for our context. 
+ * + * If the context of a kobject is changed it is expected that + * the kobject will be renamed so the appropriate sysfs data structures + * can be updated. */ - -void sysfs_remove_shadow_dir(struct dentry *shadow) +int sysfs_enable_shadowing(struct kobject *kobj, + const struct shadow_dir_operations *shadow_ops) { - __sysfs_remove_dir(shadow); + struct sysfs_dirent *sd; + struct dentry *dentry; + int err; + + /* Find the dentry for the shadowed directory and + * increase its count. + */ + err = -ENOENT; + sd = kobj->sd; + dentry = sysfs_get_dentry(sd); + if (!dentry) + goto out; + + mutex_lock(&sysfs_mutex); + err = -EINVAL; + /* We can only enable shadowing on empty directories + * where shadowing is not already enabled. + */ + if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) && + !(sd->s_flags & SYSFS_FLAG_REMOVED) && + !(sd->s_flags & SYSFS_FLAG_SHADOWED)) { + sd->s_flags |= SYSFS_FLAG_SHADOWED; + dentry->d_inode->i_private = (void *)shadow_ops; + err = 0; + } + mutex_unlock(&sysfs_mutex); +out: + if (err) + dput(dentry); + return err; } -const struct file_operations sysfs_dir_operations = { - .open = sysfs_dir_open, - .release = sysfs_dir_close, - .llseek = sysfs_dir_lseek, - .read = generic_read_dir, - .readdir = sysfs_readdir, -}; diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c.orig linux-2.6.22-591/fs/sysfs/dir.c.orig --- linux-2.6.22-570/fs/sysfs/dir.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/sysfs/dir.c.orig 2007-12-22 20:43:14.000000000 -0500 @@ -0,0 +1,1558 @@ +/* + * dir.c - Operations for sysfs directories. 
+ */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include "sysfs.h" + +static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd); + +DEFINE_MUTEX(sysfs_mutex); +spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; + +static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_IDA(sysfs_ino_ida); + +static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target) +{ + /* Find the shadow directory for the specified tag */ + struct sysfs_dirent *sd; + + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { + if (sd->s_name != target) + continue; + break; + } + return sd; +} + +static const void *find_shadow_tag(struct kobject *kobj) +{ + /* Find the tag the current kobj is cached with */ + return kobj->sd->s_parent->s_name; +} + +/** + * sysfs_link_sibling - link sysfs_dirent into sibling list + * @sd: sysfs_dirent of interest + * + * Link @sd into its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sysfs_mutex) + */ + +/** + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * @sd: sysfs_dirent of interest + * + * Unlink @sd from its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sysfs_mutex) + */ + + +/** + * sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sd: sysfs_dirent of interest + * + * Get dentry for @sd. Dentry is looked up if currently not + * present. This function climbs sysfs_dirent tree till it + * reaches a sysfs_dirent with valid dentry attached and descends + * down from there looking up dentry for each step. + * + * LOCKING: + * Kernel thread context (may sleep) + * + * RETURNS: + * Pointer to found dentry on success, ERR_PTR() value on error. 
+ */ +struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *cur; + struct dentry *parent_dentry, *dentry; + int i, depth; + + /* Find the first parent which has valid s_dentry and get the + * dentry. + */ + mutex_lock(&sysfs_mutex); + restart0: + spin_lock(&sysfs_assoc_lock); + restart1: + spin_lock(&dcache_lock); + + dentry = NULL; + depth = 0; + cur = sd; + while (!cur->s_dentry || !cur->s_dentry->d_inode) { + if (cur->s_flags & SYSFS_FLAG_REMOVED) { + dentry = ERR_PTR(-ENOENT); + depth = 0; + break; + } + cur = cur->s_parent; + depth++; + } + if (!IS_ERR(dentry)) + dentry = dget_locked(cur->s_dentry); + + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_assoc_lock); + + /* from the found dentry, look up depth times */ + while (depth--) { + /* find and get depth'th ancestor */ + for (cur = sd, i = 0; cur && i < depth; i++) + cur = cur->s_parent; + + /* This can happen if tree structure was modified due + * to move/rename. Restart. + */ + if (i != depth) { + dput(dentry); + goto restart0; + } + + sysfs_get(cur); + + mutex_unlock(&sysfs_mutex); + + /* look it up */ + parent_dentry = dentry; + dentry = lookup_one_len_kern(cur->s_name, parent_dentry, + strlen(cur->s_name)); + dput(parent_dentry); + + if (IS_ERR(dentry)) { + sysfs_put(cur); + return dentry; + } + + mutex_lock(&sysfs_mutex); + spin_lock(&sysfs_assoc_lock); + + /* This, again, can happen if tree structure has + * changed and we looked up the wrong thing. Restart. + */ + if (cur->s_dentry != dentry) { + dput(dentry); + sysfs_put(cur); + goto restart1; + } + + spin_unlock(&sysfs_assoc_lock); + + sysfs_put(cur); + } + + mutex_unlock(&sysfs_mutex); + return dentry; +} + +/** + * sysfs_link_sibling - link sysfs_dirent into sibling list + * @sd: sysfs_dirent of interest + * + * Link @sd into its sibling list which starts from + * sd->s_parent->s_children. 
+ * + * Locking: + * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) + */ +void sysfs_link_sibling(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *parent_sd = sd->s_parent; + + BUG_ON(sd->s_sibling); + sd->s_sibling = parent_sd->s_children; + parent_sd->s_children = sd; +} + +/** + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * @sd: sysfs_dirent of interest + * + * Unlink @sd from its sibling list which starts from + * sd->s_parent->s_children. + * + * Locking: + * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) + */ +void sysfs_unlink_sibling(struct sysfs_dirent *sd) +{ + struct sysfs_dirent **pos; + + for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) { + if (*pos == sd) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + break; + } + } +} + +/** + * sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sd: sysfs_dirent of interest + * + * Get dentry for @sd. Dentry is looked up if currently not + * present. This function climbs sysfs_dirent tree till it + * reaches a sysfs_dirent with valid dentry attached and descends + * down from there looking up dentry for each step. + * + * LOCKING: + * Kernel thread context (may sleep) + * + * RETURNS: + * Pointer to found dentry on success, ERR_PTR() value on error. + */ + +/** + * sysfs_get_active - get an active reference to sysfs_dirent + * @sd: sysfs_dirent to get an active reference to + * + * Get an active reference of @sd. This function is noop if @sd + * is NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +/** + * sysfs_put_active - put an active reference to sysfs_dirent + * @sd: sysfs_dirent to put an active reference to + * + * Put an active reference to @sd. This function is noop if @sd + * is NULL. 
+ */ +void sysfs_put_active(struct sysfs_dirent *sd) +{ + struct completion *cmpl; + int v; + + if (unlikely(!sd)) + return; + + v = atomic_dec_return(&sd->s_active); + if (likely(v != SD_DEACTIVATED_BIAS)) + return; + + /* atomic_dec_return() is a mb(), we'll always see the updated + * sd->s_sibling. + */ + cmpl = (void *)sd->s_sibling; + complete(cmpl); +} + +/** + * sysfs_get_active_two - get active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Get active reference to @sd and its parent. Parent's active + * reference is grabbed first. This function is noop if @sd is + * NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd) +{ + if (sd) { + if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent))) + return NULL; + if (unlikely(!sysfs_get_active(sd))) { + sysfs_put_active(sd->s_parent); + return NULL; + } + } + return sd; +} + +/** + * sysfs_put_active_two - put active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Put active references to @sd and its parent. This function is + * noop if @sd is NULL. + */ +void sysfs_put_active_two(struct sysfs_dirent *sd) +{ + if (sd) { + sysfs_put_active(sd); + sysfs_put_active(sd->s_parent); + } +} + +/** + * sysfs_deactivate - deactivate sysfs_dirent + * @sd: sysfs_dirent to deactivate + * + * Deny new active references and drain existing ones. + */ +static void sysfs_deactivate(struct sysfs_dirent *sd) +{ + DECLARE_COMPLETION_ONSTACK(wait); + int v; + + BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); + sd->s_sibling = (void *)&wait; + + /* atomic_add_return() is a mb(), put_active() will always see + * the updated sd->s_sibling. 
+ */ + v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); + + if (v != SD_DEACTIVATED_BIAS) + wait_for_completion(&wait); + + sd->s_sibling = NULL; +} + +/** + * sysfs_get_active - get an active reference to sysfs_dirent + * @sd: sysfs_dirent to get an active reference to + * + * Get an active reference of @sd. This function is noop if @sd + * is NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ +struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) +{ + if (unlikely(!sd)) + return NULL; + + while (1) { + int v, t; + + v = atomic_read(&sd->s_active); + if (unlikely(v < 0)) + return NULL; + + t = atomic_cmpxchg(&sd->s_active, v, v + 1); + if (likely(t == v)) + return sd; + if (t < 0) + return NULL; + + cpu_relax(); + } +} +/** + * sysfs_put_active - put an active reference to sysfs_dirent + * @sd: sysfs_dirent to put an active reference to + * + * Put an active reference to @sd. This function is noop if @sd + * is NULL. + */ + +/** + * sysfs_get_active_two - get active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Get active reference to @sd and its parent. Parent's active + * reference is grabbed first. This function is noop if @sd is + * NULL. + * + * RETURNS: + * Pointer to @sd on success, NULL on failure. + */ + +/** + * sysfs_put_active_two - put active references to sysfs_dirent and parent + * @sd: sysfs_dirent of interest + * + * Put active references to @sd and its parent. This function is + * noop if @sd is NULL. + */ + +/** + * sysfs_deactivate - deactivate sysfs_dirent + * @sd: sysfs_dirent to deactivate + * + * Deny new active references and drain existing ones. s_active + * will be unlocked when the sysfs_dirent is released. 
+ */ + +static int sysfs_alloc_ino(ino_t *pino) +{ + int ino, rc; + + retry: + spin_lock(&sysfs_ino_lock); + rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); + spin_unlock(&sysfs_ino_lock); + + if (rc == -EAGAIN) { + if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) + goto retry; + rc = -ENOMEM; + } + + *pino = ino; + return rc; +} + +static void sysfs_free_ino(ino_t ino) +{ + spin_lock(&sysfs_ino_lock); + ida_remove(&sysfs_ino_ida, ino); + spin_unlock(&sysfs_ino_lock); +} + +void release_sysfs_dirent(struct sysfs_dirent * sd) +{ + struct sysfs_dirent *parent_sd; + + repeat: + /* Moving/renaming is always done while holding reference. + * sd->s_parent won't change beneath us. + */ + parent_sd = sd->s_parent; + + if (sysfs_type(sd) == SYSFS_KOBJ_LINK) + sysfs_put(sd->s_elem.symlink.target_sd); + if (sysfs_type(sd) & SYSFS_COPY_NAME) + kfree(sd->s_name); + kfree(sd->s_iattr); + if (sysfs_type(sd) != SYSFS_SHADOW_DIR) + sysfs_free_ino(sd->s_ino); + kmem_cache_free(sysfs_dir_cachep, sd); + + sd = parent_sd; + if (sd && atomic_dec_and_test(&sd->s_count)) + goto repeat; +} + +static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) +{ + struct sysfs_dirent * sd = dentry->d_fsdata; + + if (sd) { + /* sd->s_dentry is protected with sysfs_assoc_lock. + * This allows sysfs_drop_dentry() to dereference it. + */ + spin_lock(&sysfs_assoc_lock); + + /* The dentry might have been deleted or another + * lookup could have happened updating sd->s_dentry to + * point the new dentry. Ignore if it isn't pointing + * to this dentry. 
+ */ + if (sd->s_dentry == dentry) + sd->s_dentry = NULL; + spin_unlock(&sysfs_assoc_lock); + sysfs_put(sd); + } + iput(inode); +} + +static struct dentry_operations sysfs_dentry_ops = { + .d_iput = sysfs_d_iput, +}; + +struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) +{ + char *dup_name = NULL; + struct sysfs_dirent *sd = NULL; + + if (type & SYSFS_COPY_NAME) { + name = dup_name = kstrdup(name, GFP_KERNEL); + if (!name) + goto err_out; + } + + sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); + if (!sd) + goto err_out; + + if (sysfs_alloc_ino(&sd->s_ino)) + goto err_out; + + atomic_set(&sd->s_count, 1); + atomic_set(&sd->s_active, 0); + atomic_set(&sd->s_event, 1); + + sd->s_name = name; + sd->s_mode = mode; + sd->s_flags = type; + + return sd; + + err_out: + kfree(dup_name); + kmem_cache_free(sysfs_dir_cachep, sd); + return NULL; +} + +/** + * sysfs_attach_dentry - associate sysfs_dirent with dentry + * @sd: target sysfs_dirent + * @dentry: dentry to associate + * + * Associate @sd with @dentry. This is protected by + * sysfs_assoc_lock to avoid race with sysfs_d_iput(). + * + * LOCKING: + * mutex_lock(sysfs_mutex) + */ +static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry) +{ + dentry->d_op = &sysfs_dentry_ops; + dentry->d_fsdata = sysfs_get(sd); + + /* protect sd->s_dentry against sysfs_d_iput */ + spin_lock(&sysfs_assoc_lock); + sd->s_dentry = dentry; + spin_unlock(&sysfs_assoc_lock); + + if (dentry->d_flags & DCACHE_UNHASHED) + d_rehash(dentry); +} + +static int sysfs_ilookup_test(struct inode *inode, void *arg) +{ + struct sysfs_dirent *sd = arg; + return inode->i_ino == sd->s_ino; +} + +/** + * sysfs_addrm_start - prepare for sysfs_dirent add/remove + * @acxt: pointer to sysfs_addrm_cxt to be used + * @parent_sd: parent sysfs_dirent + * + * This function is called when the caller is about to add or + * remove sysfs_dirent under @parent_sd. 
This function acquires + * sysfs_mutex, grabs inode for @parent_sd if available and lock + * i_mutex of it. @acxt is used to keep and pass context to + * other addrm functions. + * + * LOCKING: + * Kernel thread context (may sleep). sysfs_mutex is locked on + * return. i_mutex of parent inode is locked on return if + * available. + */ +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd) +{ + struct inode *inode; + + memset(acxt, 0, sizeof(*acxt)); + acxt->parent_sd = parent_sd; + + /* Lookup parent inode. inode initialization and I_NEW + * clearing are protected by sysfs_mutex. By grabbing it and + * looking up with _nowait variant, inode state can be + * determined reliably. + */ + mutex_lock(&sysfs_mutex); + + inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, + parent_sd); + + if (inode && !(inode->i_state & I_NEW)) { + /* parent inode available */ + acxt->parent_inode = inode; + + /* sysfs_mutex is below i_mutex in lock hierarchy. + * First, trylock i_mutex. If fails, unlock + * sysfs_mutex and lock them in order. + */ + if (!mutex_trylock(&inode->i_mutex)) { + mutex_unlock(&sysfs_mutex); + mutex_lock(&inode->i_mutex); + mutex_lock(&sysfs_mutex); + } + } else + iput(inode); +} + +/** + * sysfs_add_one - add sysfs_dirent to parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * + * Get @acxt->parent_sd and set sd->s_parent to it and increment + * nlink of parent inode if @sd is a directory. @sd is NOT + * linked into the children list of the parent. The caller + * should invoke sysfs_link_sibling() after this function + * completes if @sd needs to be on the children list. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). 
+ */ +void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + sd->s_parent = sysfs_get(acxt->parent_sd); + + if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) + inc_nlink(acxt->parent_inode); + + acxt->cnt++; +} + +/** + * sysfs_remove_one - remove sysfs_dirent from parent + * @acxt: addrm context to use + * @sd: sysfs_dirent to be added + * + * Mark @sd removed and drop nlink of parent inode if @sd is a + * directory. @sd is NOT unlinked from the children list of the + * parent. The caller is repsonsible for removing @sd from the + * children list before calling this function. + * + * This function should be called between calls to + * sysfs_addrm_start() and sysfs_addrm_finish() and should be + * passed the same @acxt as passed to sysfs_addrm_start(). + * + * LOCKING: + * Determined by sysfs_addrm_start(). + */ +void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED)); + + sd->s_flags |= SYSFS_FLAG_REMOVED; + sd->s_sibling = acxt->removed; + acxt->removed = sd; + + if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) + drop_nlink(acxt->parent_inode); + + acxt->cnt++; +} + +/** + * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent + * @sd: target sysfs_dirent + * + * Drop dentry for @sd. @sd must have been unlinked from its + * parent on entry to this function such that it can't be looked + * up anymore. + * + * @sd->s_dentry which is protected with sysfs_assoc_lock points + * to the currently associated dentry but we're not holding a + * reference to it and racing with dput(). Grab dcache_lock and + * verify dentry before dropping it. If @sd->s_dentry is NULL or + * dput() beats us, no need to bother. 
+ */ +static void sysfs_drop_dentry(struct sysfs_dirent *sd) +{ + struct dentry *dentry = NULL; + struct inode *inode; + + /* We're not holding a reference to ->s_dentry dentry but the + * field will stay valid as long as sysfs_assoc_lock is held. + */ + spin_lock(&sysfs_assoc_lock); + spin_lock(&dcache_lock); + + /* drop dentry if it's there and dput() didn't kill it yet */ + if (sd->s_dentry && sd->s_dentry->d_inode) { + dentry = dget_locked(sd->s_dentry); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + } + + spin_unlock(&dcache_lock); + spin_unlock(&sysfs_assoc_lock); + + /* dentries for shadowed directories are pinned, unpin */ + if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) || + (sd->s_flags & SYSFS_FLAG_SHADOWED)) + dput(dentry); + dput(dentry); + + /* adjust nlink and update timestamp */ + inode = ilookup(sysfs_sb, sd->s_ino); + if (inode) { + mutex_lock(&inode->i_mutex); + + inode->i_ctime = CURRENT_TIME; + drop_nlink(inode); + if (sysfs_type(sd) == SYSFS_DIR) + drop_nlink(inode); + + mutex_unlock(&inode->i_mutex); + iput(inode); + } +} + +/** + * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent + * @sd: target sysfs_dirent + * + * Drop dentry for @sd. @sd must have been unlinked from its + * parent on entry to this function such that it can't be looked + * up anymore. + * + * @sd->s_dentry which is protected with sysfs_assoc_lock points + * to the currently associated dentry but we're not holding a + * reference to it and racing with dput(). Grab dcache_lock and + * verify dentry before dropping it. If @sd->s_dentry is NULL or + * dput() beats us, no need to bother. + */ + + +/** + * sysfs_addrm_finish - finish up sysfs_dirent add/remove + * @acxt: addrm context to finish up + * + * Finish up sysfs_dirent add/remove. Resources acquired by + * sysfs_addrm_start() are released and removed sysfs_dirents are + * cleaned up. Timestamps on the parent inode are updated. 
+ * + * LOCKING: + * All mutexes acquired by sysfs_addrm_start() are released. + * + * RETURNS: + * Number of added/removed sysfs_dirents since sysfs_addrm_start(). + */ +int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) +{ + /* release resources acquired by sysfs_addrm_start() */ + mutex_unlock(&sysfs_mutex); + if (acxt->parent_inode) { + struct inode *inode = acxt->parent_inode; + + /* if added/removed, update timestamps on the parent */ + if (acxt->cnt) + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + + mutex_unlock(&inode->i_mutex); + iput(inode); + } + + /* kill removed sysfs_dirents */ + while (acxt->removed) { + struct sysfs_dirent *sd = acxt->removed; + + acxt->removed = sd->s_sibling; + sd->s_sibling = NULL; + + sysfs_prune_shadow_sd(sd->s_parent); + sysfs_drop_dentry(sd); + sysfs_deactivate(sd); + sysfs_put(sd); + } + + return acxt->cnt; +} + +/** + * sysfs_find_dirent - find sysfs_dirent with the given name + * @parent_sd: sysfs_dirent to search under + * @name: name to look for + * + * Look for sysfs_dirent with name @name under @parent_sd. + * + * LOCKING: + * mutex_lock(sysfs_mutex) + * + * RETURNS: + * Pointer to sysfs_dirent if found, NULL if not. + */ +struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) +{ + struct sysfs_dirent *sd; + + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) + if (sysfs_type(sd) && !strcmp(sd->s_name, name)) + return sd; + return NULL; +} + +/** + * sysfs_get_dirent - find and get sysfs_dirent with the given name + * @parent_sd: sysfs_dirent to search under + * @name: name to look for + * + * Look for sysfs_dirent with name @name under @parent_sd and get + * it if found. + * + * LOCKING: + * Kernel thread context (may sleep). Grabs sysfs_mutex. + * + * RETURNS: + * Pointer to sysfs_dirent if found, NULL if not. 
+ */ +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name) +{ + struct sysfs_dirent *sd; + + mutex_lock(&sysfs_mutex); + sd = sysfs_find_dirent(parent_sd, name); + sysfs_get(sd); + mutex_unlock(&sysfs_mutex); + + return sd; +} + +static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, + const char *name, struct sysfs_dirent **p_sd) +{ + umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + int err; + + /* allocate */ + sd = sysfs_new_dirent(name, mode, SYSFS_DIR); + if (!sd) + return -ENOMEM; + sd->s_elem.dir.kobj = kobj; + + /* link in */ + sysfs_addrm_start(&acxt, parent_sd); + err = -ENOENT; + if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) + goto addrm_finish; + + err = -EEXIST; + if (!sysfs_find_dirent(acxt.parent_sd, name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + err = 0; + } +addrm_finish: + if (sysfs_addrm_finish(&acxt)) { + *p_sd = sd; + return 0; + } + + sysfs_put(sd); + return err; +} + +int sysfs_create_subdir(struct kobject *kobj, const char *name, + struct sysfs_dirent **p_sd) +{ + return create_dir(kobj, kobj->sd, name, p_sd); +} + +/** + * sysfs_create_dir - create a directory for an object. + * @kobj: object we're creating directory for. 
+ */ +int sysfs_create_dir(struct kobject * kobj) +{ + struct sysfs_dirent *parent_sd, *sd; + int error = 0; + + BUG_ON(!kobj); + + if (kobj->parent) + parent_sd = kobj->parent->sd; + else if (sysfs_mount && sysfs_mount->mnt_sb) + parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; + else + return -EFAULT; + + error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); + if (!error) + kobj->sd = sd; + return error; +} + +static int sysfs_count_nlink(struct sysfs_dirent *sd) +{ + struct sysfs_dirent *child; + int nr = 0; + + for (child = sd->s_children; child; child = child->s_sibling) + if (sysfs_type(child) == SYSFS_DIR) + nr++; + return nr + 2; +} + +static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd; + struct bin_attribute *bin_attr; + struct inode *inode; + int found = 0; + + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { + if (sysfs_type(sd) && + !strcmp(sd->s_name, dentry->d_name.name)) { + found = 1; + break; + } + } + + /* no such entry */ + if (!found) + return NULL; + + /* attach dentry and inode */ + inode = sysfs_get_inode(sd); + if (!inode) + return ERR_PTR(-ENOMEM); + + mutex_lock(&sysfs_mutex); + + if (inode->i_state & I_NEW) { + /* initialize inode according to type */ + switch (sysfs_type(sd)) { + case SYSFS_DIR: + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; + inode->i_nlink = sysfs_count_nlink(sd); + break; + case SYSFS_KOBJ_ATTR: + inode->i_size = PAGE_SIZE; + inode->i_fop = &sysfs_file_operations; + break; + case SYSFS_KOBJ_BIN_ATTR: + bin_attr = sd->s_elem.bin_attr.bin_attr; + inode->i_size = bin_attr->size; + inode->i_fop = &bin_fops; + break; + case SYSFS_KOBJ_LINK: + inode->i_op = &sysfs_symlink_inode_operations; + break; + default: + BUG(); + } + } + + sysfs_instantiate(dentry, inode); + sysfs_attach_dentry(sd, dentry); + + 
mutex_unlock(&sysfs_mutex); + + return NULL; +} + +static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct sysfs_dirent *sd; + struct dentry *dest; + + sd = dentry->d_fsdata; + dest = NULL; + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const struct shadow_dir_operations *shadow_ops; + const void *tag; + + mutex_lock(&sysfs_mutex); + + shadow_ops = dentry->d_inode->i_private; + tag = shadow_ops->current_tag(); + + sd = find_shadow_sd(sd, tag); + if (sd) + dest = sd->s_dentry; + dget(dest); + + mutex_unlock(&sysfs_mutex); + } + if (!dest) + dest = dget(dentry); + dput(nd->dentry); + nd->dentry = dest; + + return NULL; +} + + +const struct inode_operations sysfs_dir_inode_operations = { + .lookup = sysfs_lookup, + .setattr = sysfs_setattr, + .follow_link = sysfs_shadow_follow_link, +}; + +static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + sysfs_unlink_sibling(sd); + sysfs_remove_one(acxt, sd); +} + +static void remove_dir(struct sysfs_dirent *sd) +{ + struct sysfs_addrm_cxt acxt; + + sysfs_addrm_start(&acxt, sd->s_parent); + __remove_dir(&acxt, sd); + sysfs_addrm_finish(&acxt); +} + +void sysfs_remove_subdir(struct sysfs_dirent *sd) +{ + remove_dir(sd); +} + +static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *dir_sd) +{ + struct sysfs_dirent **pos; + + pos = &dir_sd->s_children; + while (*pos) { + struct sysfs_dirent *sd = *pos; + + if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + sysfs_remove_one(acxt, sd); + } else + pos = &(*pos)->s_sibling; + } +} + +static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt, + struct sysfs_dirent *dir_sd) +{ + struct sysfs_dirent **pos; + + pos = &dir_sd->s_children; + while (*pos) { + struct sysfs_dirent *sd = *pos; + + sysfs_empty_dir(acxt, sd); + __remove_dir(acxt, sd); + } +} + +static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) +{ + struct sysfs_addrm_cxt 
acxt; + + if (!dir_sd) + return; + + pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); + sysfs_addrm_start(&acxt, dir_sd); + if (sysfs_type(dir_sd) == SYSFS_DIR) + sysfs_empty_dir(&acxt, dir_sd); + else + sysfs_remove_shadows(&acxt, dir_sd); + sysfs_addrm_finish(&acxt); + + remove_dir(dir_sd); +} + +/** + * sysfs_remove_dir - remove an object's directory. + * @kobj: object. + * + * The only thing special about this is that we remove any files in + * the directory before we remove the directory, and we've inlined + * what used to be sysfs_rmdir() below, instead of calling separately. + */ + +void sysfs_remove_dir(struct kobject * kobj) +{ + struct sysfs_dirent *sd = kobj->sd; + + spin_lock(&sysfs_assoc_lock); + kobj->sd = NULL; + spin_unlock(&sysfs_assoc_lock); + + __sysfs_remove_dir(sd); +} + +int sysfs_rename_dir(struct kobject * kobj, const char *new_name) +{ + struct dentry *old_dentry, *new_dentry, *parent; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + const char *dup_name; + int error; + + dup_name = NULL; + new_dentry = NULL; + + sd = kobj->sd; + sysfs_addrm_start(&acxt, sd->s_parent); + error = -ENOENT; + if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) + goto addrm_finish; + + error = -EEXIST; + if (sysfs_find_dirent(acxt.parent_sd, new_name)) + goto addrm_finish; + + error = -EINVAL; + if ((sd->s_parent == acxt.parent_sd) && + (strcmp(new_name, sd->s_name) == 0)) + goto addrm_finish; + + old_dentry = sd->s_dentry; + parent = acxt.parent_sd->s_dentry; + if (old_dentry) { + old_dentry = sd->s_dentry; + parent = acxt.parent_sd->s_dentry; + new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); + if (IS_ERR(new_dentry)) { + error = PTR_ERR(new_dentry); + goto addrm_finish; + } + + error = -EINVAL; + if (old_dentry == new_dentry) + goto addrm_finish; + } + + /* rename kobject and sysfs_dirent */ + error = -ENOMEM; + new_name = dup_name = kstrdup(new_name, GFP_KERNEL); + if (!new_name) + goto addrm_finish; + + error = 
kobject_set_name(kobj, "%s", new_name); + if (error) + goto addrm_finish; + + dup_name = sd->s_name; + sd->s_name = new_name; + + /* move under the new parent */ + sysfs_unlink_sibling(sd); + sysfs_get(acxt.parent_sd); + sysfs_put(sd->s_parent); + sd->s_parent = acxt.parent_sd; + sysfs_link_sibling(sd); + + if (new_dentry) { + d_add(new_dentry, NULL); + d_move(old_dentry, new_dentry); + } + error = 0; +addrm_finish: + sysfs_addrm_finish(&acxt); + + kfree(dup_name); + dput(new_dentry); + return error; +} + +int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) +{ + struct sysfs_dirent *sd = kobj->sd; + struct sysfs_dirent *new_parent_sd; + struct dentry *old_parent, *new_parent = NULL; + struct dentry *old_dentry = NULL, *new_dentry = NULL; + int error; + + BUG_ON(!sd->s_parent); + new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; + + /* get dentries */ + old_dentry = sysfs_get_dentry(sd); + if (IS_ERR(old_dentry)) { + error = PTR_ERR(old_dentry); + goto out_dput; + } + old_parent = sd->s_parent->s_dentry; + + new_parent = sysfs_get_dentry(new_parent_sd); + if (IS_ERR(new_parent)) { + error = PTR_ERR(new_parent); + goto out_dput; + } + + if (old_parent->d_inode == new_parent->d_inode) { + error = 0; + goto out_dput; /* nothing to move */ + } +again: + mutex_lock(&old_parent->d_inode->i_mutex); + if (!mutex_trylock(&new_parent->d_inode->i_mutex)) { + mutex_unlock(&old_parent->d_inode->i_mutex); + goto again; + } + + new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name)); + if (IS_ERR(new_dentry)) { + error = PTR_ERR(new_dentry); + goto out_unlock; + } else + error = 0; + d_add(new_dentry, NULL); + d_move(sd->s_dentry, new_dentry); + dput(new_dentry); + + /* Remove from old parent's list and insert into new parent's list. 
*/ + mutex_lock(&sysfs_mutex); + + sysfs_unlink_sibling(sd); + sysfs_get(new_parent_sd); + sysfs_put(sd->s_parent); + sd->s_parent = new_parent_sd; + sysfs_link_sibling(sd); + + mutex_unlock(&sysfs_mutex); + + out_unlock: + mutex_unlock(&new_parent->d_inode->i_mutex); + mutex_unlock(&old_parent->d_inode->i_mutex); + out_dput: + dput(new_parent); + dput(old_dentry); + dput(new_dentry); + return error; +} + +static int sysfs_dir_open(struct inode *inode, struct file *file) +{ + struct dentry * dentry = file->f_path.dentry; + struct sysfs_dirent * parent_sd = dentry->d_fsdata; + struct sysfs_dirent * sd; + + sd = sysfs_new_dirent("_DIR_", 0, 0); + if (sd) { + mutex_lock(&sysfs_mutex); + sd->s_parent = sysfs_get(parent_sd); + sysfs_link_sibling(sd); + mutex_unlock(&sysfs_mutex); + } + + file->private_data = sd; + return sd ? 0 : -ENOMEM; +} + +static int sysfs_dir_close(struct inode *inode, struct file *file) +{ + struct sysfs_dirent * cursor = file->private_data; + + mutex_lock(&sysfs_mutex); + sysfs_unlink_sibling(cursor); + mutex_unlock(&sysfs_mutex); + + release_sysfs_dirent(cursor); + + return 0; +} + +/* Relationship between s_mode and the DT_xxx types */ +static inline unsigned char dt_type(struct sysfs_dirent *sd) +{ + return (sd->s_mode >> 12) & 15; +} + +static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + struct sysfs_dirent * parent_sd = dentry->d_fsdata; + struct sysfs_dirent *cursor = filp->private_data; + struct sysfs_dirent **pos; + ino_t ino; + int i = filp->f_pos; + + switch (i) { + case 0: + ino = parent_sd->s_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + case 1: + if (parent_sd->s_parent) + ino = parent_sd->s_parent->s_ino; + else + ino = parent_sd->s_ino; + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + default: + /* If I am the shadow master return 
nothing. */ + if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED) + return 0; + + mutex_lock(&sysfs_mutex); + pos = &parent_sd->s_children; + while (*pos != cursor) + pos = &(*pos)->s_sibling; + + /* unlink cursor */ + *pos = cursor->s_sibling; + + if (filp->f_pos == 2) + pos = &parent_sd->s_children; + + for ( ; *pos; pos = &(*pos)->s_sibling) { + struct sysfs_dirent *next = *pos; + const char * name; + int len; + + if (!sysfs_type(next)) + continue; + + name = next->s_name; + len = strlen(name); + ino = next->s_ino; + + if (filldir(dirent, name, len, filp->f_pos, ino, + dt_type(next)) < 0) + break; + + filp->f_pos++; + } + + /* put cursor back in */ + cursor->s_sibling = *pos; + *pos = cursor; + + mutex_unlock(&sysfs_mutex); + } + return 0; +} + +static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) +{ + struct dentry * dentry = file->f_path.dentry; + + switch (origin) { + case 1: + offset += file->f_pos; + case 0: + if (offset >= 0) + break; + default: + return -EINVAL; + } + if (offset != file->f_pos) { + mutex_lock(&sysfs_mutex); + + file->f_pos = offset; + if (file->f_pos >= 2) { + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *cursor = file->private_data; + struct sysfs_dirent **pos; + loff_t n = file->f_pos - 2; + + sysfs_unlink_sibling(cursor); + + pos = &sd->s_children; + while (n && *pos) { + struct sysfs_dirent *next = *pos; + if (sysfs_type(next)) + n--; + pos = &(*pos)->s_sibling; + } + + cursor->s_sibling = *pos; + *pos = cursor; + } + + mutex_unlock(&sysfs_mutex); + } + + return offset; +} + +const struct file_operations sysfs_dir_operations = { + .open = sysfs_dir_open, + .release = sysfs_dir_close, + .llseek = sysfs_dir_lseek, + .read = generic_read_dir, + .readdir = sysfs_readdir, +}; + + +static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd) +{ + struct sysfs_addrm_cxt acxt; + + /* If a shadow directory goes empty remove it. 
*/ + if (sysfs_type(sd) != SYSFS_SHADOW_DIR) + return; + + if (sd->s_children) + return; + + sysfs_addrm_start(&acxt, sd->s_parent); + + if (sd->s_flags & SYSFS_FLAG_REMOVED) + goto addrm_finish; + + if (sd->s_children) + goto addrm_finish; + + __remove_dir(&acxt, sd); +addrm_finish: + sysfs_addrm_finish(&acxt); +} + +static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag) +{ + struct sysfs_dirent *sd = NULL; + struct dentry *dir, *shadow; + struct inode *inode; + + dir = parent_sd->s_dentry; + inode = dir->d_inode; + + shadow = d_alloc(dir->d_parent, &dir->d_name); + if (!shadow) + goto out; + + /* Since the shadow directory is reachable make it look + * like it is actually hashed. + */ + shadow->d_hash.pprev = &shadow->d_hash.next; + shadow->d_hash.next = NULL; + shadow->d_flags &= ~DCACHE_UNHASHED; + + sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR); + if (!sd) + goto error; + + sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj; + sd->s_parent = sysfs_get(parent_sd); + + /* Use the inode number of the parent we are shadowing */ + sysfs_free_ino(sd->s_ino); + sd->s_ino = parent_sd->s_ino; + + inc_nlink(inode); + inc_nlink(dir->d_parent->d_inode); + + sysfs_link_sibling(sd); + __iget(inode); + sysfs_instantiate(shadow, inode); + sysfs_attach_dentry(sd, shadow); +out: + return sd; +error: + dput(shadow); + goto out; +} + +int sysfs_resolve_for_create(struct kobject *kobj, + struct sysfs_dirent **parent_sd) +{ + const struct shadow_dir_operations *shadow_ops; + struct sysfs_dirent *sd, *shadow_sd; + + sd = *parent_sd; + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + sd = sd->s_parent; + + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const void *tag; + + shadow_ops = sd->s_dentry->d_inode->i_private; + tag = shadow_ops->kobject_tag(kobj); + + shadow_sd = find_shadow_sd(sd, tag); + if (!shadow_sd) + shadow_sd = add_shadow_sd(sd, tag); + sd = shadow_sd; + } + if (sd) { + *parent_sd = sd; + return 1; + } + return 0; +} + +int 
sysfs_resolve_for_remove(struct kobject *kobj, + struct sysfs_dirent **parent_sd) +{ + struct sysfs_dirent *sd; + /* If dentry is a shadow directory find the shadow that is + * stored under the same tag as kobj. This allows removal + * of dirents to function properly even if the value of + * kobject_tag() has changed since we initially created + * the dirents assoctated with kobj. + */ + + sd = *parent_sd; + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + sd = sd->s_parent; + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const void *tag; + + tag = find_shadow_tag(kobj); + sd = find_shadow_sd(sd, tag); + } + if (sd) { + *parent_sd = sd; + return 1; + } + return 0; +} + +/** + * sysfs_enable_shadowing - Automatically create shadows of a directory + * @kobj: object to automatically shadow + * + * Once shadowing has been enabled on a directory the contents + * of the directory become dependent upon context. + * + * shadow_ops->current_tag() returns the context for the current + * process. + * + * shadow_ops->kobject_tag() returns the context that a given kobj + * resides in. + * + * Using those methods the sysfs code on shadowed directories + * carefully stores the files so that when we lookup files + * we get the proper answer for our context. + * + * If the context of a kobject is changed it is expected that + * the kobject will be renamed so the appopriate sysfs data structures + * can be updated. + */ +int sysfs_enable_shadowing(struct kobject *kobj, + const struct shadow_dir_operations *shadow_ops) +{ + struct sysfs_dirent *sd; + struct dentry *dentry; + int err; + + /* Find the dentry for the shadowed directory and + * increase it's count. + */ + err = -ENOENT; + sd = kobj->sd; + dentry = sysfs_get_dentry(sd); + if (!dentry) + goto out; + + mutex_lock(&sysfs_mutex); + err = -EINVAL; + /* We can only enable shadowing on empty directories + * where shadowing is not already enabled. 
+ */ + if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) && + !(sd->s_flags & SYSFS_FLAG_REMOVED) && + !(sd->s_flags & SYSFS_FLAG_SHADOWED)) { + sd->s_flags |= SYSFS_FLAG_SHADOWED; + dentry->d_inode->i_private = (void *)shadow_ops; + err = 0; + } + mutex_unlock(&sysfs_mutex); +out: + if (err) + dput(dentry); + return err; +} + diff -Nurb linux-2.6.22-570/fs/sysfs/file.c linux-2.6.22-591/fs/sysfs/file.c --- linux-2.6.22-570/fs/sysfs/file.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/fs/sysfs/file.c 2007-12-21 15:36:14.000000000 -0500 @@ -50,29 +50,15 @@ .store = subsys_attr_store, }; -/** - * add_to_collection - add buffer to a collection - * @buffer: buffer to be added - * @node: inode of set to add to - */ - -static inline void -add_to_collection(struct sysfs_buffer *buffer, struct inode *node) -{ - struct sysfs_buffer_collection *set = node->i_private; - - mutex_lock(&node->i_mutex); - list_add(&buffer->associates, &set->associates); - mutex_unlock(&node->i_mutex); -} - -static inline void -remove_from_collection(struct sysfs_buffer *buffer, struct inode *node) -{ - mutex_lock(&node->i_mutex); - list_del(&buffer->associates); - mutex_unlock(&node->i_mutex); -} +struct sysfs_buffer { + size_t count; + loff_t pos; + char * page; + struct sysfs_ops * ops; + struct semaphore sem; + int needs_read_fill; + int event; +}; /** * fill_read_buffer - allocate and fill buffer from object. 
@@ -87,9 +73,8 @@ */ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) { - struct sysfs_dirent * sd = dentry->d_fsdata; - struct attribute * attr = to_attr(dentry); - struct kobject * kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_ops * ops = buffer->ops; int ret = 0; ssize_t count; @@ -99,8 +84,15 @@ if (!buffer->page) return -ENOMEM; - buffer->event = atomic_read(&sd->s_event); - count = ops->show(kobj,attr,buffer->page); + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + buffer->event = atomic_read(&attr_sd->s_event); + count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page); + + sysfs_put_active_two(attr_sd); + BUG_ON(count > (ssize_t)PAGE_SIZE); if (count >= 0) { buffer->needs_read_fill = 0; @@ -138,9 +130,6 @@ down(&buffer->sem); if (buffer->needs_read_fill) { - if (buffer->orphaned) - retval = -ENODEV; - else retval = fill_read_buffer(file->f_path.dentry,buffer); if (retval) goto out; @@ -199,11 +188,20 @@ static int flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) { - struct attribute * attr = to_attr(dentry); - struct kobject * kobj = to_kobj(dentry->d_parent); + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_ops * ops = buffer->ops; + int rc; + + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; + + rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count); + + sysfs_put_active_two(attr_sd); - return ops->store(kobj,attr,buffer->page,count); + return rc; } @@ -231,37 +229,29 @@ ssize_t len; down(&buffer->sem); - if (buffer->orphaned) { - len = -ENODEV; - goto out; - } len = fill_write_buffer(buffer, buf, count); if (len > 0) len = 
flush_write_buffer(file->f_path.dentry, buffer, len); if (len > 0) *ppos += len; -out: up(&buffer->sem); return len; } static int sysfs_open_file(struct inode *inode, struct file *file) { - struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); - struct attribute * attr = to_attr(file->f_path.dentry); - struct sysfs_buffer_collection *set; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; struct sysfs_buffer * buffer; + struct sysfs_ops * ops = NULL; - int error = 0; + int error = -EACCES; /* returned by the early err_out exits */ - if (!kobj || !attr) - goto Einval; - /* Grab the module reference for this attribute if we have one */ - if (!try_module_get(attr->owner)) { - error = -ENODEV; - goto Done; - } + /* need attr_sd for attr and ops, its parent for kobj */ + + if (!sysfs_get_active_two(attr_sd)) + return -ENODEV; /* if the kobject has no ktype, then we assume that it is a subsystem * itself, and use ops for it. @@ -277,20 +267,7 @@ * or the subsystem have no operations. */ if (!ops) - goto Eaccess; - - /* make sure we have a collection to add our buffers to */ - mutex_lock(&inode->i_mutex); - if (!(set = inode->i_private)) { - if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) { - mutex_unlock(&inode->i_mutex); - error = -ENOMEM; - goto Done; - } else { - INIT_LIST_HEAD(&set->associates); - } - } - mutex_unlock(&inode->i_mutex); + goto err_out; /* File needs write support. * The inode's perms must say it's ok, @@ -299,7 +276,7 @@ if (file->f_mode & FMODE_WRITE) { if (!(inode->i_mode & S_IWUGO) || !ops->store) - goto Eaccess; + goto err_out; } @@ -309,48 +286,38 @@ */ if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO) || !ops->show) - goto Eaccess; + goto err_out; } /* No error? Great, allocate a buffer for the file, and store it * it in file->private_data for easy access. 
*/ + error = -ENOMEM; buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); - if (buffer) { - INIT_LIST_HEAD(&buffer->associates); + if (!buffer) + goto err_out; + init_MUTEX(&buffer->sem); buffer->needs_read_fill = 1; buffer->ops = ops; - add_to_collection(buffer, inode); file->private_data = buffer; - } else - error = -ENOMEM; - goto Done; - Einval: - error = -EINVAL; - goto Done; - Eaccess: - error = -EACCES; - module_put(attr->owner); - Done: - if (error) - kobject_put(kobj); + /* open succeeded, put active references and pin attr_sd */ + sysfs_put_active_two(attr_sd); + sysfs_get(attr_sd); + return 0; + + err_out: + sysfs_put_active_two(attr_sd); return error; } static int sysfs_release(struct inode * inode, struct file * filp) { - struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); - struct attribute * attr = to_attr(filp->f_path.dentry); - struct module * owner = attr->owner; - struct sysfs_buffer * buffer = filp->private_data; + struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; + struct sysfs_buffer *buffer = filp->private_data; - if (buffer) - remove_from_collection(buffer, inode); - kobject_put(kobj); - /* After this point, attr should not be accessed. 
*/ - module_put(owner); + sysfs_put(attr_sd); if (buffer) { if (buffer->page) @@ -377,57 +344,43 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) { struct sysfs_buffer * buffer = filp->private_data; - struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); - struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; - int res = 0; - - poll_wait(filp, &kobj->poll, wait); + struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; - if (buffer->event != atomic_read(&sd->s_event)) { - res = POLLERR|POLLPRI; - buffer->needs_read_fill = 1; - } + /* need parent for the kobj, grab both */ + if (!sysfs_get_active_two(attr_sd)) + goto trigger; - return res; -} + poll_wait(filp, &kobj->poll, wait); + sysfs_put_active_two(attr_sd); -static struct dentry *step_down(struct dentry *dir, const char * name) -{ - struct dentry * de; + if (buffer->event != atomic_read(&attr_sd->s_event)) + goto trigger; - if (dir == NULL || dir->d_inode == NULL) - return NULL; + return 0; - mutex_lock(&dir->d_inode->i_mutex); - de = lookup_one_len(name, dir, strlen(name)); - mutex_unlock(&dir->d_inode->i_mutex); - dput(dir); - if (IS_ERR(de)) - return NULL; - if (de->d_inode == NULL) { - dput(de); - return NULL; - } - return de; + trigger: + buffer->needs_read_fill = 1; + return POLLERR|POLLPRI; } -void sysfs_notify(struct kobject * k, char *dir, char *attr) +void sysfs_notify(struct kobject *k, char *dir, char *attr) { - struct dentry *de = k->dentry; - if (de) - dget(de); - if (de && dir) - de = step_down(de, dir); - if (de && attr) - de = step_down(de, attr); - if (de) { - struct sysfs_dirent * sd = de->d_fsdata; - if (sd) + struct sysfs_dirent *sd = k->sd; + + mutex_lock(&sysfs_mutex); + + if (sd && dir) + sd = sysfs_find_dirent(sd, dir); + if (sd && attr) + sd = sysfs_find_dirent(sd, attr); + if (sd) { atomic_inc(&sd->s_event); wake_up_interruptible(&k->poll); - dput(de); } + + 
mutex_unlock(&sysfs_mutex); } EXPORT_SYMBOL_GPL(sysfs_notify); @@ -441,19 +394,30 @@ }; -int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) +int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, + int type) { - struct sysfs_dirent * parent_sd = dir->d_fsdata; umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; - int error = -EEXIST; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; - mutex_lock(&dir->d_inode->i_mutex); - if (!sysfs_dirent_exist(parent_sd, attr->name)) - error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, - mode, type); - mutex_unlock(&dir->d_inode->i_mutex); + sd = sysfs_new_dirent(attr->name, mode, type); + if (!sd) + return -ENOMEM; + sd->s_elem.attr.attr = (void *)attr; - return error; + sysfs_addrm_start(&acxt, dir_sd); + + if (!sysfs_find_dirent(dir_sd, attr->name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + } + + if (sysfs_addrm_finish(&acxt)) + return 0; + + sysfs_put(sd); + return -EEXIST; } @@ -465,9 +429,9 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) { - BUG_ON(!kobj || !kobj->dentry || !attr); + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); + return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR); } @@ -481,16 +445,16 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { - struct dentry *dir; + struct sysfs_dirent *dir_sd; int error; - dir = lookup_one_len(group, kobj->dentry, strlen(group)); - if (IS_ERR(dir)) - error = PTR_ERR(dir); - else { - error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR); - dput(dir); - } + dir_sd = sysfs_get_dirent(kobj->sd, group); + if (!dir_sd) + return -ENOENT; + + error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); + sysfs_put(dir_sd); + return error; } EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); @@ -503,30 +467,31 @@ */ int sysfs_update_file(struct kobject * kobj, const struct attribute * 
attr) { - struct dentry * dir = kobj->dentry; - struct dentry * victim; - int res = -ENOENT; - - mutex_lock(&dir->d_inode->i_mutex); - victim = lookup_one_len(attr->name, dir, strlen(attr->name)); - if (!IS_ERR(victim)) { - /* make sure dentry is really there */ - if (victim->d_inode && - (victim->d_parent->d_inode == dir->d_inode)) { - victim->d_inode->i_mtime = CURRENT_TIME; - fsnotify_modify(victim); - res = 0; - } else - d_drop(victim); + struct sysfs_dirent *victim_sd = NULL; + struct dentry *victim = NULL; + int rc; + + rc = -ENOENT; + victim_sd = sysfs_get_dirent(kobj->sd, attr->name); + if (!victim_sd) + goto out; - /** - * Drop the reference acquired from lookup_one_len() above. - */ - dput(victim); + victim = sysfs_get_dentry(victim_sd); + if (IS_ERR(victim)) { + rc = PTR_ERR(victim); + victim = NULL; + goto out; } - mutex_unlock(&dir->d_inode->i_mutex); - return res; + mutex_lock(&victim->d_inode->i_mutex); + victim->d_inode->i_mtime = CURRENT_TIME; + fsnotify_modify(victim); + mutex_unlock(&victim->d_inode->i_mutex); + rc = 0; + out: + dput(victim); + sysfs_put(victim_sd); + return rc; } @@ -539,30 +504,34 @@ */ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) { - struct dentry *dir = kobj->dentry; - struct dentry *victim; + struct sysfs_dirent *victim_sd = NULL; + struct dentry *victim = NULL; struct inode * inode; struct iattr newattrs; - int res = -ENOENT; + int rc; + + rc = -ENOENT; + victim_sd = sysfs_get_dirent(kobj->sd, attr->name); + if (!victim_sd) + goto out; + + victim = sysfs_get_dentry(victim_sd); + if (IS_ERR(victim)) { + rc = PTR_ERR(victim); + victim = NULL; + goto out; + } - mutex_lock(&dir->d_inode->i_mutex); - victim = lookup_one_len(attr->name, dir, strlen(attr->name)); - if (!IS_ERR(victim)) { - if (victim->d_inode && - (victim->d_parent->d_inode == dir->d_inode)) { inode = victim->d_inode; mutex_lock(&inode->i_mutex); - newattrs.ia_mode = (mode & S_IALLUGO) | - (inode->i_mode & ~S_IALLUGO); + 
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - res = notify_change(victim, &newattrs); + rc = notify_change(victim, &newattrs); mutex_unlock(&inode->i_mutex); - } + out: dput(victim); - } - mutex_unlock(&dir->d_inode->i_mutex); - - return res; + sysfs_put(victim_sd); + return rc; } EXPORT_SYMBOL_GPL(sysfs_chmod_file); @@ -577,7 +546,7 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) { - sysfs_hash_and_remove(kobj->dentry, attr->name); + sysfs_hash_and_remove(kobj, kobj->sd, attr->name); } @@ -590,12 +559,12 @@ void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group) { - struct dentry *dir; + struct sysfs_dirent *dir_sd; - dir = lookup_one_len(group, kobj->dentry, strlen(group)); - if (!IS_ERR(dir)) { - sysfs_hash_and_remove(dir, attr->name); - dput(dir); + dir_sd = sysfs_get_dirent(kobj->sd, group); + if (dir_sd) { + sysfs_hash_and_remove(kobj, dir_sd, attr->name); + sysfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); diff -Nurb linux-2.6.22-570/fs/sysfs/group.c linux-2.6.22-591/fs/sysfs/group.c --- linux-2.6.22-570/fs/sysfs/group.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sysfs/group.c 2007-12-21 15:36:14.000000000 -0500 @@ -13,31 +13,29 @@ #include #include #include -#include #include #include "sysfs.h" -static void remove_files(struct dentry * dir, - const struct attribute_group * grp) +static void remove_files(struct kobject *kobj, struct sysfs_dirent *dir_sd, + const struct attribute_group *grp) { struct attribute *const* attr; for (attr = grp->attrs; *attr; attr++) - sysfs_hash_and_remove(dir,(*attr)->name); + sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name); } -static int create_files(struct dentry * dir, - const struct attribute_group * grp) +static int create_files(struct kobject *kobj, struct sysfs_dirent *dir_sd, + const struct attribute_group *grp) { struct attribute 
*const* attr; int error = 0; - for (attr = grp->attrs; *attr && !error; attr++) { - error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); - } + for (attr = grp->attrs; *attr && !error; attr++) + error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); if (error) - remove_files(dir,grp); + remove_files(kobj, dir_sd, grp); return error; } @@ -45,44 +43,44 @@ int sysfs_create_group(struct kobject * kobj, const struct attribute_group * grp) { - struct dentry * dir; + struct sysfs_dirent *sd; int error; - BUG_ON(!kobj || !kobj->dentry); + BUG_ON(!kobj || !kobj->sd); if (grp->name) { - error = sysfs_create_subdir(kobj,grp->name,&dir); + error = sysfs_create_subdir(kobj, grp->name, &sd); if (error) return error; } else - dir = kobj->dentry; - dir = dget(dir); - if ((error = create_files(dir,grp))) { + sd = kobj->sd; + sysfs_get(sd); + error = create_files(kobj, sd, grp); + if (error) { if (grp->name) - sysfs_remove_subdir(dir); + sysfs_remove_subdir(sd); } - dput(dir); + sysfs_put(sd); return error; } void sysfs_remove_group(struct kobject * kobj, const struct attribute_group * grp) { - struct dentry * dir; + struct sysfs_dirent *dir_sd = kobj->sd; + struct sysfs_dirent *sd; if (grp->name) { - dir = lookup_one_len_kern(grp->name, kobj->dentry, - strlen(grp->name)); - BUG_ON(IS_ERR(dir)); - } - else - dir = dget(kobj->dentry); + sd = sysfs_get_dirent(dir_sd, grp->name); + BUG_ON(!sd); + } else + sd = sysfs_get(dir_sd); - remove_files(dir,grp); + remove_files(kobj, sd, grp); if (grp->name) - sysfs_remove_subdir(dir); - /* release the ref. 
taken in this routine */ - dput(dir); + sysfs_remove_subdir(sd); + + sysfs_put(sd); } diff -Nurb linux-2.6.22-570/fs/sysfs/inode.c linux-2.6.22-591/fs/sysfs/inode.c --- linux-2.6.22-570/fs/sysfs/inode.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sysfs/inode.c 2007-12-23 01:18:26.000000000 -0500 @@ -34,16 +34,6 @@ .setattr = sysfs_setattr, }; -void sysfs_delete_inode(struct inode *inode) -{ - /* Free the shadowed directory inode operations */ - if (sysfs_is_shadowed_inode(inode)) { - kfree(inode->i_op); - inode->i_op = NULL; - } - return generic_delete_inode(inode); -} - int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) { struct inode * inode = dentry->d_inode; @@ -133,10 +123,8 @@ */ static struct lock_class_key sysfs_inode_imutex_key; -struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) +void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { - struct inode * inode = new_inode(sysfs_sb); - if (inode) { inode->i_blocks = 0; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; @@ -151,169 +139,81 @@ */ set_inode_attr(inode, sd->s_iattr); } else - set_default_inode_attr(inode, mode); - } - return inode; -} - -int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) -{ - int error = 0; - struct inode * inode = NULL; - if (dentry) { - if (!dentry->d_inode) { - struct sysfs_dirent * sd = dentry->d_fsdata; - if ((inode = sysfs_new_inode(mode, sd))) { - if (dentry->d_parent && dentry->d_parent->d_inode) { - struct inode *p_inode = dentry->d_parent->d_inode; - p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; - } - goto Proceed; - } - else - error = -ENOMEM; - } else - error = -EEXIST; - } else - error = -ENOENT; - goto Done; - - Proceed: - if (init) - error = init(inode); - if (!error) { - d_instantiate(dentry, inode); - if (S_ISDIR(mode)) - dget(dentry); /* pin only directory dentry in core */ - } else - iput(inode); - Done: - return 
error; + set_default_inode_attr(inode, sd->s_mode); } -/* - * Get the name for corresponding element represented by the given sysfs_dirent +/** + * sysfs_get_inode - get inode for sysfs_dirent + * @sd: sysfs_dirent to allocate inode for + * + * Get inode for @sd. If such inode doesn't exist, a new inode + * is allocated and basics are initialized. New inode is + * returned locked. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * Pointer to allocated inode on success, NULL on failure. */ -const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) +struct inode * sysfs_get_inode(struct sysfs_dirent *sd) { - struct attribute * attr; - struct bin_attribute * bin_attr; - struct sysfs_symlink * sl; - - BUG_ON(!sd || !sd->s_element); - - switch (sd->s_type) { - case SYSFS_DIR: - /* Always have a dentry so use that */ - return sd->s_dentry->d_name.name; - - case SYSFS_KOBJ_ATTR: - attr = sd->s_element; - return attr->name; - - case SYSFS_KOBJ_BIN_ATTR: - bin_attr = sd->s_element; - return bin_attr->attr.name; - - case SYSFS_KOBJ_LINK: - sl = sd->s_element; - return sl->link_name; - } - return NULL; -} + struct inode *inode; -static inline void orphan_all_buffers(struct inode *node) -{ - struct sysfs_buffer_collection *set; - struct sysfs_buffer *buf; + inode = iget_locked(sysfs_sb, sd->s_ino); + if (inode && (inode->i_state & I_NEW)) + sysfs_init_inode(sd, inode); - mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); - set = node->i_private; - if (set) { - list_for_each_entry(buf, &set->associates, associates) { - down(&buf->sem); - buf->orphaned = 1; - up(&buf->sem); - } - } - mutex_unlock(&node->i_mutex); + return inode; } - -/* - * Unhashes the dentry corresponding to given sysfs_dirent - * Called with parent inode's i_mutex held. +/** + * sysfs_instantiate - instantiate dentry + * @dentry: dentry to be instantiated + * @inode: inode associated with @sd + * + * Unlock @inode if locked and instantiate @dentry with @inode. 
+ * + * LOCKING: + * None. */ -void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) +void sysfs_instantiate(struct dentry *dentry, struct inode *inode) { - struct dentry *dentry = NULL; - struct inode *inode; + BUG_ON(!dentry || dentry->d_inode); - /* We're not holding a reference to ->s_dentry dentry but the - * field will stay valid as long as sysfs_lock is held. - */ - spin_lock(&sysfs_lock); - spin_lock(&dcache_lock); + if (inode->i_state & I_NEW) + unlock_new_inode(inode); - /* dget dentry if it's still alive */ - if (sd->s_dentry && sd->s_dentry->d_inode) - dentry = dget_locked(sd->s_dentry); - - spin_unlock(&dcache_lock); - spin_unlock(&sysfs_lock); - - /* drop dentry */ - if (dentry) { - spin_lock(&dcache_lock); - spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry) && dentry->d_inode) { - inode = dentry->d_inode; - spin_lock(&inode->i_lock); - __iget(inode); - spin_unlock(&inode->i_lock); - dget_locked(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - simple_unlink(parent->d_inode, dentry); - orphan_all_buffers(inode); - iput(inode); - } else { - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - } - - dput(dentry); - } + d_instantiate(dentry, inode); } -int sysfs_hash_and_remove(struct dentry * dir, const char * name) +int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name) { - struct sysfs_dirent * sd; - struct sysfs_dirent * parent_sd; - int found = 0; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent **pos, *sd; - if (!dir) + if (!dir_sd) return -ENOENT; - if (dir->d_inode == NULL) - /* no inode means this hasn't been made visible yet */ - return -ENOENT; - parent_sd = dir->d_fsdata; - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (!sd->s_element) + sysfs_addrm_start(&acxt, dir_sd); + if (!sysfs_resolve_for_remove(kobj, &acxt.parent_sd)) + goto addrm_finish; + 
+ for (pos = &acxt.parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) { + sd = *pos; + + if (!sysfs_type(sd)) continue; - if (!strcmp(sysfs_get_name(sd), name)) { - list_del_init(&sd->s_sibling); - sysfs_drop_dentry(sd, dir); - sysfs_put(sd); - found = 1; + if (!strcmp(sd->s_name, name)) { + *pos = sd->s_sibling; + sd->s_sibling = NULL; + sysfs_remove_one(&acxt, sd); break; } } - mutex_unlock(&dir->d_inode->i_mutex); - - return found ? 0 : -ENOENT; +addrm_finish: + if (sysfs_addrm_finish(&acxt)) + return 0; + return -ENOENT; } diff -Nurb linux-2.6.22-570/fs/sysfs/mount.c linux-2.6.22-591/fs/sysfs/mount.c --- linux-2.6.22-570/fs/sysfs/mount.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/sysfs/mount.c 2007-12-21 15:36:14.000000000 -0500 @@ -17,28 +17,18 @@ struct super_block * sysfs_sb = NULL; struct kmem_cache *sysfs_dir_cachep; -static void sysfs_clear_inode(struct inode *inode); - static const struct super_operations sysfs_ops = { .statfs = simple_statfs, - .drop_inode = sysfs_delete_inode, - .clear_inode = sysfs_clear_inode, + .drop_inode = generic_delete_inode, }; -static struct sysfs_dirent sysfs_root = { - .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), - .s_children = LIST_HEAD_INIT(sysfs_root.s_children), - .s_element = NULL, - .s_type = SYSFS_ROOT, - .s_iattr = NULL, +struct sysfs_dirent sysfs_root = { + .s_count = ATOMIC_INIT(1), + .s_flags = SYSFS_ROOT, + .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, .s_ino = 1, }; -static void sysfs_clear_inode(struct inode *inode) -{ - kfree(inode->i_private); -} - static int sysfs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; @@ -51,17 +41,18 @@ sb->s_time_gran = 1; sysfs_sb = sb; - inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, - &sysfs_root); - if (inode) { + inode = new_inode(sysfs_sb); + if (!inode) { + pr_debug("sysfs: could not get root inode\n"); + return -ENOMEM; + } + + sysfs_init_inode(&sysfs_root, inode); + inode->i_op = 
&sysfs_dir_inode_operations; inode->i_fop = &sysfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - } else { - pr_debug("sysfs: could not get root inode\n"); - return -ENOMEM; - } root = d_alloc_root(inode); if (!root) { @@ -69,6 +60,7 @@ iput(inode); return -ENOMEM; } + sysfs_root.s_dentry = root; root->d_fsdata = &sysfs_root; sb->s_root = root; return 0; diff -Nurb linux-2.6.22-570/fs/sysfs/symlink.c linux-2.6.22-591/fs/sysfs/symlink.c --- linux-2.6.22-570/fs/sysfs/symlink.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sysfs/symlink.c 2007-12-21 15:36:14.000000000 -0500 @@ -11,71 +11,49 @@ #include "sysfs.h" -static int object_depth(struct kobject * kobj) +static int object_depth(struct sysfs_dirent *sd) { - struct kobject * p = kobj; int depth = 0; - do { depth++; } while ((p = p->parent)); + + for (; sd->s_parent; sd = sd->s_parent) { + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + continue; + depth++; + } + return depth; } -static int object_path_length(struct kobject * kobj) +static int object_path_length(struct sysfs_dirent * sd) { - struct kobject * p = kobj; int length = 1; - do { - length += strlen(kobject_name(p)) + 1; - p = p->parent; - } while (p); + + for (; sd->s_parent; sd = sd->s_parent) { + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + continue; + length += strlen(sd->s_name) + 1; + } + return length; } -static void fill_object_path(struct kobject * kobj, char * buffer, int length) +static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length) { - struct kobject * p; - + int cur; --length; - for (p = kobj; p; p = p->parent) { - int cur = strlen(kobject_name(p)); + for (; sd->s_parent; sd = sd->s_parent) { + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + continue; + + cur = strlen(sd->s_name); /* back up enough to print this bus id with '/' */ length -= cur; - strncpy(buffer + length,kobject_name(p),cur); + strncpy(buffer + length, sd->s_name, cur); *(buffer + --length) = 
'/'; } } -static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target) -{ - struct sysfs_dirent * parent_sd = parent->d_fsdata; - struct sysfs_symlink * sl; - int error = 0; - - error = -ENOMEM; - sl = kmalloc(sizeof(*sl), GFP_KERNEL); - if (!sl) - goto exit1; - - sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL); - if (!sl->link_name) - goto exit2; - - strcpy(sl->link_name, name); - sl->target_kobj = kobject_get(target); - - error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO, - SYSFS_KOBJ_LINK); - if (!error) - return 0; - - kobject_put(target); - kfree(sl->link_name); -exit2: - kfree(sl); -exit1: - return error; -} - /** * sysfs_create_link - create symlink between two objects. * @kobj: object whose directory we're creating the link in. @@ -84,29 +62,80 @@ */ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) { - struct dentry *dentry = NULL; - int error = -EEXIST; + struct sysfs_dirent *parent_sd = NULL; + struct sysfs_dirent *target_sd = NULL; + struct sysfs_dirent *sd = NULL; + struct sysfs_addrm_cxt acxt; + int error; BUG_ON(!name); if (!kobj) { if (sysfs_mount && sysfs_mount->mnt_sb) - dentry = sysfs_mount->mnt_sb->s_root; + parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; } else - dentry = kobj->dentry; + parent_sd = kobj->sd; + + error = -EFAULT; + if (!parent_sd) + goto out_put; + + /* target->sd can go away beneath us but is protected with + * sysfs_assoc_lock. Fetch target_sd from it. 
+ */ + spin_lock(&sysfs_assoc_lock); + if (target->sd) + target_sd = sysfs_get(target->sd); + spin_unlock(&sysfs_assoc_lock); + + error = -ENOENT; + if (!target_sd) + goto out_put; + + error = -ENOMEM; + sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK); + if (!sd) + goto out_put; + sd->s_elem.symlink.target_sd = target_sd; + + sysfs_addrm_start(&acxt, parent_sd); + if (!sysfs_resolve_for_create(target, &acxt.parent_sd)) + goto addrm_finish; + + if (!sysfs_find_dirent(acxt.parent_sd, name)) { + sysfs_add_one(&acxt, sd); + sysfs_link_sibling(sd); + } - if (!dentry) - return -EFAULT; +addrm_finish: + if (sysfs_addrm_finish(&acxt)) + return 0; - mutex_lock(&dentry->d_inode->i_mutex); - if (!sysfs_dirent_exist(dentry->d_fsdata, name)) - error = sysfs_add_link(dentry, name, target); - mutex_unlock(&dentry->d_inode->i_mutex); + error = -EEXIST; + /* fall through */ + out_put: + sysfs_put(target_sd); + sysfs_put(sd); return error; } /** + * sysfs_delete_link - remove symlink in object's directory. + * @kobj: object we're acting for. + * @targ: object we're pointing to. + * @name: name of the symlink to remove. + * + * Unlike sysfs_remove_link sysfs_delete_link has enough information + * to successfully delete symlinks in shadow directories. + */ +void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, + const char *name) +{ + sysfs_hash_and_remove(targ, kobj->sd, name); +} + +/** * sysfs_remove_link - remove symlink in object's directory. * @kobj: object we're acting for. * @name: name of the symlink to remove. @@ -114,17 +143,33 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) { - sysfs_hash_and_remove(kobj->dentry,name); + sysfs_hash_and_remove(kobj, kobj->sd, name); } -static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, - char *path) +/** + * sysfs_rename_link - rename symlink in object's directory. + * @kobj: object we're acting for. + * @targ: object we're pointing to. 
+ * @old: previous name of the symlink. + * @new: new name of the symlink. + * + * A helper function for the common rename symlink idiom. + */ +int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new) +{ + sysfs_delete_link(kobj, targ, old); + return sysfs_create_link(kobj, targ, new); +} + +static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, + struct sysfs_dirent * target_sd, char *path) { char * s; int depth, size; - depth = object_depth(kobj); - size = object_path_length(target) + depth * 3 - 1; + depth = object_depth(parent_sd); + size = object_path_length(target_sd) + depth * 3 - 1; if (size > PATH_MAX) return -ENAMETOOLONG; @@ -133,7 +178,7 @@ for (s = path; depth--; s += 3) strcpy(s,"../"); - fill_object_path(target, path, size); + fill_object_path(target_sd, path, size); pr_debug("%s: path = '%s'\n", __FUNCTION__, path); return 0; @@ -141,27 +186,16 @@ static int sysfs_getlink(struct dentry *dentry, char * path) { - struct kobject *kobj, *target_kobj; - int error = 0; - - kobj = sysfs_get_kobject(dentry->d_parent); - if (!kobj) - return -EINVAL; - - target_kobj = sysfs_get_kobject(dentry); - if (!target_kobj) { - kobject_put(kobj); - return -EINVAL; - } + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *parent_sd = sd->s_parent; + struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd; + int error; + + mutex_lock(&sysfs_mutex); + error = sysfs_get_target_path(parent_sd, target_sd, path); + mutex_unlock(&sysfs_mutex); - down_read(&sysfs_rename_sem); - error = sysfs_get_target_path(kobj, target_kobj, path); - up_read(&sysfs_rename_sem); - - kobject_put(kobj); - kobject_put(target_kobj); return error; - } static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) diff -Nurb linux-2.6.22-570/fs/sysfs/sysfs.h linux-2.6.22-591/fs/sysfs/sysfs.h --- linux-2.6.22-570/fs/sysfs/sysfs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/sysfs/sysfs.h 2007-12-21 
15:36:14.000000000 -0500 @@ -1,9 +1,40 @@ +struct sysfs_elem_dir { + struct kobject * kobj; +}; + +struct sysfs_elem_symlink { + struct sysfs_dirent * target_sd; +}; + +struct sysfs_elem_attr { + struct attribute * attr; +}; + +struct sysfs_elem_bin_attr { + struct bin_attribute * bin_attr; +}; + +/* + * As long as s_count reference is held, the sysfs_dirent itself is + * accessible. Dereferencing s_elem or any other outer entity + * requires s_active reference. + */ struct sysfs_dirent { atomic_t s_count; - struct list_head s_sibling; - struct list_head s_children; - void * s_element; - int s_type; + atomic_t s_active; + struct sysfs_dirent * s_parent; + struct sysfs_dirent * s_sibling; + struct sysfs_dirent * s_children; + const char * s_name; + + union { + struct sysfs_elem_dir dir; + struct sysfs_elem_symlink symlink; + struct sysfs_elem_attr attr; + struct sysfs_elem_bin_attr bin_attr; + } s_elem; + + unsigned int s_flags; umode_t s_mode; ino_t s_ino; struct dentry * s_dentry; @@ -11,30 +42,77 @@ atomic_t s_event; }; +#define SD_DEACTIVATED_BIAS INT_MIN + +struct sysfs_addrm_cxt { + struct sysfs_dirent *parent_sd; + struct inode *parent_inode; + struct sysfs_dirent *removed; + int cnt; +}; + +/* + * A sysfs file which deletes another file when written to needs to + * write lock the s_active of the victim while its s_active is read + * locked for the write operation. Tell lockdep that this is okay. 
+ */ +enum sysfs_s_active_class +{ + SYSFS_S_ACTIVE_NORMAL, /* file r/w access, etc - default */ + SYSFS_S_ACTIVE_DEACTIVATE, /* file deactivation */ +}; + extern struct vfsmount * sysfs_mount; +extern struct sysfs_dirent sysfs_root; extern struct kmem_cache *sysfs_dir_cachep; -extern void sysfs_delete_inode(struct inode *inode); -extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); -extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); - -extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); -extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, - umode_t, int); - -extern int sysfs_add_file(struct dentry *, const struct attribute *, int); -extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); +extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); +extern void sysfs_link_sibling(struct sysfs_dirent *sd); +extern void sysfs_unlink_sibling(struct sysfs_dirent *sd); + +extern int sysfs_resolve_for_create(struct kobject *kobj, + struct sysfs_dirent **parent_sd); +extern int sysfs_resolve_for_remove(struct kobject *kobj, + struct sysfs_dirent **parent_sd); + +extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); +extern void sysfs_put_active(struct sysfs_dirent *sd); +extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd); +extern void sysfs_put_active_two(struct sysfs_dirent *sd); +extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd); +extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd); +extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd); +extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); + +extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode); +extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd); +extern void sysfs_instantiate(struct dentry *dentry, struct 
inode *inode); + +extern void release_sysfs_dirent(struct sysfs_dirent * sd); +extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name); +extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const unsigned char *name); +extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, + int type); + +extern int sysfs_add_file(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type); +extern int sysfs_hash_and_remove(struct kobject *kobj, + struct sysfs_dirent *dir_sd, const char *name); extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); -extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); -extern void sysfs_remove_subdir(struct dentry *); +extern int sysfs_create_subdir(struct kobject *kobj, const char *name, + struct sysfs_dirent **p_sd); +extern void sysfs_remove_subdir(struct sysfs_dirent *sd); -extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd); -extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent); extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); -extern spinlock_t sysfs_lock; -extern struct rw_semaphore sysfs_rename_sem; +extern spinlock_t sysfs_assoc_lock; +extern struct mutex sysfs_mutex; extern struct super_block * sysfs_sb; extern const struct file_operations sysfs_dir_operations; extern const struct file_operations sysfs_file_operations; @@ -42,73 +120,9 @@ extern const struct inode_operations sysfs_dir_inode_operations; extern const struct inode_operations sysfs_symlink_inode_operations; -struct sysfs_symlink { - char * link_name; - struct kobject * target_kobj; -}; - -struct sysfs_buffer { - struct list_head associates; - size_t count; - loff_t pos; - char * page; - struct sysfs_ops * ops; - struct semaphore sem; - int orphaned; - int needs_read_fill; - int event; -}; - -struct sysfs_buffer_collection { - struct list_head 
associates; -}; - -static inline struct kobject * to_kobj(struct dentry * dentry) -{ - struct sysfs_dirent * sd = dentry->d_fsdata; - return ((struct kobject *) sd->s_element); -} - -static inline struct attribute * to_attr(struct dentry * dentry) -{ - struct sysfs_dirent * sd = dentry->d_fsdata; - return ((struct attribute *) sd->s_element); -} - -static inline struct bin_attribute * to_bin_attr(struct dentry * dentry) -{ - struct sysfs_dirent * sd = dentry->d_fsdata; - return ((struct bin_attribute *) sd->s_element); -} - -static inline struct kobject *sysfs_get_kobject(struct dentry *dentry) +static inline unsigned int sysfs_type(struct sysfs_dirent *sd) { - struct kobject * kobj = NULL; - - spin_lock(&dcache_lock); - if (!d_unhashed(dentry)) { - struct sysfs_dirent * sd = dentry->d_fsdata; - if (sd->s_type & SYSFS_KOBJ_LINK) { - struct sysfs_symlink * sl = sd->s_element; - kobj = kobject_get(sl->target_kobj); - } else - kobj = kobject_get(sd->s_element); - } - spin_unlock(&dcache_lock); - - return kobj; -} - -static inline void release_sysfs_dirent(struct sysfs_dirent * sd) -{ - if (sd->s_type & SYSFS_KOBJ_LINK) { - struct sysfs_symlink * sl = sd->s_element; - kfree(sl->link_name); - kobject_put(sl->target_kobj); - kfree(sl); - } - kfree(sd->s_iattr); - kmem_cache_free(sysfs_dir_cachep, sd); + return sd->s_flags & SYSFS_TYPE_MASK; } static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) @@ -122,11 +136,6 @@ static inline void sysfs_put(struct sysfs_dirent * sd) { - if (atomic_dec_and_test(&sd->s_count)) + if (sd && atomic_dec_and_test(&sd->s_count)) release_sysfs_dirent(sd); } - -static inline int sysfs_is_shadowed_inode(struct inode *inode) -{ - return S_ISDIR(inode->i_mode) && inode->i_op->follow_link; -} diff -Nurb linux-2.6.22-570/fs/unionfs/Makefile linux-2.6.22-591/fs/unionfs/Makefile --- linux-2.6.22-570/fs/unionfs/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ 
-0,0 +1,7 @@ +obj-$(CONFIG_UNION_FS) += unionfs.o + +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \ + rdstate.o copyup.o dirhelper.o rename.o unlink.o \ + lookup.o commonfops.o dirfops.o sioq.o mmap.o + +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o diff -Nurb linux-2.6.22-570/fs/unionfs/commonfops.c linux-2.6.22-591/fs/unionfs/commonfops.c --- linux-2.6.22-570/fs/unionfs/commonfops.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/commonfops.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */
+
+#include "union.h"
+
+/*
+ * 1) Copyup the file
+ * 2) Rename the file to '.unionfs<lower inode#><counter>' - obviously
+ * stolen from NFS's silly rename
+ *
+ * The temporary name is ".unionfs" followed by the lower inode number and
+ * a counter, both printed as zero-padded hex.  The file is copied from
+ * branch @bstart to branch @bindex under that name, and afterwards the
+ * lower dentry at dbstart(dentry) is unlinked.
+ *
+ * Returns 0 on success, or the error reported by lookup_one_len(),
+ * copyup_named_file() (other than -EEXIST, which restarts the name search)
+ * or vfs_unlink().
+ */
+static int copyup_deleted_file(struct file *file, struct dentry *dentry,
+			       int bstart, int bindex)
+{
+	static unsigned int counter;	/* NOTE(review): shared by all callers; no locking visible here */
+	const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;	/* hex digits per inode number */
+	const int countersize = sizeof(counter) * 2;	/* hex digits per counter value */
+	const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
+	char name[nlen + 1];
+
+	int err;
+	struct dentry *tmp_dentry = NULL;
+	struct dentry *hidden_dentry;
+	struct dentry *hidden_dir_dentry = NULL;
+
+	hidden_dentry = unionfs_lower_dentry_idx(dentry, bstart);
+
+	sprintf(name, ".unionfs%*.*lx",
+		i_inosize, i_inosize, hidden_dentry->d_inode->i_ino);
+
+retry:
+	/*
+	 * Loop, looking for an unused temp name to copyup to.
+	 *
+	 * It's somewhat silly that we look for a free temp name in the
+	 * source branch (bstart) instead of the dest branch (bindex), where
+	 * the final name will be created.  We _will_ catch it if somehow
+	 * the name exists in the dest branch, but it'd be nice to catch it
+	 * sooner than later.
+	 */
+	tmp_dentry = NULL;
+	do {
+		char *suffix = name + nlen - countersize;	/* counter digits go at the tail of name */
+
+		dput(tmp_dentry);
+		counter++;
+		sprintf(suffix, "%*.*x", countersize, countersize, counter);
+
+		printk(KERN_DEBUG "unionfs: trying to rename %s to %s\n",
+		       dentry->d_name.name, name);
+
+		tmp_dentry = lookup_one_len(name, hidden_dentry->d_parent,
+					    nlen);
+		if (IS_ERR(tmp_dentry)) {
+			err = PTR_ERR(tmp_dentry);
+			goto out;
+		}
+	} while (tmp_dentry->d_inode != NULL);	/* need negative dentry */
+	dput(tmp_dentry);
+
+	err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
+				bindex, file->f_dentry->d_inode->i_size);
+	if (err == -EEXIST)
+		goto retry;	/* name appeared in the meantime: pick another */
+	else if (err)
+		goto out;
+
+	/* bring it to the same state as an unlinked file */
+	hidden_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
+	hidden_dir_dentry = lock_parent(hidden_dentry);
+	err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
+	unlock_dir(hidden_dir_dentry);
+
+out:
+	return err;
+}
+
+/*
+ * put all references held by upper struct file and free lower file pointer
+ * array
+ */
+static void cleanup_file(struct file *file)
+{
+	int bindex, bstart, bend;
+	struct file **lf;
+	struct super_block *sb = file->f_dentry->d_sb;
+
+	lf = UNIONFS_F(file)->lower_files;
+	bstart = fbstart(file);
+	bend = fbend(file);
+
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		if (unionfs_lower_file_idx(file, bindex)) {
+			/*
+			 * Find new index of matching branch with an open
+			 * file, since branches could have been added or
+			 * deleted causing the one with open files to shift.
+			 */
+			int i;	/* holds (possibly) updated branch index */
+			int old_bid;
+
+			old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
+			i = branch_id_to_idx(sb, old_bid);
+			if (i < 0)
+				printk(KERN_ERR "unionfs: no superblock for "
+				       "file %p\n", file);
+			else {
+				/* decrement count of open files */
+				branchput(sb, i);
+				/*
+				 * fput will perform an mntput for us on the
+				 * correct branch.
Although we're using the
+				 * file's old branch configuration, bindex,
+				 * which is the old index, correctly points
+				 * to the right branch in the file's branch
+				 * list.  In other words, we're going to
+				 * mntput the correct branch even if
+				 * branches have been added/removed.
+				 */
+				fput(unionfs_lower_file_idx(file, bindex));
+			}
+		}
+	}
+
+	UNIONFS_F(file)->lower_files = NULL;
+	kfree(lf);
+	kfree(UNIONFS_F(file)->saved_branch_ids);
+	/* set to NULL because caller needs to know if to kfree on error */
+	UNIONFS_F(file)->saved_branch_ids = NULL;
+}
+
+/*
+ * open all lower files for a given file
+ *
+ * Every lower dentry between dbstart and dbend is opened with the upper
+ * file's flags and recorded as the lower file for that branch index.
+ * Returns 0, or the dentry_open() error for the first branch that fails;
+ * files opened before the failure are left in place.
+ */
+static int open_all_files(struct file *file)
+{
+	int bindex, bstart, bend, err = 0;
+	struct file *hidden_file;
+	struct dentry *hidden_dentry;
+	struct dentry *dentry = file->f_dentry;
+	struct super_block *sb = dentry->d_sb;
+
+	bstart = dbstart(dentry);
+	bend = dbend(dentry);
+
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+		if (!hidden_dentry)
+			continue;
+
+		dget(hidden_dentry);
+		unionfs_mntget(dentry, bindex);
+		branchget(sb, bindex);
+
+		hidden_file =
+			dentry_open(hidden_dentry,
+				    unionfs_lower_mnt_idx(dentry, bindex),
+				    file->f_flags);
+		if (IS_ERR(hidden_file)) {
+			/*
+			 * No lower file was recorded for this branch, so
+			 * nobody would ever drop the branch reference taken
+			 * above; give it back here.
+			 */
+			branchput(sb, bindex);
+			err = PTR_ERR(hidden_file);
+			goto out;
+		} else
+			unionfs_set_lower_file_idx(file, bindex, hidden_file);
+	}
+out:
+	return err;
+}
+
+/*
+ * open the highest priority file for a given upper file
+ *
+ * If the caller will write (@willwrite), the file was opened for writing
+ * and it sits on a read-only branch, the file is copied up to the first
+ * branch to the left that accepts it.  Otherwise the lower dentry at
+ * dbstart is opened and the upper file's position and readahead state are
+ * copied to it.  Returns 0 or a negative errno.
+ */
+static int open_highest_file(struct file *file, int willwrite)
+{
+	int bindex, bstart, bend, err = 0;
+	struct file *hidden_file;
+	struct dentry *hidden_dentry;
+
+	struct dentry *dentry = file->f_dentry;
+	struct inode *parent_inode = dentry->d_parent->d_inode;
+	struct super_block *sb = dentry->d_sb;
+	/* loff_t, as in do_delayed_copyup(): size_t would truncate i_size */
+	loff_t inode_size = dentry->d_inode->i_size;
+
+	bstart = dbstart(dentry);
+	bend = dbend(dentry);
+
+	hidden_dentry = unionfs_lower_dentry(dentry);
+	if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
+		for (bindex = bstart - 1; bindex >= 0; bindex--) {
+			err = copyup_file(parent_inode, file, bstart, bindex,
+					  inode_size);
+			if (!err)
+				break;
+		}
+		atomic_set(&UNIONFS_F(file)->generation,
+			   atomic_read(&UNIONFS_I(dentry->d_inode)->
+				       generation));
+		goto out;
+	}
+
+	dget(hidden_dentry);
+	unionfs_mntget(dentry, bstart);
+	branchget(sb, bstart);
+	hidden_file = dentry_open(hidden_dentry,
+				  unionfs_lower_mnt_idx(dentry, bstart),
+				  file->f_flags);
+	if (IS_ERR(hidden_file)) {
+		/* the open failed, so drop the branch reference taken above */
+		branchput(sb, bstart);
+		err = PTR_ERR(hidden_file);
+		goto out;
+	}
+	unionfs_set_lower_file(file, hidden_file);
+	/* Fix up the position. */
+	hidden_file->f_pos = file->f_pos;
+
+	memcpy(&hidden_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
+out:
+	return err;
+}
+
+/*
+ * perform a delayed copyup of a read-write file on a read-only branch
+ *
+ * Tries each branch to the left of fbstart(file) until one copyup
+ * succeeds; a deleted (open-but-unlinked) file goes through
+ * copyup_deleted_file().  If the copyup moved fbstart(file) to the left,
+ * the lower files from the old start branch onwards are released.
+ */
+static int do_delayed_copyup(struct file *file, struct dentry *dentry)
+{
+	int bindex, bstart, bend, err = 0;
+	struct inode *parent_inode = dentry->d_parent->d_inode;
+	loff_t inode_size = file->f_dentry->d_inode->i_size;
+
+	bstart = fbstart(file);
+	bend = fbend(file);
+
+	BUG_ON(!S_ISREG(file->f_dentry->d_inode->i_mode));
+
+	for (bindex = bstart - 1; bindex >= 0; bindex--) {
+		if (!d_deleted(file->f_dentry))
+			err = copyup_file(parent_inode, file, bstart,
+					  bindex, inode_size);
+		else
+			err = copyup_deleted_file(file, dentry, bstart,
+						  bindex);
+
+		if (!err)
+			break;
+	}
+	if (!err && (bstart > fbstart(file))) {
+		bend = fbend(file);
+		for (bindex = bstart; bindex <= bend; bindex++) {
+			if (unionfs_lower_file_idx(file, bindex)) {
+				branchput(dentry->d_sb, bindex);
+				fput(unionfs_lower_file_idx(file, bindex));
+				unionfs_set_lower_file_idx(file, bindex, NULL);
+			}
+		}
+		fbend(file) = bend;
+	}
+	return err;
+}
+
+/*
+ * Revalidate the struct file
+ * @file: file to revalidate
+ * @willwrite: 1 if caller may cause changes to the file; 0 otherwise.
+ *
+ * Returns 0 on success, -ESTALE if the dentry chain fails revalidation,
+ * -ENOMEM if the lower-file arrays cannot be reallocated, or the error
+ * from reopening or copying up the lower file(s).
+ */
+int unionfs_file_revalidate(struct file *file, int willwrite)
+{
+	struct super_block *sb;
+	struct dentry *dentry;
+	int sbgen, fgen, dgen;
+	int bstart, bend;
+	int size;
+
+	int err = 0;
+
+	dentry = file->f_dentry;
+	unionfs_lock_dentry(dentry);
+	sb = dentry->d_sb;
+
+	/*
+	 * First revalidate the dentry inside struct file,
+	 * but not unhashed dentries.
+	 */
+	if (!d_deleted(dentry) &&
+	    !__unionfs_d_revalidate_chain(dentry, NULL)) {
+		err = -ESTALE;
+		goto out_nofree;
+	}
+
+	sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
+	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
+	fgen = atomic_read(&UNIONFS_F(file)->generation);
+
+	BUG_ON(sbgen > dgen);
+
+	/*
+	 * There are two cases we are interested in.  The first is if the
+	 * generation is lower than the super-block.  The second is if
+	 * someone has copied up this file from underneath us, we also need
+	 * to refresh things.
+	 */
+	if (!d_deleted(dentry) &&
+	    (sbgen > fgen || dbstart(dentry) != fbstart(file))) {
+		/* First we throw out the existing files. */
+		cleanup_file(file);
+
+		/* Now we reopen the file(s) as in unionfs_open. */
+		bstart = fbstart(file) = dbstart(dentry);
+		bend = fbend(file) = dbend(dentry);
+
+		size = sizeof(struct file *) * sbmax(sb);
+		UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
+		if (!UNIONFS_F(file)->lower_files) {
+			err = -ENOMEM;
+			goto out;
+		}
+		size = sizeof(int) * sbmax(sb);
+		UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
+		if (!UNIONFS_F(file)->saved_branch_ids) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		if (S_ISDIR(dentry->d_inode->i_mode)) {
+			/* We need to open all the files. */
+			err = open_all_files(file);
+			if (err)
+				goto out;
+		} else {
+			/* We only open the highest priority branch. */
+			err = open_highest_file(file, willwrite);
+			if (err)
+				goto out;
+		}
+		atomic_set(&UNIONFS_F(file)->generation,
+			   atomic_read(&UNIONFS_I(dentry->d_inode)->
+				       generation));
+	}
+
+	/* Copyup on the first write to a file on a readonly branch. */
+	if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
+	    !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
+	    is_robranch(dentry)) {
+		printk(KERN_DEBUG "unionfs: Doing delayed copyup of a "
+		       "read-write file on a read-only branch.\n");
+		err = do_delayed_copyup(file, dentry);
+	}
+
+out:
+	/*
+	 * NOTE(review): both arrays are freed here but, unlike in
+	 * cleanup_file(), the pointers are not reset to NULL, and this path
+	 * is also reached when do_delayed_copyup() fails with lower files
+	 * still recorded.  Confirm no later path frees or dereferences them.
+	 */
+	if (err) {
+		kfree(UNIONFS_F(file)->lower_files);
+		kfree(UNIONFS_F(file)->saved_branch_ids);
+	}
+out_nofree:
+	unionfs_unlock_dentry(dentry);
+	return err;
+}
+
+/*
+ * unionfs_open helper function: open a directory
+ *
+ * Opens every lower dentry between dbstart and dbend with the upper file's
+ * flags.  Returns 0, or the dentry_open() error for the first branch that
+ * fails; lower files opened before the failure stay recorded in @file.
+ */
+static int __open_dir(struct inode *inode, struct file *file)
+{
+	struct dentry *hidden_dentry;
+	struct file *hidden_file;
+	int bindex, bstart, bend;
+
+	bstart = fbstart(file) = dbstart(file->f_dentry);
+	bend = fbend(file) = dbend(file->f_dentry);
+
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_dentry =
+			unionfs_lower_dentry_idx(file->f_dentry, bindex);
+		if (!hidden_dentry)
+			continue;
+
+		dget(hidden_dentry);
+		unionfs_mntget(file->f_dentry, bindex);
+		hidden_file = dentry_open(hidden_dentry,
+					  unionfs_lower_mnt_idx(file->f_dentry,
+								bindex),
+					  file->f_flags);
+		if (IS_ERR(hidden_file))
+			return PTR_ERR(hidden_file);
+
+		unionfs_set_lower_file_idx(file, bindex, hidden_file);
+
+		/*
+		 * The branchget goes after the open, because otherwise
+		 * we would miss the reference on release.
+		 */
+		branchget(inode->i_sb, bindex);
+	}
+
+	return 0;
+}
+
+/*
+ * unionfs_open helper function: open a file
+ *
+ * Only the lower dentry at dbstart is opened.  When that dentry has an
+ * inode and lives on a read-only branch, an O_TRUNC open copies the file
+ * up immediately (with size 0) and returns; any other open has its write
+ * flags stripped so the copyup can be deferred.
+ */
+static int __open_file(struct inode *inode, struct file *file)
+{
+	struct dentry *hidden_dentry;
+	struct file *hidden_file;
+	int hidden_flags;
+	int bindex, bstart, bend;
+
+	hidden_dentry = unionfs_lower_dentry(file->f_dentry);
+	hidden_flags = file->f_flags;
+
+	bstart = fbstart(file) = dbstart(file->f_dentry);
+	bend = fbend(file) = dbend(file->f_dentry);
+
+	/*
+	 * check for the permission for hidden file.  If the error is
+	 * COPYUP_ERR, copyup the file.
+	 */
+	if (hidden_dentry->d_inode && is_robranch(file->f_dentry)) {
+		/*
+		 * if the open will change the file, copy it up otherwise
+		 * defer it.
+		 */
+		if (hidden_flags & O_TRUNC) {
+			int size = 0;
+			int err = -EROFS;	/* returned as-is when there is no branch to the left */
+
+			/* copyup the file */
+			for (bindex = bstart - 1; bindex >= 0; bindex--) {
+				err = copyup_file(
+					file->f_dentry->d_parent->d_inode,
+					file, bstart, bindex, size);
+				if (!err)
+					break;
+			}
+			return err;
+		} else
+			hidden_flags &= ~(OPEN_WRITE_FLAGS);
+	}
+
+	dget(hidden_dentry);
+
+	/*
+	 * dentry_open will decrement mnt refcnt if err.
+	 * otherwise fput() will do an mntput() for us upon file close.
+	 */
+	unionfs_mntget(file->f_dentry, bstart);
+	hidden_file =
+		dentry_open(hidden_dentry,
+			    unionfs_lower_mnt_idx(file->f_dentry, bstart),
+			    hidden_flags);
+	if (IS_ERR(hidden_file))
+		return PTR_ERR(hidden_file);
+
+	unionfs_set_lower_file(file, hidden_file);
+	branchget(inode->i_sb, bstart);
+
+	return 0;
+}
+
+/*
+ * Open entry point: allocate the per-file info and its lower-file and
+ * branch-id arrays, then open the lower object(s) through __open_dir() or
+ * __open_file().  Returns 0 or a negative errno.
+ */
+int unionfs_open(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *hidden_file = NULL;
+	struct dentry *dentry = NULL;
+	int bindex = 0, bstart = 0, bend = 0;
+	int size;
+
+	unionfs_read_lock(inode->i_sb);
+
+	file->private_data =
+		kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
+	if (!UNIONFS_F(file)) {
+		err = -ENOMEM;
+		goto out_nofree;
+	}
+	fbstart(file) = -1;
+	fbend(file) = -1;
+	atomic_set(&UNIONFS_F(file)->generation,
+		   atomic_read(&UNIONFS_I(inode)->generation));
+
+	size = sizeof(struct file *) * sbmax(inode->i_sb);
+	UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
+	if (!UNIONFS_F(file)->lower_files) {
+		err = -ENOMEM;
+		goto out;
+	}
+	size = sizeof(int) * sbmax(inode->i_sb);
+	UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
+	if (!UNIONFS_F(file)->saved_branch_ids) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	dentry = file->f_dentry;
+	unionfs_lock_dentry(dentry);
+
+	bstart = fbstart(file) = dbstart(dentry);
+	bend = fbend(file) = dbend(dentry);
+
+	/* increment, so that we
can flush appropriately */
+	atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens);
+
+	/*
+	 * open all directories and make the unionfs file struct point to
+	 * these hidden file structs
+	 */
+	if (S_ISDIR(inode->i_mode))
+		err = __open_dir(inode, file);	/* open a dir */
+	else
+		err = __open_file(inode, file);	/* open a file */
+
+	/* freeing the allocated resources, and fput the opened files */
+	if (err) {
+		atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens);
+		for (bindex = bstart; bindex <= bend; bindex++) {
+			hidden_file = unionfs_lower_file_idx(file, bindex);
+			if (!hidden_file)
+				continue;
+
+			branchput(file->f_dentry->d_sb, bindex);
+			/* fput calls dput for hidden_dentry */
+			fput(hidden_file);
+		}
+	}
+
+	unionfs_unlock_dentry(dentry);
+
+out:
+	if (err) {
+		kfree(UNIONFS_F(file)->lower_files);
+		kfree(UNIONFS_F(file)->saved_branch_ids);
+		kfree(UNIONFS_F(file));
+	}
+out_nofree:
+	unionfs_read_unlock(inode->i_sb);
+	return err;
+}
+
+/*
+ * release all lower object references & free the file info structure
+ *
+ * The file is revalidated first; if that fails its error is returned and
+ * nothing is released.  Otherwise every lower file is fput, its branch
+ * reference dropped, any readdir state is moved to the inode's
+ * readdircache list, and the file info is freed.
+ *
+ * The superblock read lock taken on entry is dropped on every return
+ * path, as in the other entry points of this file.
+ */
+int unionfs_file_release(struct inode *inode, struct file *file)
+{
+	struct file *hidden_file = NULL;
+	struct unionfs_file_info *fileinfo;
+	struct unionfs_inode_info *inodeinfo;
+	struct super_block *sb = inode->i_sb;
+	int bindex, bstart, bend;
+	int err;
+
+	unionfs_read_lock(sb);
+	/*
+	 * Yes, we have to revalidate this file even if it's being released.
+	 * This is important for open-but-unlinked files, as well as mmap
+	 * support.
+	 */
+	if ((err = unionfs_file_revalidate(file, 1)))
+		goto out;	/* must not return with the read lock held */
+	fileinfo = UNIONFS_F(file);
+	BUG_ON(file->f_dentry->d_inode != inode);
+	inodeinfo = UNIONFS_I(inode);
+
+	/* fput all the hidden files */
+	bstart = fbstart(file);
+	bend = fbend(file);
+
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_file = unionfs_lower_file_idx(file, bindex);
+
+		if (hidden_file) {
+			fput(hidden_file);
+			branchput(inode->i_sb, bindex);
+		}
+	}
+	kfree(fileinfo->lower_files);
+	kfree(fileinfo->saved_branch_ids);
+
+	if (fileinfo->rdstate) {
+		fileinfo->rdstate->access = jiffies;
+		printk(KERN_DEBUG "unionfs: saving rdstate with cookie "
+		       "%u [%d.%lld]\n",
+		       fileinfo->rdstate->cookie,
+		       fileinfo->rdstate->bindex,
+		       (long long)fileinfo->rdstate->dirpos);
+		spin_lock(&inodeinfo->rdlock);
+		inodeinfo->rdcount++;
+		list_add_tail(&fileinfo->rdstate->cache,
+			      &inodeinfo->readdircache);
+		mark_inode_dirty(inode);
+		spin_unlock(&inodeinfo->rdlock);
+		fileinfo->rdstate = NULL;
+	}
+	kfree(fileinfo);
+out:
+	unionfs_read_unlock(sb);
+	return err;
+}
+
+/*
+ * pass the ioctl to the lower fs
+ *
+ * Returns -ENOTTY when there is no lower file, it has no file operations,
+ * or it implements neither ->unlocked_ioctl nor ->ioctl; otherwise the
+ * security hook's error or the lower ioctl's result.
+ */
+static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct file *hidden_file;
+	int err;
+
+	hidden_file = unionfs_lower_file(file);
+
+	/* check for a lower file before handing it to the security hook */
+	err = -ENOTTY;
+	if (!hidden_file || !hidden_file->f_op)
+		goto out;
+
+	err = security_file_ioctl(hidden_file, cmd, arg);
+	if (err)
+		goto out;
+
+	err = -ENOTTY;
+	if (hidden_file->f_op->unlocked_ioctl) {
+		err = hidden_file->f_op->unlocked_ioctl(hidden_file, cmd, arg);
+	} else if (hidden_file->f_op->ioctl) {
+		lock_kernel();
+		err = hidden_file->f_op->ioctl(hidden_file->f_dentry->d_inode,
+					       hidden_file, cmd, arg);
+		unlock_kernel();
+	}
+
+out:
+	return err;
+}
+
+/*
+ * return to user-space the branch indices containing the file in question
+ *
+ * We use fd_set and therefore we are limited to the number of the branches
+ * to FD_SETSIZE, which is currently 1024 - plenty for most people
+ */
+static int
unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	int err = 0;
+	fd_set branchlist;	/* one bit per branch index holding a positive lower dentry */
+
+	int bstart = 0, bend = 0, bindex = 0;
+	struct dentry *dentry, *hidden_dentry;
+
+	dentry = file->f_dentry;
+	unionfs_lock_dentry(dentry);
+	if ((err = unionfs_partial_lookup(dentry)))
+		goto out;
+	bstart = dbstart(dentry);
+	bend = dbend(dentry);
+
+	FD_ZERO(&branchlist);
+
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+		if (!hidden_dentry)
+			continue;
+		if (hidden_dentry->d_inode)
+			FD_SET(bindex, &branchlist);
+	}
+
+	err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
+	if (err)
+		err = -EFAULT;
+
+out:
+	unionfs_unlock_dentry(dentry);
+	/* on success the return value is the last branch index, not 0 */
+	return err < 0 ? err : bend;
+}
+
+/*
+ * ioctl entry point: revalidate the file, answer the unionfs-specific
+ * commands locally and pass every other command to do_ioctl().
+ */
+long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long err;
+
+	unionfs_read_lock(file->f_path.dentry->d_sb);
+
+	if ((err = unionfs_file_revalidate(file, 1)))
+		goto out;
+
+	/* check if asked for local commands */
+	switch (cmd) {
+	case UNIONFS_IOCTL_INCGEN:
+		/* Increment the superblock generation count */
+		printk("unionfs: incgen ioctl deprecated; "
+		       "use \"-o remount,incgen\"\n");
+		err = -ENOSYS;
+		break;
+
+	case UNIONFS_IOCTL_QUERYFILE:
+		/* Return list of branches containing the given file */
+		err = unionfs_ioctl_queryfile(file, cmd, arg);
+		break;
+
+	default:
+		/* pass the ioctl down */
+		err = do_ioctl(file, cmd, arg);
+		break;
+	}
+
+out:
+	unionfs_read_unlock(file->f_path.dentry->d_sb);
+	return err;
+}
+
+/*
+ * Flush entry point.  The inode's totalopens count is decremented, and
+ * only when it reaches zero is ->flush called on each lower file that
+ * provides one.  Stops at, and returns, the first lower flush error.
+ */
+int unionfs_flush(struct file *file, fl_owner_t id)
+{
+	int err = 0;
+	struct file *hidden_file = NULL;
+	struct dentry *dentry = file->f_dentry;
+	int bindex, bstart, bend;
+
+	unionfs_read_lock(file->f_path.dentry->d_sb);
+
+	if ((err = unionfs_file_revalidate(file, 1)))
+		goto out;
+
+	if (!atomic_dec_and_test(&UNIONFS_I(dentry->d_inode)->totalopens))
+		goto out;
+
+	unionfs_lock_dentry(dentry);
+
+	bstart = fbstart(file);
+	bend = fbend(file);
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_file = unionfs_lower_file_idx(file, bindex);
+
+		if (hidden_file && hidden_file->f_op &&
+		    hidden_file->f_op->flush) {
+			err = hidden_file->f_op->flush(hidden_file, id);
+			if (err)
+				goto out_lock;
+
+			/* if there are no more refs to the dentry, dput it */
+			if (d_deleted(dentry)) {
+				dput(unionfs_lower_dentry_idx(dentry, bindex));
+				unionfs_set_lower_dentry_idx(dentry, bindex,
+							     NULL);
+			}
+		}
+
+	}
+
+out_lock:
+	unionfs_unlock_dentry(dentry);
+out:
+	unionfs_read_unlock(file->f_path.dentry->d_sb);
+	return err;
+}
diff -Nurb linux-2.6.22-570/fs/unionfs/copyup.c linux-2.6.22-591/fs/unionfs/copyup.c
--- linux-2.6.22-570/fs/unionfs/copyup.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/fs/unionfs/copyup.c 2007-12-21 15:36:12.000000000 -0500
@@ -0,0 +1,806 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * For detailed explanation of copyup see:
+ * Documentation/filesystems/unionfs/concepts.txt
+ */
+
+/* forward definitions */
+static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
+			       int bstart, int new_bindex, const char *name,
+			       int namelen, struct file **copyup_file,
+			       loff_t len);
+static struct dentry *create_parents_named(struct inode *dir,
+					   struct dentry *dentry,
+					   const char *name, int bindex);
+
+#ifdef CONFIG_UNION_FS_XATTR
+/*
+ * copyup all extended attrs for a given dentry
+ *
+ * Lists the xattr names of @old_hidden_dentry, then reads each value and
+ * sets it on @new_hidden_dentry.  Returns 0 on success, when there are no
+ * xattrs, or when the failure is -ENOTSUPP/-EOPNOTSUPP; otherwise a
+ * negative errno.
+ */
+static int copyup_xattrs(struct dentry *old_hidden_dentry,
+			 struct dentry *new_hidden_dentry)
+{
+	int err = 0;
+	ssize_t list_size = -1;
+	ssize_t name_list_size = 0;	/* size the name buffer was allocated with */
+	char *name_list = NULL;
+	char *attr_value = NULL;
+	char *name_list_orig = NULL;	/* start of the name buffer, for freeing */
+
+	list_size = vfs_listxattr(old_hidden_dentry, NULL, 0);
+
+	if (list_size <= 0) {
+		err = list_size;
+		goto out;
+	}
+
+	name_list_size = list_size + 1;
+	name_list_orig = unionfs_xattr_alloc(name_list_size, XATTR_LIST_MAX);
+	if (!name_list_orig || IS_ERR(name_list_orig)) {
+		err = name_list_orig ? PTR_ERR(name_list_orig) : -ENOMEM;
+		name_list_orig = NULL;	/* nothing to free at out: */
+		goto out;
+	}
+	list_size = vfs_listxattr(old_hidden_dentry, name_list_orig,
+				  list_size);
+	if (list_size <= 0) {
+		/* the list vanished or changed size since the first call */
+		err = list_size;
+		goto out;
+	}
+	/* the spare byte terminates the walk over the name list below */
+	name_list_orig[list_size] = '\0';
+
+	attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
+	if (!attr_value || IS_ERR(attr_value)) {
+		err = attr_value ? PTR_ERR(attr_value) : -ENOMEM;
+		attr_value = NULL;	/* never hand an ERR_PTR to the free below */
+		goto out;
+	}
+
+	name_list = name_list_orig;
+	while (*name_list) {
+		ssize_t size;
+
+		/* Lock here since vfs_getxattr doesn't lock for us */
+		mutex_lock(&old_hidden_dentry->d_inode->i_mutex);
+		size = vfs_getxattr(old_hidden_dentry, name_list,
+				    attr_value, XATTR_SIZE_MAX);
+		mutex_unlock(&old_hidden_dentry->d_inode->i_mutex);
+		if (size < 0) {
+			err = size;
+			goto out;
+		}
+
+		if (size > XATTR_SIZE_MAX) {
+			err = -E2BIG;
+			goto out;
+		}
+		/* Don't lock here since vfs_setxattr does it for us. */
+		err = vfs_setxattr(new_hidden_dentry, name_list, attr_value,
+				   size, 0);
+
+		if (err < 0)
+			goto out;
+		name_list += strlen(name_list) + 1;
+	}
+out:
+	if (name_list_orig)
+		unionfs_xattr_free(name_list_orig, name_list_size);
+	if (attr_value)
+		unionfs_xattr_free(attr_value, XATTR_SIZE_MAX);
+	/* It is no big deal if this fails, we just roll with the punches. */
+	if (err == -ENOTSUPP || err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+#endif /* CONFIG_UNION_FS_XATTR */
+
+/* Determine the mode based on the copyup flags, and the existing dentry. */
+static int copyup_permissions(struct super_block *sb,
+			      struct dentry *old_hidden_dentry,
+			      struct dentry *new_hidden_dentry)
+{
+	struct inode *i = old_hidden_dentry->d_inode;
+	struct iattr newattrs;
+	int err;
+
+	newattrs.ia_atime = i->i_atime;
+	newattrs.ia_mtime = i->i_mtime;
+	newattrs.ia_ctime = i->i_ctime;
+
+	newattrs.ia_gid = i->i_gid;
+	newattrs.ia_uid = i->i_uid;
+
+	newattrs.ia_mode = i->i_mode;
+
+	newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
+		ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
+		ATTR_GID | ATTR_UID | ATTR_MODE;
+
+	err = notify_change(new_hidden_dentry, &newattrs);
+
+	return err;
+}
+
+int copyup_dentry(struct inode *dir, struct dentry *dentry,
+		  int bstart, int new_bindex,
+		  struct file **copyup_file, loff_t len)
+{
+	return copyup_named_dentry(dir, dentry, bstart, new_bindex,
+				   dentry->d_name.name,
+				   dentry->d_name.len, copyup_file, len);
+}
+
+/*
+ * create the new device/file/directory - use copyup_permission to copyup
+ * times, and mode
+ *
+ * if the object being copied up is a regular file, the file is only created,
+ * the contents have to be copied up separately
+ */
+static int __copyup_ndentry(struct dentry *old_hidden_dentry,
+			    struct dentry *new_hidden_dentry,
+			    struct dentry *new_hidden_parent_dentry,
+			    char *symbuf)
+{
+	int err = 0;
+	umode_t old_mode = old_hidden_dentry->d_inode->i_mode;
+	struct sioq_args args;
+
+	if
(S_ISDIR(old_mode)) {
+		args.mkdir.parent = new_hidden_parent_dentry->d_inode;
+		args.mkdir.dentry = new_hidden_dentry;
+		args.mkdir.mode = old_mode;
+
+		run_sioq(__unionfs_mkdir, &args);
+		err = args.err;
+	} else if (S_ISLNK(old_mode)) {
+		args.symlink.parent = new_hidden_parent_dentry->d_inode;
+		args.symlink.dentry = new_hidden_dentry;
+		args.symlink.symbuf = symbuf;
+		args.symlink.mode = old_mode;
+
+		run_sioq(__unionfs_symlink, &args);
+		err = args.err;
+	} else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
+		   S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
+		args.mknod.parent = new_hidden_parent_dentry->d_inode;
+		args.mknod.dentry = new_hidden_dentry;
+		args.mknod.mode = old_mode;
+		args.mknod.dev = old_hidden_dentry->d_inode->i_rdev;
+
+		run_sioq(__unionfs_mknod, &args);
+		err = args.err;
+	} else if (S_ISREG(old_mode)) {
+		args.create.parent = new_hidden_parent_dentry->d_inode;
+		args.create.dentry = new_hidden_dentry;
+		args.create.mode = old_mode;
+		args.create.nd = NULL;
+
+		run_sioq(__unionfs_create, &args);
+		err = args.err;
+	} else {
+		printk(KERN_ERR "unionfs: unknown inode type %d\n",
+		       old_mode);
+		BUG();
+	}
+
+	return err;
+}
+
+/*
+ * Copy up to @len bytes of regular-file data from @old_hidden_dentry
+ * (branch @old_bindex) to @new_hidden_dentry (branch @new_bindex), one
+ * page-sized buffer at a time, then fsync the new file if it supports it.
+ *
+ * On success, if @copyup_file is non-NULL the still-open output file is
+ * handed back through it together with its branch reference; otherwise
+ * the output file is closed.  Returns 0 or a negative errno; a short
+ * write is reported as -EIO.
+ */
+static int __copyup_reg_data(struct dentry *dentry,
+			     struct dentry *new_hidden_dentry, int new_bindex,
+			     struct dentry *old_hidden_dentry, int old_bindex,
+			     struct file **copyup_file, loff_t len)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct file *input_file;
+	struct file *output_file;
+	mm_segment_t old_fs;
+	char *buf = NULL;
+	ssize_t read_bytes, write_bytes;
+	loff_t size;
+	int err = 0;
+
+	/* open old file */
+	unionfs_mntget(dentry, old_bindex);
+	branchget(sb, old_bindex);
+	input_file = dentry_open(old_hidden_dentry,
+				 unionfs_lower_mnt_idx(dentry, old_bindex),
+				 O_RDONLY | O_LARGEFILE);
+	if (IS_ERR(input_file)) {
+		dput(old_hidden_dentry);
+		err = PTR_ERR(input_file);
+		goto out;
+	}
+	if (!input_file->f_op || !input_file->f_op->read) {
+		err = -EINVAL;
+		goto out_close_in;
+	}
+
+	/* open new file */
+	dget(new_hidden_dentry);
+	unionfs_mntget(dentry, new_bindex);
+	branchget(sb, new_bindex);
+	output_file = dentry_open(new_hidden_dentry,
+				  unionfs_lower_mnt_idx(dentry, new_bindex),
+				  O_WRONLY | O_LARGEFILE);
+	if (IS_ERR(output_file)) {
+		err = PTR_ERR(output_file);
+		goto out_close_in2;
+	}
+	if (!output_file->f_op || !output_file->f_op->write) {
+		err = -EINVAL;
+		goto out_close_out;
+	}
+
+	/* allocating a buffer */
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out_close_out;
+	}
+
+	input_file->f_pos = 0;
+	output_file->f_pos = 0;
+
+	/* buf is a kernel buffer passed through the __user read/write ops */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	size = len;
+	err = 0;
+	do {
+		if (len >= PAGE_SIZE)
+			size = PAGE_SIZE;
+		else if ((len < PAGE_SIZE) && (len > 0))
+			size = len;
+
+		len -= PAGE_SIZE;
+
+		read_bytes =
+			input_file->f_op->read(input_file,
+					       (char __user *)buf, size,
+					       &input_file->f_pos);
+		if (read_bytes <= 0) {
+			err = read_bytes;
+			break;
+		}
+
+		write_bytes =
+			output_file->f_op->write(output_file,
+						 (char __user *)buf,
+						 read_bytes,
+						 &output_file->f_pos);
+		if (write_bytes < read_bytes) {
+			/*
+			 * A short write returns a count >= 0; storing that
+			 * in err could make a truncated copy look like
+			 * success, so map it to a real error.
+			 */
+			err = (write_bytes < 0) ? write_bytes : -EIO;
+			break;
+		}
+	} while ((read_bytes > 0) && (len > 0));
+
+	set_fs(old_fs);
+
+	kfree(buf);
+
+	/* ->fsync is optional; skip it when the lower fs has none */
+	if (!err && output_file->f_op->fsync)
+		err = output_file->f_op->fsync(output_file,
+					       new_hidden_dentry, 0);
+
+	if (err)
+		goto out_close_out;
+
+	if (copyup_file) {
+		/* caller keeps the output file and its branch reference */
+		*copyup_file = output_file;
+		goto out_close_in;
+	}
+
+out_close_out:
+	fput(output_file);
+
+out_close_in2:
+	branchput(sb, new_bindex);
+
+out_close_in:
+	fput(input_file);
+
+out:
+	branchput(sb, old_bindex);
+
+	return err;
+}
+
+/*
+ * dput the lower references for old and new dentry & clear a lower dentry
+ * pointer
+ */
+static void __clear(struct dentry *dentry, struct dentry *old_hidden_dentry,
+		    int old_bstart, int old_bend,
+		    struct dentry *new_hidden_dentry, int new_bindex)
+{
+	/* get rid of the hidden dentry and all its traces */
+	unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
+
set_dbstart(dentry, old_bstart); + set_dbend(dentry, old_bend); + + dput(new_hidden_dentry); + dput(old_hidden_dentry); +} + +/* copy up a dentry to a file of specified name */ +static int copyup_named_dentry(struct inode *dir, struct dentry *dentry, + int bstart, int new_bindex, const char *name, + int namelen, struct file **copyup_file, + loff_t len) +{ + struct dentry *new_hidden_dentry; + struct dentry *old_hidden_dentry = NULL; + struct super_block *sb; + int err = 0; + int old_bindex; + int old_bstart; + int old_bend; + struct dentry *new_hidden_parent_dentry = NULL; + mm_segment_t oldfs; + char *symbuf = NULL; + + verify_locked(dentry); + + old_bindex = bstart; + old_bstart = dbstart(dentry); + old_bend = dbend(dentry); + + BUG_ON(new_bindex < 0); + BUG_ON(new_bindex >= old_bindex); + + sb = dir->i_sb; + + if ((err = is_robranch_super(sb, new_bindex))) + goto out; + + /* Create the directory structure above this dentry. */ + new_hidden_dentry = + create_parents_named(dir, dentry, name, new_bindex); + if (IS_ERR(new_hidden_dentry)) { + err = PTR_ERR(new_hidden_dentry); + goto out; + } + + old_hidden_dentry = unionfs_lower_dentry_idx(dentry, old_bindex); + /* we conditionally dput this old_hidden_dentry at end of function */ + dget(old_hidden_dentry); + + /* For symlinks, we must read the link before we lock the directory. */ + if (S_ISLNK(old_hidden_dentry->d_inode->i_mode)) { + + symbuf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!symbuf) { + __clear(dentry, old_hidden_dentry, + old_bstart, old_bend, + new_hidden_dentry, new_bindex); + err = -ENOMEM; + goto out_free; + } + + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = old_hidden_dentry->d_inode->i_op->readlink( + old_hidden_dentry, + (char __user *)symbuf, + PATH_MAX); + set_fs(oldfs); + if (err) { + __clear(dentry, old_hidden_dentry, + old_bstart, old_bend, + new_hidden_dentry, new_bindex); + goto out_free; + } + symbuf[err] = '\0'; + } + + /* Now we lock the parent, and create the object in the new branch. 
*/ + new_hidden_parent_dentry = lock_parent(new_hidden_dentry); + + /* create the new inode */ + err = __copyup_ndentry(old_hidden_dentry, new_hidden_dentry, + new_hidden_parent_dentry, symbuf); + + if (err) { + __clear(dentry, old_hidden_dentry, + old_bstart, old_bend, + new_hidden_dentry, new_bindex); + goto out_unlock; + } + + /* We actually copyup the file here. */ + if (S_ISREG(old_hidden_dentry->d_inode->i_mode)) + err = __copyup_reg_data(dentry, new_hidden_dentry, new_bindex, + old_hidden_dentry, old_bindex, + copyup_file, len); + if (err) + goto out_unlink; + + /* Set permissions. */ + if ((err = copyup_permissions(sb, old_hidden_dentry, + new_hidden_dentry))) + goto out_unlink; + +#ifdef CONFIG_UNION_FS_XATTR + /* Selinux uses extended attributes for permissions. */ + if ((err = copyup_xattrs(old_hidden_dentry, new_hidden_dentry))) + goto out_unlink; +#endif + + /* do not allow files getting deleted to be re-interposed */ + if (!d_deleted(dentry)) + unionfs_reinterpose(dentry); + + goto out_unlock; + +out_unlink: + /* + * copyup failed, because we possibly ran out of space or + * quota, or something else happened so let's unlink; we don't + * really care about the return value of vfs_unlink + */ + vfs_unlink(new_hidden_parent_dentry->d_inode, new_hidden_dentry); + + if (copyup_file) { + /* need to close the file */ + + fput(*copyup_file); + branchput(sb, new_bindex); + } + + /* + * TODO: should we reset the error to something like -EIO? + * + * If we don't reset, the user may get some nonsensical errors, but + * on the other hand, if we reset to EIO, we guarantee that the user + * will get a "confusing" error message. + */ + +out_unlock: + unlock_dir(new_hidden_parent_dentry); + +out_free: + /* + * If old_hidden_dentry was a directory, we need to dput it. If it + * was a file, then it was already dput indirectly by other + * functions we call above which operate on regular files. 
+ */ + if (old_hidden_dentry && old_hidden_dentry->d_inode && + S_ISDIR(old_hidden_dentry->d_inode->i_mode)) + dput(old_hidden_dentry); + kfree(symbuf); + +out: + return err; +} + +/* + * This function creates a copy of a file represented by 'file' which + * currently resides in branch 'bstart' to branch 'new_bindex.' The copy + * will be named "name". + */ +int copyup_named_file(struct inode *dir, struct file *file, char *name, + int bstart, int new_bindex, loff_t len) +{ + int err = 0; + struct file *output_file = NULL; + + err = copyup_named_dentry(dir, file->f_dentry, bstart, + new_bindex, name, strlen(name), &output_file, + len); + if (!err) { + fbstart(file) = new_bindex; + unionfs_set_lower_file_idx(file, new_bindex, output_file); + } + + return err; +} + +/* + * This function creates a copy of a file represented by 'file' which + * currently resides in branch 'bstart' to branch 'new_bindex'. + */ +int copyup_file(struct inode *dir, struct file *file, int bstart, + int new_bindex, loff_t len) +{ + int err = 0; + struct file *output_file = NULL; + + err = copyup_dentry(dir, file->f_dentry, bstart, new_bindex, + &output_file, len); + if (!err) { + fbstart(file) = new_bindex; + unionfs_set_lower_file_idx(file, new_bindex, output_file); + } + + return err; +} + +/* + * This function replicates the directory structure up-to given dentry in the + * bindex branch. Can create directory structure recursively to the right + * also. + */ +struct dentry *create_parents(struct inode *dir, struct dentry *dentry, + int bindex) +{ + return create_parents_named(dir, dentry, dentry->d_name.name, bindex); +} + +/* purge a dentry's lower-branch states (dput/mntput, etc.) 
*/
+static void __cleanup_dentry(struct dentry *dentry, int bindex,
+			     int old_bstart, int old_bend)
+{
+	const int first = min(old_bstart, bindex);
+	const int last = max(old_bend, bindex);
+	int kept_start = -1;
+	int kept_end = -1;
+	int idx;
+
+	/*
+	 * Walk every branch covered by the old range or by bindex, from
+	 * left to right.  A negative lower dentry in any branch other than
+	 * bindex is released together with its mnt reference; every slot
+	 * that survives widens the [kept_start, kept_end] range.
+	 */
+	for (idx = first; idx <= last; idx++) {
+		struct dentry *lower = unionfs_lower_dentry_idx(dentry, idx);
+
+		if (!lower)
+			continue;
+
+		if (idx != bindex && !lower->d_inode) {
+			dput(lower);
+			unionfs_set_lower_dentry_idx(dentry, idx, NULL);
+
+			unionfs_mntput(dentry, idx);
+			unionfs_set_lower_mnt_idx(dentry, idx, NULL);
+			continue;
+		}
+
+		if (kept_start < 0)
+			kept_start = idx;
+		kept_end = idx;
+	}
+
+	/* if no slot survived, collapse the range onto bindex */
+	set_dbstart(dentry, kept_start < 0 ? bindex : kept_start);
+	set_dbend(dentry, kept_end < 0 ? bindex : kept_end);
+}
+
+/*
+ * Store lower's inode (grabbed with igrab) as upper's lower inode for
+ * bindex, then stretch the inode's branch range so that it covers bindex.
+ */
+static void __set_inode(struct dentry *upper, struct dentry *lower,
+			int bindex)
+{
+	struct inode *inode = upper->d_inode;
+
+	unionfs_set_lower_inode_idx(inode, bindex, igrab(lower->d_inode));
+
+	if (likely(ibstart(inode) > bindex))
+		ibstart(inode) = bindex;
+	if (likely(ibend(inode) < bindex))
+		ibend(inode) = bindex;
+}
+
+/*
+ * Store lower as upper's lower dentry for bindex, then stretch the
+ * dentry's branch range so that it covers bindex.
+ */
+static void __set_dentry(struct dentry *upper, struct dentry *lower,
+			 int bindex)
+{
+	unionfs_set_lower_dentry_idx(upper, bindex, lower);
+
+	if (likely(bindex < dbstart(upper)))
+		set_dbstart(upper, bindex);
+	if (likely(bindex > dbend(upper)))
+		set_dbend(upper, bindex);
+}
+
+/*
+ * This function replicates the directory structure up-to given dentry
+ * in the bindex branch.
+ *
+ * Walks up from dentry until it reaches an ancestor that already has a
+ * lower dentry in branch bindex, then walks back down, looking up each
+ * path component in the lower file system and creating (through run_sioq
+ * and __unionfs_mkdir) the directories that are still negative there.
+ *
+ * dir is only used for its super block (read-only branch check and
+ * copyup_permissions); name is the name looked up for dentry itself in
+ * its lower parent.  dentry must be locked by the caller.
+ *
+ * Returns the lower dentry found for name in branch bindex (it is also
+ * installed as dentry's lower dentry for that branch), or an ERR_PTR()
+ * value on failure.
+ */
+static struct dentry *create_parents_named(struct inode *dir,
+					   struct dentry *dentry,
+					   const char *name, int bindex)
+{
+	int err;
+	struct dentry *child_dentry;
+	struct dentry *parent_dentry;
+	struct dentry *hidden_parent_dentry = NULL;
+	struct dentry *hidden_dentry = NULL;
+	const char *childname;
+	unsigned int childnamelen;
+
+	int nr_dentry;
+	int count = 0;
+
+	int old_bstart;
+	int old_bend;
+	struct dentry **path = NULL;
+	struct super_block *sb;
+
+	verify_locked(dentry);
+
+	if ((err = is_robranch_super(dir->i_sb, bindex))) {
+		hidden_dentry = ERR_PTR(err);
+		goto out;
+	}
+
+	old_bstart = dbstart(dentry);
+	old_bend = dbend(dentry);
+
+	hidden_dentry = ERR_PTR(-ENOMEM);
+
+	/* There is no sense allocating any less than the minimum. */
+	nr_dentry = 1;
+	path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
+	if (!path)
+		goto out;
+
+	/* assume the negative dentry of unionfs as the parent dentry */
+	parent_dentry = dentry;
+
+	/*
+	 * This loop finds the first parent that exists in the given branch.
+	 * We start building the directory structure from there.  At the end
+	 * of the loop, the following should hold:
+	 *  - child_dentry is the first nonexistent child
+	 *  - parent_dentry is the first existent parent
+	 *  - path[0] is the deepest child
+	 *  - path[count] is the first child to create
+	 *
+	 * Every parent visited here is locked; path[k] for k >= 1 is the
+	 * (locked) parent of path[k - 1].
+	 */
+	do {
+		child_dentry = parent_dentry;
+
+		/* find the parent directory dentry in unionfs */
+		parent_dentry = child_dentry->d_parent;
+		unionfs_lock_dentry(parent_dentry);
+
+		/* find out the hidden_parent_dentry in the given branch */
+		hidden_parent_dentry =
+			unionfs_lower_dentry_idx(parent_dentry, bindex);
+
+		/* grow path table */
+		if (count == nr_dentry) {
+			void *p;
+
+			nr_dentry *= 2;
+			p = krealloc(path, nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
+			if (!p) {
+				hidden_dentry = ERR_PTR(-ENOMEM);
+				/*
+				 * child_dentry has not been stored in
+				 * path[] yet, so the cleanup at "out"
+				 * cannot reach it, and path[count] lies
+				 * one past the end of the old table.
+				 * Drop the two locks this loop holds
+				 * outside the table and step count back
+				 * to the last valid slot.
+				 */
+				unionfs_unlock_dentry(parent_dentry);
+				unionfs_unlock_dentry(child_dentry);
+				count--;
+				goto out;
+			}
+			path = p;
+		}
+
+		/* store the child dentry */
+		path[count++] = child_dentry;
+	} while (!hidden_parent_dentry);
+	count--;
+
+	sb = dentry->d_sb;
+
+	/*
+	 * This is basically while(child_dentry != dentry).  This loop is
+	 * horrible to follow and should be replaced with cleaner code.
+	 */
+	while (1) {
+		/* get hidden parent dir in the current branch */
+		hidden_parent_dentry =
+			unionfs_lower_dentry_idx(parent_dentry, bindex);
+		unionfs_unlock_dentry(parent_dentry);
+
+		/* init the values to lookup */
+		childname = child_dentry->d_name.name;
+		childnamelen = child_dentry->d_name.len;
+
+		if (child_dentry != dentry) {
+			/* lookup child in the underlying file system */
+			hidden_dentry =
+				lookup_one_len(childname, hidden_parent_dentry,
+					       childnamelen);
+			if (IS_ERR(hidden_dentry))
+				goto out;
+		} else {
+
+			/*
+			 * is the name a whiteout of the child name ?
+			 * lookup the whiteout child in the underlying file
+			 * system
+			 */
+			hidden_dentry =
+				lookup_one_len(name, hidden_parent_dentry,
+					       strlen(name));
+			if (IS_ERR(hidden_dentry))
+				goto out;
+
+			/*
+			 * Replace the current dentry (if any) with the new
+			 * one.
+			 */
+			dput(unionfs_lower_dentry_idx(dentry, bindex));
+			unionfs_set_lower_dentry_idx(dentry, bindex,
+						     hidden_dentry);
+
+			__cleanup_dentry(dentry, bindex, old_bstart, old_bend);
+			break;
+		}
+
+		if (hidden_dentry->d_inode) {
+			/*
+			 * since this already exists we dput to avoid
+			 * multiple references on the same dentry
+			 */
+			dput(hidden_dentry);
+		} else {
+			struct sioq_args args;
+
+			/* it's a negative dentry, create a new dir */
+			hidden_parent_dentry = lock_parent(hidden_dentry);
+
+			args.mkdir.parent = hidden_parent_dentry->d_inode;
+			args.mkdir.dentry = hidden_dentry;
+			args.mkdir.mode = child_dentry->d_inode->i_mode;
+
+			run_sioq(__unionfs_mkdir, &args);
+			err = args.err;
+
+			if (!err)
+				err = copyup_permissions(dir->i_sb,
+							 child_dentry,
+							 hidden_dentry);
+			unlock_dir(hidden_parent_dentry);
+			if (err) {
+				struct inode *inode = hidden_dentry->d_inode;
+				/*
+				 * If we get here, it means that we created a new
+				 * dentry+inode, but copying permissions failed.
+				 * Therefore, we should delete this inode and dput
+				 * the dentry so as not to leave cruft behind.
+				 *
+				 * XXX: call dentry_iput() instead, but then we have
+				 * to export that symbol.
+				 *
+				 * NOTE(review): this branch is also taken when
+				 * the mkdir itself failed, in which case inode
+				 * is presumably still NULL -- confirm that a
+				 * lower d_iput tolerates that.
+				 */
+				if (hidden_dentry->d_op && hidden_dentry->d_op->d_iput)
+					hidden_dentry->d_op->d_iput(hidden_dentry,
+								    inode);
+				else
+					iput(inode);
+				hidden_dentry->d_inode = NULL;
+
+				dput(hidden_dentry);
+				hidden_dentry = ERR_PTR(err);
+				goto out;
+			}
+
+		}
+
+		__set_inode(child_dentry, hidden_dentry, bindex);
+		__set_dentry(child_dentry, hidden_dentry, bindex);
+
+		parent_dentry = child_dentry;
+		child_dentry = path[--count];
+	}
+out:
+	/*
+	 * cleanup any leftover locks from the do/while loop above: on
+	 * failure path[1..count] are still locked
+	 */
+	if (IS_ERR(hidden_dentry))
+		while (count)
+			unionfs_unlock_dentry(path[count--]);
+	kfree(path);
+	return hidden_dentry;
+}
diff -Nurb linux-2.6.22-570/fs/unionfs/dentry.c linux-2.6.22-591/fs/unionfs/dentry.c
--- linux-2.6.22-570/fs/unionfs/dentry.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/fs/unionfs/dentry.c 2007-12-21 15:36:12.000000000 -0500
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * Revalidate a single dentry.
+ * Assume that dentry's info node is locked.
+ * Assume that parent(s) are all valid already, but
+ * the child may not yet be valid.
+ * Returns 1 if valid, 0 otherwise.
+ *
+ * nd may be NULL; the lower file systems are always handed a private
+ * copy of it (zero-filled when nd is NULL).
+ */
+static int __unionfs_d_revalidate_one(struct dentry *dentry,
+				      struct nameidata *nd)
+{
+	int valid = 1;		/* default is valid (1); invalid is 0. */
+	struct dentry *hidden_dentry;
+	int bindex, bstart, bend;
+	int sbgen, dgen;
+	int positive = 0;
+	int locked = 0;
+	int interpose_flag;
+
+	struct nameidata lowernd; /* TODO: be gentler to the stack */
+
+	if (nd)
+		memcpy(&lowernd, nd, sizeof(struct nameidata));
+	else
+		memset(&lowernd, 0, sizeof(struct nameidata));
+
+	verify_locked(dentry);
+
+	/* if the dentry is unhashed, do NOT revalidate */
+	if (d_deleted(dentry)) {
+		/* "%.*s": d_name.len bounds the name, it is not a pad width */
+		printk(KERN_DEBUG "unionfs: unhashed dentry being "
+		       "revalidated: %.*s\n",
+		       dentry->d_name.len, dentry->d_name.name);
+		goto out;
+	}
+
+	BUG_ON(dbstart(dentry) == -1);
+	if (dentry->d_inode)
+		positive = 1;
+	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
+	sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
+	/*
+	 * If we are working on an unconnected dentry, then there is no
+	 * revalidation to be done, because this file does not exist within
+	 * the namespace, and Unionfs operates on the namespace, not data.
+	 */
+	if (sbgen != dgen) {
+		struct dentry *result;
+		int pdgen;
+
+		/* The root entry should always be valid */
+		BUG_ON(IS_ROOT(dentry));
+
+		/* We can't work correctly if our parent isn't valid. */
+		pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
+		BUG_ON(pdgen != sbgen);	/* should never happen here */
+
+		/* Free the pointers for our inodes and this dentry. */
+		bstart = dbstart(dentry);
+		bend = dbend(dentry);
+		if (bstart >= 0) {
+			struct dentry *hidden_dentry;
+			for (bindex = bstart; bindex <= bend; bindex++) {
+				hidden_dentry =
+					unionfs_lower_dentry_idx(dentry,
+								 bindex);
+				dput(hidden_dentry);
+			}
+		}
+		set_dbstart(dentry, -1);
+		set_dbend(dentry, -1);
+
+		interpose_flag = INTERPOSE_REVAL_NEG;
+		if (positive) {
+			interpose_flag = INTERPOSE_REVAL;
+			/*
+			 * During BRM, the VFS could already hold a lock on
+			 * a file being read, so don't lock it again
+			 * (deadlock), but if you lock it in this function,
+			 * then release it here too.
+			 */
+			if (!mutex_is_locked(&dentry->d_inode->i_mutex)) {
+				mutex_lock(&dentry->d_inode->i_mutex);
+				locked = 1;
+			}
+
+			bstart = ibstart(dentry->d_inode);
+			bend = ibend(dentry->d_inode);
+			if (bstart >= 0) {
+				struct inode *hidden_inode;
+				for (bindex = bstart; bindex <= bend;
+				     bindex++) {
+					hidden_inode =
+						unionfs_lower_inode_idx(
+							dentry->d_inode,
+							bindex);
+					iput(hidden_inode);
+				}
+			}
+			kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
+			UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
+			ibstart(dentry->d_inode) = -1;
+			ibend(dentry->d_inode) = -1;
+			if (locked)
+				mutex_unlock(&dentry->d_inode->i_mutex);
+		}
+
+		/* rebuild the lower dentries/inodes against the new branches */
+		result = unionfs_lookup_backend(dentry, &lowernd,
+						interpose_flag);
+		if (result) {
+			if (IS_ERR(result)) {
+				valid = 0;
+				goto out;
+			}
+			/*
+			 * current unionfs_lookup_backend() doesn't return
+			 * a valid dentry
+			 */
+			dput(dentry);
+			dentry = result;
+		}
+
+		if (positive && UNIONFS_I(dentry->d_inode)->stale) {
+			make_bad_inode(dentry->d_inode);
+			d_drop(dentry);
+			valid = 0;
+			goto out;
+		}
+		goto out;
+	}
+
+	/* The revalidation must occur across all branches */
+	bstart = dbstart(dentry);
+	bend = dbend(dentry);
+	BUG_ON(bstart == -1);
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+		if (!hidden_dentry || !hidden_dentry->d_op
+		    || !hidden_dentry->d_op->d_revalidate)
+			continue;
+		if (!hidden_dentry->d_op->d_revalidate(hidden_dentry,
+						       &lowernd))
+			valid = 0;
+	}
+
+	if (!dentry->d_inode)
+		valid = 0;
+
+	if (valid) {
+		fsstack_copy_attr_all(dentry->d_inode,
+				      unionfs_lower_inode(dentry->d_inode),
+				      unionfs_get_nlinks);
+		fsstack_copy_inode_size(dentry->d_inode,
+					unionfs_lower_inode(dentry->d_inode));
+	}
+
+out:
+	return valid;
+}
+
+/*
+ * Revalidate a parent chain of dentries, then the actual node.
+ * Assumes that dentry is locked, but will lock all parents if/when needed.
+ *
+ * Returns 1 if every stale ancestor and dentry itself revalidated
+ * successfully, 0 otherwise (including when the chain array cannot be
+ * allocated).
+ */
+int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd)
+{
+	int valid = 0;		/* default is invalid (0); valid is 1. */
+	struct dentry **chain = NULL; /* chain of dentries to reval */
+	int chain_len = 0;
+	struct dentry *dtmp;
+	int sbgen, dgen, i;
+	int saved_bstart, saved_bend, bindex;
+
+	/* find length of chain needed to revalidate */
+	/* XXX: should I grab some global (dcache?) lock? */
+	chain_len = 0;
+	sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
+	dtmp = dentry->d_parent;
+	dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
+	while (sbgen != dgen) {
+		/* The root entry should always be valid */
+		BUG_ON(IS_ROOT(dtmp));
+		chain_len++;
+		dtmp = dtmp->d_parent;
+		dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
+	}
+	if (chain_len == 0)
+		goto out_this;	/* shortcut if parents are OK */
+
+	/*
+	 * Allocate array of dentries to reval.  We could use linked lists,
+	 * but the number of entries we need to alloc here is often small,
+	 * and short lived, so locality will be better.
+	 */
+	chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
+	if (!chain) {
+		printk(KERN_ERR "unionfs: no more memory in %s\n", __FUNCTION__);
+		goto out;
+	}
+
+	/*
+	 * Take a reference on every stale ancestor, walking from child to
+	 * parent, so that chain[0] is the topmost stale ancestor and
+	 * chain[chain_len - 1] is dentry's immediate parent.
+	 */
+	dtmp = dentry->d_parent;
+	for (i=chain_len-1; i>=0; i--) {
+		chain[i] = dget(dtmp);
+		dtmp = dtmp->d_parent;
+	}
+
+	/*
+	 * call __unionfs_d_revalidate_one() on each dentry, but in parent
+	 * to child order.
+	 *
+	 * NOTE(review): the loop header and the statements up to the sbgen
+	 * re-read were lost from this copy of the patch (text between '<'
+	 * and '>' was stripped).  They are reconstructed from the
+	 * surviving uses of saved_bstart/saved_bend and the matching
+	 * unionfs_unlock_dentry() below -- confirm against the original
+	 * patch.
+	 */
+	for (i = 0; i < chain_len; i++) {
+		unionfs_lock_dentry(chain[i]);
+		/* remember the pre-revalidation branch range for the mntput */
+		saved_bstart = dbstart(chain[i]);
+		saved_bend = dbend(chain[i]);
+		sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
+		dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
+
+		valid = __unionfs_d_revalidate_one(chain[i], nd);
+		/* XXX: is this the correct mntput condition?! */
+		if (valid && chain_len > 0 &&
+		    sbgen != dgen && chain[i]->d_inode &&
+		    S_ISDIR(chain[i]->d_inode->i_mode)) {
+			for (bindex = saved_bstart; bindex <= saved_bend;
+			     bindex++)
+				unionfs_mntput(chain[i], bindex);
+		}
+		unionfs_unlock_dentry(chain[i]);
+
+		if (!valid)
+			goto out_free;
+	}
+
+
+out_this:
+	/* finally, revalidate this dentry (the caller already locked it) */
+	verify_locked(dentry);
+	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
+	valid = __unionfs_d_revalidate_one(dentry, nd);
+
+	/*
+	 * If __unionfs_d_revalidate_one() succeeded above, then it will
+	 * have incremented the refcnt of the mnt's, but also the branch
+	 * indices of the dentry will have been updated (to take into
+	 * account any branch insertions/deletion.  So the current
+	 * dbstart/dbend match the current, and new, indices of the mnts
+	 * which __unionfs_d_revalidate_one has incremented.  Note: the "if"
+	 * test below does not depend on whether chain_len was 0 or greater.
+	 */
+	if (valid && sbgen != dgen)
+		for (bindex = dbstart(dentry);
+		     bindex <= dbend(dentry);
+		     bindex++)
+			unionfs_mntput(dentry, bindex);
+
+out_free:
+	/* dput all dentries in chain and return status */
+	if (chain_len > 0) {
+		for (i = 0; i < chain_len; i++)
+			dput(chain[i]);
+		kfree(chain);
+	}
+out:
+	return valid;
+}
+
+/*
+ * d_revalidate entry point (see unionfs_dops): revalidates dentry and its
+ * stale ancestors while holding the super block read lock and the
+ * dentry's own lock.  Returns what __unionfs_d_revalidate_chain() returns.
+ *
+ * NOTE(review): the end of the function above and this function's
+ * signature, local declaration and first call were lost from this copy
+ * of the patch and are reconstructed from the surviving body, the "out"
+ * label used above and the .d_revalidate initializer -- confirm against
+ * the original patch.
+ */
+static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	int err;
+
+	unionfs_read_lock(dentry->d_sb);
+
+	unionfs_lock_dentry(dentry);
+	err = __unionfs_d_revalidate_chain(dentry, nd);
+	unionfs_unlock_dentry(dentry);
+
+	unionfs_read_unlock(dentry->d_sb);
+
+	return err;
+}
+
+/*
+ * At this point no one can reference this dentry, so we don't have to be
+ * careful about concurrent access.
+ *
+ * d_release callback: drops the lower dentry and mnt reference held for
+ * every branch in [dbstart, dbend], frees the lower_paths array and then
+ * the dentry's private data.  A dentry without private data is only
+ * logged; one whose dbstart is negative skips straight to freeing the
+ * private data.
+ */
+static void unionfs_d_release(struct dentry *dentry)
+{
+	int bindex, bstart, bend;
+
+	unionfs_read_lock(dentry->d_sb);
+
+	/* this could be a negative dentry, so check first */
+	if (!UNIONFS_D(dentry)) {
+		printk(KERN_DEBUG "unionfs: dentry without private data: %.*s\n",
+		       dentry->d_name.len, dentry->d_name.name);
+		goto out;
+	} else if (dbstart(dentry) < 0) {
+		/* this is due to a failed lookup */
+		printk(KERN_DEBUG "unionfs: dentry without hidden "
+		       "dentries: %.*s\n",
+		       dentry->d_name.len, dentry->d_name.name);
+		goto out_free;
+	}
+
+	/* Release all the hidden dentries */
+	bstart = dbstart(dentry);
+	bend = dbend(dentry);
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		dput(unionfs_lower_dentry_idx(dentry, bindex));
+		unionfs_mntput(dentry, bindex);
+
+		/* clear the slots so nothing stale is left behind */
+		unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
+		unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
+	}
+	/* free the lower_paths array of the private data here */
+	kfree(UNIONFS_D(dentry)->lower_paths);
+	UNIONFS_D(dentry)->lower_paths = NULL;
+
+out_free:
+	/* No need to unlock it, because it is about to disappear. */
+	free_dentry_private_data(dentry);
+
+out:
+	unionfs_read_unlock(dentry->d_sb);
+	return;
+}
+
+/* dentry operations installed on unionfs dentries */
+struct dentry_operations unionfs_dops = {
+	.d_revalidate	= unionfs_d_revalidate,
+	.d_release	= unionfs_d_release,
+};
diff -Nurb linux-2.6.22-570/fs/unionfs/dirfops.c linux-2.6.22-591/fs/unionfs/dirfops.c
--- linux-2.6.22-570/fs/unionfs/dirfops.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/fs/unionfs/dirfops.c 2007-12-21 15:36:12.000000000 -0500
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P.
Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/* Make sure our rdstate is playing by the rules. */
+static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
+{
+	BUG_ON(rdstate->offset >= DIREOF);
+	BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
+}
+
+/* state shared between unionfs_readdir() and its filldir callback */
+struct unionfs_getdents_callback {
+	struct unionfs_dir_state *rdstate;
+	void *dirent;		/* the caller's dirent, passed to filldir */
+	int entries_written;
+	int filldir_called;
+	int filldir_error;
+	filldir_t filldir;	/* the caller's filldir */
+	struct super_block *sb;
+};
+
+/*
+ * based on generic filldir in fs/readdir.c
+ *
+ * Called by vfs_readdir() for every entry of one lower directory.  A
+ * whiteout prefix is stripped from the name; a name already recorded in
+ * the rdstate is skipped; anything else is recorded, and passed to the
+ * caller's filldir unless it is a whiteout.  Returns 0 or the first error
+ * from the caller's filldir or from add_filldir_node(), which is also
+ * saved in buf->filldir_error.
+ */
+static int unionfs_filldir(void *dirent, const char *name, int namelen,
+			   loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct unionfs_getdents_callback *buf = dirent;
+	struct filldir_node *found = NULL;
+	int err = 0;
+	int is_wh_entry = 0;
+
+	buf->filldir_called++;
+
+	if ((namelen > UNIONFS_WHLEN) &&
+	    !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
+		name += UNIONFS_WHLEN;
+		namelen -= UNIONFS_WHLEN;
+		is_wh_entry = 1;
+	}
+
+	found = find_filldir_node(buf->rdstate, name, namelen);
+
+	if (found)
+		goto out;
+
+	/* if 'name' isn't a whiteout, filldir it. */
+	if (!is_wh_entry) {
+		off_t pos = rdstate2offset(buf->rdstate);
+
+		err = buf->filldir(buf->dirent, name, namelen, pos,
+				   ino, d_type);
+		buf->rdstate->offset++;
+		verify_rdstate_offset(buf->rdstate);
+	}
+	/*
+	 * If we did fill it, stuff it in our hash, otherwise return an
+	 * error.
+	 */
+	if (err) {
+		buf->filldir_error = err;
+		goto out;
+	}
+	buf->entries_written++;
+	if ((err = add_filldir_node(buf->rdstate, name, namelen,
+				    buf->rdstate->bindex, is_wh_entry)))
+		buf->filldir_error = err;
+
+out:
+	return err;
+}
+
+/*
+ * readdir for a unionfs directory: reads the lower directories branch by
+ * branch, resuming from the branch and position saved in the file's
+ * rdstate, and merges them through unionfs_filldir().  Returns 0 or a
+ * negative errno.
+ */
+static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	int err = 0;
+	struct file *hidden_file = NULL;
+	struct inode *inode = NULL;
+	struct unionfs_getdents_callback buf;
+	struct unionfs_dir_state *uds;
+	int bend;
+	loff_t offset;
+
+	unionfs_read_lock(file->f_path.dentry->d_sb);
+
+	if ((err = unionfs_file_revalidate(file, 0)))
+		goto out;
+
+	inode = file->f_dentry->d_inode;
+
+	uds = UNIONFS_F(file)->rdstate;
+	if (!uds) {
+		if (file->f_pos == DIREOF) {
+			goto out;
+		} else if (file->f_pos > 0) {
+			uds = find_rdstate(inode, file->f_pos);
+			if (!uds) {
+				err = -ESTALE;
+				goto out;
+			}
+			UNIONFS_F(file)->rdstate = uds;
+		} else {
+			init_rdstate(file);
+			uds = UNIONFS_F(file)->rdstate;
+			if (!uds) {
+				/*
+				 * presumably the rdstate allocation
+				 * failed; do not dereference NULL below
+				 */
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+	}
+	bend = fbend(file);
+
+	/*
+	 * The test after the loop reads filldir_error even when no branch
+	 * in range has a lower file, i.e. when the loop body never gets as
+	 * far as preparing buf.
+	 */
+	buf.filldir_error = 0;
+
+	while (uds->bindex <= bend) {
+		hidden_file = unionfs_lower_file_idx(file, uds->bindex);
+		if (!hidden_file) {
+			uds->bindex++;
+			uds->dirpos = 0;
+			continue;
+		}
+
+		/* prepare callback buffer */
+		buf.filldir_called = 0;
+		buf.filldir_error = 0;
+		buf.entries_written = 0;
+		buf.dirent = dirent;
+		buf.filldir = filldir;
+		buf.rdstate = uds;
+		buf.sb = inode->i_sb;
+
+		/* Read starting from where we last left off. */
+		offset = vfs_llseek(hidden_file, uds->dirpos, SEEK_SET);
+		if (offset < 0) {
+			err = offset;
+			goto out;
+		}
+		err = vfs_readdir(hidden_file, unionfs_filldir, &buf);
+
+		/* Save the position for when we continue. */
+		offset = vfs_llseek(hidden_file, 0, SEEK_CUR);
+		if (offset < 0) {
+			err = offset;
+			goto out;
+		}
+		uds->dirpos = offset;
+
+		/* Copy the atime. */
+		fsstack_copy_attr_atime(inode, hidden_file->f_dentry->d_inode);
+
+		if (err < 0)
+			goto out;
+
+		if (buf.filldir_error)
+			break;
+
+		/* nothing new from this branch: move on to the next one */
+		if (!buf.entries_written) {
+			uds->bindex++;
+			uds->dirpos = 0;
+		}
+	}
+
+	if (!buf.filldir_error && uds->bindex >= bend) {
+		/* Save the number of hash entries for next time. */
+		UNIONFS_I(inode)->hashsize = uds->hashentries;
+		free_rdstate(uds);
+		UNIONFS_F(file)->rdstate = NULL;
+		file->f_pos = DIREOF;
+	} else
+		file->f_pos = rdstate2offset(uds);
+
+out:
+	unionfs_read_unlock(file->f_path.dentry->d_sb);
+	return err;
+}
+
+/*
+ * This is not meant to be a generic repositioning function.  If you do
+ * things that aren't supported, then we return EINVAL.
+ *
+ * What is allowed:
+ *  (1) seeking to the same position that you are currently at
+ *	This really has no effect, but returns where you are.
+ *  (2) seeking to the beginning of the file
+ *	This throws out all state, and lets you begin again.
+ */
+static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct unionfs_dir_state *rdstate;
+	loff_t err;
+
+	unionfs_read_lock(file->f_path.dentry->d_sb);
+
+	if ((err = unionfs_file_revalidate(file, 0)))
+		goto out;
+
+	rdstate = UNIONFS_F(file)->rdstate;
+
+	/*
+	 * we let users seek to their current position, but not anywhere
+	 * else.
+	 */
+	if (!offset) {
+		switch (origin) {
+		case SEEK_SET:
+			if (rdstate) {
+				free_rdstate(rdstate);
+				UNIONFS_F(file)->rdstate = NULL;
+			}
+			init_rdstate(file);
+			err = 0;
+			break;
+		case SEEK_CUR:
+			err = file->f_pos;
+			break;
+		case SEEK_END:
+			/* Unsupported, because we would break everything. */
+			err = -EINVAL;
+			break;
+		default:
+			/* unknown origin: refuse instead of reporting 0 */
+			err = -EINVAL;
+			break;
+		}
+	} else {
+		switch (origin) {
+		case SEEK_SET:
+			if (rdstate) {
+				if (offset == rdstate2offset(rdstate))
+					err = offset;
+				else if (file->f_pos == DIREOF)
+					err = DIREOF;
+				else
+					err = -EINVAL;
+			} else {
+				rdstate = find_rdstate(file->f_dentry->d_inode,
+						       offset);
+				if (rdstate) {
+					UNIONFS_F(file)->rdstate = rdstate;
+					err = rdstate->offset;
+				} else
+					err = -EINVAL;
+			}
+			break;
+		case SEEK_CUR:
+		case SEEK_END:
+			/* Unsupported, because we would break everything. */
+			err = -EINVAL;
+			break;
+		default:
+			/* unknown origin: refuse instead of reporting 0 */
+			err = -EINVAL;
+			break;
+		}
+	}
+
+out:
+	unionfs_read_unlock(file->f_path.dentry->d_sb);
+	return err;
+}
+
+/*
+ * Trimmed directory options, we shouldn't pass everything down since
+ * we don't want to operate on partial directories.
+ */
+struct file_operations unionfs_dir_fops = {
+	.llseek		= unionfs_dir_llseek,
+	.read		= generic_read_dir,
+	.readdir	= unionfs_readdir,
+	.unlocked_ioctl	= unionfs_ioctl,
+	.open		= unionfs_open,
+	.release	= unionfs_file_release,
+	.flush		= unionfs_flush,
+};
diff -Nurb linux-2.6.22-570/fs/unionfs/dirhelper.c linux-2.6.22-591/fs/unionfs/dirhelper.c
--- linux-2.6.22-570/fs/unionfs/dirhelper.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/fs/unionfs/dirhelper.c 2007-12-21 15:36:12.000000000 -0500
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * Delete all of the whiteouts in a given directory for rmdir.
+ *
+ * hidden directory inode should be locked
+ *
+ * Only the namelist entries that are whiteouts recorded for branch bindex
+ * are unlinked.  Returns 0, -ENOMEM if no name buffer could be obtained,
+ * or the first error from lookup_one_len()/vfs_unlink().
+ */
+int do_delete_whiteouts(struct dentry *dentry, int bindex,
+			struct unionfs_dir_state *namelist)
+{
+	int err = 0;
+	struct dentry *hidden_dir_dentry = NULL;
+	struct dentry *hidden_dentry;
+	char *name = NULL, *p;
+	struct inode *hidden_dir;
+
+	int i;
+	struct list_head *pos;
+	struct filldir_node *cursor;
+
+	/* Find out hidden parent dentry */
+	hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+	BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode));
+	hidden_dir = hidden_dir_dentry->d_inode;
+	BUG_ON(!S_ISDIR(hidden_dir->i_mode));
+
+	err = -ENOMEM;
+	name = __getname();
+	if (!name)
+		goto out;
+	/* name = whiteout prefix; p points just past it, at the entry name */
+	strcpy(name, UNIONFS_WHPFX);
+	p = name + UNIONFS_WHLEN;
+
+	err = 0;
+	for (i = 0; !err && i < namelist->size; i++) {
+		list_for_each(pos, &namelist->list[i]) {
+			cursor =
+				list_entry(pos, struct filldir_node,
+					   file_list);
+			/* Only operate on whiteouts in this branch. */
+			if (cursor->bindex != bindex)
+				continue;
+			if (!cursor->whiteout)
+				continue;
+
+			strcpy(p, cursor->name);
+			hidden_dentry =
+				lookup_one_len(name, hidden_dir_dentry,
+					       cursor->namelen +
+					       UNIONFS_WHLEN);
+			if (IS_ERR(hidden_dentry)) {
+				err = PTR_ERR(hidden_dentry);
+				break;
+			}
+			if (hidden_dentry->d_inode)
+				err = vfs_unlink(hidden_dir, hidden_dentry);
+			dput(hidden_dentry);
+			/* the outer loop stops too: it tests !err */
+			if (err)
+				break;
+		}
+	}
+
+	__putname(name);
+
+	/* After all of the removals, we should copy the attributes once. */
+	fsstack_copy_attr_times(dentry->d_inode, hidden_dir_dentry->d_inode);
+
+out:
+	return err;
+}
+
+/*
+ * delete whiteouts in a dir (for rmdir operation) using sioq if necessary
+ *
+ * The lower directory's i_mutex is held around the deletion.  When
+ * permission() grants write+exec on the lower directory the whiteouts are
+ * deleted directly, otherwise the work is handed to run_sioq().
+ * Returns 0 or a negative errno.
+ */
+int delete_whiteouts(struct dentry *dentry, int bindex,
+		     struct unionfs_dir_state *namelist)
+{
+	int err;
+	struct super_block *sb;
+	struct dentry *hidden_dir_dentry;
+	struct inode *hidden_dir;
+
+	struct sioq_args args;
+
+	sb = dentry->d_sb;
+
+	BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
+	BUG_ON(bindex < dbstart(dentry));
+	BUG_ON(bindex > dbend(dentry));
+	err = is_robranch_super(sb, bindex);
+	if (err)
+		goto out;
+
+	hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+	BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode));
+	hidden_dir = hidden_dir_dentry->d_inode;
+	BUG_ON(!S_ISDIR(hidden_dir->i_mode));
+
+	mutex_lock(&hidden_dir->i_mutex);
+	if (!permission(hidden_dir, MAY_WRITE | MAY_EXEC, NULL))
+		err = do_delete_whiteouts(dentry, bindex, namelist);
+	else {
+		args.deletewh.namelist = namelist;
+		args.deletewh.dentry = dentry;
+		args.deletewh.bindex = bindex;
+		run_sioq(__delete_whiteouts, &args);
+		err = args.err;
+	}
+	mutex_unlock(&hidden_dir->i_mutex);
+
+out:
+	return err;
+}
+
+#define RD_NONE 0
+#define RD_CHECK_EMPTY 1
+/* The callback structure for check_empty. */
+struct unionfs_rdutil_callback {
+	int err;		/* last value returned by the callback */
+	int filldir_called;	/* set to 1 on every callback invocation */
+	struct unionfs_dir_state *rdstate;
+	int mode;		/* RD_NONE or RD_CHECK_EMPTY */
+};
+
+/*
+ * This filldir function makes sure only whiteouts exist within a directory.
+ *
+ * "." and ".." are ignored, and so is a name already recorded in the
+ * rdstate.  In RD_CHECK_EMPTY mode any other non-whiteout name yields
+ * -ENOTEMPTY; otherwise the name is recorded.  The return value is also
+ * stored in buf->err.
+ */
+static int readdir_util_callback(void *dirent, const char *name, int namelen,
+				 loff_t offset, u64 ino, unsigned int d_type)
+{
+	int err = 0;
+	struct unionfs_rdutil_callback *buf = dirent;
+	int whiteout = 0;
+	struct filldir_node *found;
+
+	buf->filldir_called = 1;
+
+	if (name[0] == '.' && (namelen == 1 ||
+			       (name[1] == '.' && namelen == 2)))
+		goto out;
+
+	if (namelen > UNIONFS_WHLEN &&
+	    !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
+		namelen -= UNIONFS_WHLEN;
+		name += UNIONFS_WHLEN;
+		whiteout = 1;
+	}
+
+	found = find_filldir_node(buf->rdstate, name, namelen);
+	/* If it was found in the table there was a previous whiteout. */
+	if (found)
+		goto out;
+
+	/*
+	 * if it wasn't found and isn't a whiteout, the directory isn't
+	 * empty.
+	 */
+	err = -ENOTEMPTY;
+	if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
+		goto out;
+
+	err = add_filldir_node(buf->rdstate, name, namelen,
+			       buf->rdstate->bindex, whiteout);
+
+out:
+	buf->err = err;
+	return err;
+}
+
+/*
+ * Is a directory logically empty?
+ *
+ * Reads every lower directory from dbstart up to dbend (or up to the
+ * opaque branch, if that comes first) with readdir_util_callback().
+ * Returns 0 if only whiteouts (and "."/"..") were found, -ENOTEMPTY if a
+ * real entry is visible, or another negative errno.  On success, and if
+ * namelist is not NULL, *namelist receives the collected rdstate;
+ * otherwise the rdstate is freed here.
+ */
+int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
+{
+	int err = 0;
+	struct dentry *hidden_dentry = NULL;
+	struct super_block *sb;
+	struct file *hidden_file;
+	struct unionfs_rdutil_callback *buf = NULL;
+	int bindex, bstart, bend, bopaque;
+
+	sb = dentry->d_sb;
+
+
+	BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
+
+	if ((err = unionfs_partial_lookup(dentry)))
+		goto out;
+
+	bstart = dbstart(dentry);
+	bend = dbend(dentry);
+	bopaque = dbopaque(dentry);
+	/* do not look past an opaque branch */
+	if (0 <= bopaque && bopaque < bend)
+		bend = bopaque;
+
+	buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+	buf->err = 0;
+	buf->mode = RD_CHECK_EMPTY;
+	buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
+	if (!buf->rdstate) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Process the hidden directories with rdutil_callback as a filldir. */
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+		if (!hidden_dentry)
+			continue;
+		if (!hidden_dentry->d_inode)
+			continue;
+		if (!S_ISDIR(hidden_dentry->d_inode->i_mode))
+			continue;
+
+		dget(hidden_dentry);
+		unionfs_mntget(dentry, bindex);
+		branchget(sb, bindex);
+		hidden_file =
+			dentry_open(hidden_dentry,
+				    unionfs_lower_mnt_idx(dentry, bindex),
+				    O_RDONLY);
+		if (IS_ERR(hidden_file)) {
+			/*
+			 * dentry_open() has already done dput(dentry) and
+			 * mntput(mnt) when it returns an error, so the
+			 * references taken just above are gone; another
+			 * dput here would drop a reference we do not own.
+			 */
+			err = PTR_ERR(hidden_file);
+			branchput(sb, bindex);
+			goto out;
+		}
+
+		/* keep reading until a pass produces no more callbacks */
+		do {
+			buf->filldir_called = 0;
+			buf->rdstate->bindex = bindex;
+			err = vfs_readdir(hidden_file,
+					  readdir_util_callback, buf);
+			if (buf->err)
+				err = buf->err;
+		} while ((err >= 0) && buf->filldir_called);
+
+		/* fput calls dput for hidden_dentry */
+		fput(hidden_file);
+		branchput(sb, bindex);
+
+		if (err < 0)
+			goto out;
+	}
+
+out:
+	if (buf) {
+		if (namelist && !err)
+			*namelist = buf->rdstate;
+		else if (buf->rdstate)
+			free_rdstate(buf->rdstate);
+		kfree(buf);
+	}
+
+
+	return err;
+}
diff -Nurb linux-2.6.22-570/fs/unionfs/fanout.h linux-2.6.22-591/fs/unionfs/fanout.h
--- linux-2.6.22-570/fs/unionfs/fanout.h 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/fs/unionfs/fanout.h 2007-12-21 15:36:12.000000000 -0500
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#ifndef _FANOUT_H_ +#define _FANOUT_H_ + +/* + * Inode to private data + * + * Since we use containers and the struct inode is _inside_ the + * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL + * inode pointer), return a valid non-NULL pointer. + */ +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode) +{ + return container_of(inode, struct unionfs_inode_info, vfs_inode); +} + +#define ibstart(ino) (UNIONFS_I(ino)->bstart) +#define ibend(ino) (UNIONFS_I(ino)->bend) + +/* Superblock to private data */ +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info) +#define sbstart(sb) 0 +#define sbend(sb) (UNIONFS_SB(sb)->bend) +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1) +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id) + +/* File to private data */ +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data)) +#define fbstart(file) (UNIONFS_F(file)->bstart) +#define fbend(file) (UNIONFS_F(file)->bend) + +/* helpers to manipulate the branch IDs stored in our superblock */ +static inline int branch_id(struct super_block *sb, int index) +{ + BUG_ON(!sb || index < 0); + return UNIONFS_SB(sb)->data[index].branch_id; +} + +static inline void set_branch_id(struct super_block *sb, int index, int val) +{ + BUG_ON(!sb || index < 0); + UNIONFS_SB(sb)->data[index].branch_id = val; +} + +static inline void new_branch_id(struct super_block *sb, int index) +{ + BUG_ON(!sb || index < 0); + set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id); +} + +/* + * Find the new index of the matching branch, given a superblock and a known + * (possibly old) id. This is needed because branches could have been + * added/deleted causing the branches of any open files to shift. + * + * @sb: the new superblock which may have new/different branch IDs + * @id: the old/existing id we're looking for + * Returns index of newly found branch (0 or greater), -1 otherwise.
+ */ +static inline int branch_id_to_idx(struct super_block *sb, int id) +{ + int i; + for (i = 0; i < sbmax(sb); i++) { + if (branch_id(sb, i) == id) + return i; + } + /* + * XXX: maybe we should BUG_ON if the new branch index is not found? + * (really that should never happen). + */ + printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id); + return -1; +} + +/* File to lower file (the one at the file's starting branch, fbstart). */ +static inline struct file *unionfs_lower_file(const struct file *f) +{ + BUG_ON(!f); + return UNIONFS_F(f)->lower_files[fbstart(f)]; +} + +static inline struct file *unionfs_lower_file_idx(const struct file *f, + int index) +{ + BUG_ON(!f || index < 0); + return UNIONFS_F(f)->lower_files[index]; +} + +static inline void unionfs_set_lower_file_idx(struct file *f, int index, + struct file *val) +{ + BUG_ON(!f || index < 0); + UNIONFS_F(f)->lower_files[index] = val; + /* save branch ID (may be redundant?) */ + UNIONFS_F(f)->saved_branch_ids[index] = + branch_id((f)->f_dentry->d_sb, index); +} + +static inline void unionfs_set_lower_file(struct file *f, struct file *val) +{ + BUG_ON(!f); + unionfs_set_lower_file_idx((f), fbstart(f), (val)); +} + +/* Inode to lower inode (the one at the inode's starting branch, ibstart). */ +static inline struct inode *unionfs_lower_inode(const struct inode *i) +{ + BUG_ON(!i); + return UNIONFS_I(i)->lower_inodes[ibstart(i)]; +} + +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i, + int index) +{ + BUG_ON(!i || index < 0); + return UNIONFS_I(i)->lower_inodes[index]; +} + +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index, + struct inode *val) +{ + BUG_ON(!i || index < 0); + UNIONFS_I(i)->lower_inodes[index] = val; +} + +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val) +{ + BUG_ON(!i); + UNIONFS_I(i)->lower_inodes[ibstart(i)] = val; +} + +/* Superblock to lower superblock.
*/ +static inline struct super_block *unionfs_lower_super( + const struct super_block *sb) +{ + BUG_ON(!sb); + return UNIONFS_SB(sb)->data[sbstart(sb)].sb; +} + +static inline struct super_block *unionfs_lower_super_idx( + const struct super_block *sb, + int index) +{ + BUG_ON(!sb || index < 0); + return UNIONFS_SB(sb)->data[index].sb; +} + +static inline void unionfs_set_lower_super_idx(struct super_block *sb, + int index, + struct super_block *val) +{ + BUG_ON(!sb || index < 0); + UNIONFS_SB(sb)->data[index].sb = val; +} + +static inline void unionfs_set_lower_super(struct super_block *sb, + struct super_block *val) +{ + BUG_ON(!sb); + UNIONFS_SB(sb)->data[sbstart(sb)].sb = val; +} + +/* Branch count helpers: the per-branch atomic open_files counter. */ +static inline int branch_count(const struct super_block *sb, int index) +{ + BUG_ON(!sb || index < 0); + return atomic_read(&UNIONFS_SB(sb)->data[index].open_files); +} + +static inline void set_branch_count(struct super_block *sb, int index, int val) +{ + BUG_ON(!sb || index < 0); + atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val); +} + +static inline void branchget(struct super_block *sb, int index) +{ + BUG_ON(!sb || index < 0); + atomic_inc(&UNIONFS_SB(sb)->data[index].open_files); +} + +static inline void branchput(struct super_block *sb, int index) +{ + BUG_ON(!sb || index < 0); + atomic_dec(&UNIONFS_SB(sb)->data[index].open_files); +} + +/* Dentry to private data (d_fsdata), plus bstart/bend accessors */ +static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent) +{ + BUG_ON(!dent); + return dent->d_fsdata; +} + +static inline int dbstart(const struct dentry *dent) +{ + BUG_ON(!dent); + return UNIONFS_D(dent)->bstart; +} + +static inline void set_dbstart(struct dentry *dent, int val) +{ + BUG_ON(!dent); + UNIONFS_D(dent)->bstart = val; +} + +static inline int dbend(const struct dentry *dent) +{ + BUG_ON(!dent); + return UNIONFS_D(dent)->bend; +} + +static inline void set_dbend(struct dentry *dent, int val) +{ + BUG_ON(!dent); + UNIONFS_D(dent)->bend = val; +} +
+static inline int dbopaque(const struct dentry *dent) +{ + BUG_ON(!dent); + return UNIONFS_D(dent)->bopaque; +} + +static inline void set_dbopaque(struct dentry *dent, int val) +{ + BUG_ON(!dent); + UNIONFS_D(dent)->bopaque = val; +} + +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index, + struct dentry *val) +{ + BUG_ON(!dent || index < 0); + UNIONFS_D(dent)->lower_paths[index].dentry = val; +} + +static inline struct dentry *unionfs_lower_dentry_idx( + const struct dentry *dent, + int index) +{ + BUG_ON(!dent || index < 0); + return UNIONFS_D(dent)->lower_paths[index].dentry; +} + +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent) +{ + BUG_ON(!dent); + return unionfs_lower_dentry_idx(dent, dbstart(dent)); +} + +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index, + struct vfsmount *mnt) +{ + BUG_ON(!dent || index < 0); + UNIONFS_D(dent)->lower_paths[index].mnt = mnt; +} + +static inline struct vfsmount *unionfs_lower_mnt_idx( + const struct dentry *dent, + int index) +{ + BUG_ON(!dent || index < 0); + return UNIONFS_D(dent)->lower_paths[index].mnt; +} + +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent) +{ + BUG_ON(!dent); + return unionfs_lower_mnt_idx(dent, dbstart(dent)); +} + +/* Helpers for locking a dentry (the mutex in its unionfs private data).
*/ +static inline void unionfs_lock_dentry(struct dentry *d) +{ + BUG_ON(!d); + mutex_lock(&UNIONFS_D(d)->lock); +} + +static inline void unionfs_unlock_dentry(struct dentry *d) +{ + BUG_ON(!d); + mutex_unlock(&UNIONFS_D(d)->lock); +} + +static inline void verify_locked(struct dentry *d) +{ + BUG_ON(!d); + BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock)); +} + +#endif /* _FANOUT_H_ */ diff -Nurb linux-2.6.22-570/fs/unionfs/file.c linux-2.6.22-591/fs/unionfs/file.c --- linux-2.6.22-570/fs/unionfs/file.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/file.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#include "union.h" + +/******************* + * File Operations * + *******************/ +/* Synchronous read; a successful read also updates atime via the lower mnt/dentry. */ +static ssize_t unionfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int err; + + unionfs_read_lock(file->f_path.dentry->d_sb); + + if ((err = unionfs_file_revalidate(file, 0))) + goto out; + + err = do_sync_read(file, buf, count, ppos); + + if (err >= 0) + touch_atime(unionfs_lower_mnt(file->f_path.dentry), + unionfs_lower_dentry(file->f_path.dentry)); + +out: + unionfs_read_unlock(file->f_path.dentry->d_sb); + return err; +} +/* AIO read; waits for a queued request to finish, then updates atime like unionfs_read. */ +static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + int err = 0; + struct file *file = iocb->ki_filp; + + unionfs_read_lock(file->f_path.dentry->d_sb); + + if ((err = unionfs_file_revalidate(file, 0))) + goto out; + + err = generic_file_aio_read(iocb, iov, nr_segs, pos); + + if (err == -EIOCBQUEUED) + err = wait_on_sync_kiocb(iocb); + + if (err >= 0) + touch_atime(unionfs_lower_mnt(file->f_path.dentry), + unionfs_lower_dentry(file->f_path.dentry)); + +out: + unionfs_read_unlock(file->f_path.dentry->d_sb); + return err; +} +static ssize_t unionfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int err = 0; + + unionfs_read_lock(file->f_path.dentry->d_sb); + + if ((err = unionfs_file_revalidate(file, 1))) + goto out; + + err = do_sync_write(file, buf, count, ppos); + +out: + unionfs_read_unlock(file->f_path.dentry->d_sb); + return err; +} +/* readdir on this (non-directory) file_operations table always fails with -ENOTDIR. */ +static int unionfs_file_readdir(struct file *file, void *dirent, + filldir_t filldir) +{ + return -ENOTDIR; +} +/* mmap: revalidate with the mapping's write intent, refuse writable maps if the lower fs lacks ->writepage, else generic_file_mmap. */ +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err = 0; + int willwrite; + struct file *lower_file; + + unionfs_read_lock(file->f_path.dentry->d_sb); + + /* + * Revalidate once, below, with the real write intent (passing 1 here would + * claim that even read-only maps write); the write may be deferred to writepage. + */ + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) ==
vma->vm_flags); + if ((err = unionfs_file_revalidate(file, willwrite))) + goto out; + + /* + * File systems which do not implement ->writepage may use + * generic_file_readonly_mmap as their ->mmap op. If you call + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. + * But we cannot call the lower ->mmap op, so we can't tell that + * writeable mappings won't work. Therefore, our only choice is to + * check if the lower file system supports the ->writepage, and if + * not, return EINVAL (the same error that + * generic_file_readonly_mmap returns in that case). + */ + lower_file = unionfs_lower_file(file); + if (willwrite && !lower_file->f_mapping->a_ops->writepage) { + err = -EINVAL; + printk(KERN_WARNING "unionfs: branch %d file system does not support " + "writeable mmap\n", fbstart(file)); + } else { + err = generic_file_mmap(file, vma); + if (err) + printk(KERN_WARNING "unionfs: generic_file_mmap failed %d\n", err); + } + +out: + unionfs_read_unlock(file->f_path.dentry->d_sb); + return err; +} + +struct file_operations unionfs_main_fops = { + .llseek = generic_file_llseek, + .read = unionfs_read, + .aio_read = unionfs_aio_read, + .write = unionfs_write, + .aio_write = generic_file_aio_write, + .readdir = unionfs_file_readdir, + .unlocked_ioctl = unionfs_ioctl, + .mmap = unionfs_mmap, + .open = unionfs_open, + .flush = unionfs_flush, + .release = unionfs_file_release, + .fsync = file_fsync, + .sendfile = generic_file_sendfile, +}; diff -Nurb linux-2.6.22-570/fs/unionfs/inode.c linux-2.6.22-591/fs/unionfs/inode.c --- linux-2.6.22-570/fs/unionfs/inode.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/inode.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,1138 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P.
Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +static int unionfs_create(struct inode *parent, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + int err = 0; + struct dentry *hidden_dentry = NULL; + struct dentry *wh_dentry = NULL; + struct dentry *new_hidden_dentry; + struct dentry *hidden_parent_dentry = NULL; + int bindex = 0, bstart; + char *name = NULL; + int valid = 0; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + unionfs_lock_dentry(dentry->d_parent); + valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd); + unionfs_unlock_dentry(dentry->d_parent); + if (!valid) { + err = -ESTALE; /* same as what real_lookup does */ + goto out; + } + valid = __unionfs_d_revalidate_chain(dentry, nd); + /* + * It's only a bug if this dentry was not negative and couldn't be + * revalidated (shouldn't happen). + */ + BUG_ON(!valid && dentry->d_inode); + + /* We start out in the leftmost branch. */ + bstart = dbstart(dentry); + hidden_dentry = unionfs_lower_dentry(dentry); + + /* + * check if whiteout exists in this branch, i.e. lookup .wh.foo + * first. + */ + name = alloc_whname(dentry->d_name.name, dentry->d_name.len); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + + wh_dentry = lookup_one_len(name, hidden_dentry->d_parent, + dentry->d_name.len + UNIONFS_WHLEN); + if (IS_ERR(wh_dentry)) { + err = PTR_ERR(wh_dentry); + wh_dentry = NULL; + goto out; + } + + if (wh_dentry->d_inode) { + /* + * .wh.foo has been found. 
+ * First truncate it and then rename it to foo (hence having + * the same overall effect as a normal create. + */ + struct dentry *hidden_dir_dentry; + struct iattr newattrs; + + mutex_lock(&wh_dentry->d_inode->i_mutex); + newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_ATIME + | ATTR_MTIME | ATTR_UID | ATTR_GID | ATTR_FORCE + | ATTR_KILL_SUID | ATTR_KILL_SGID; + + newattrs.ia_mode = mode & ~current->fs->umask; + newattrs.ia_uid = current->fsuid; + newattrs.ia_gid = current->fsgid; + + if (wh_dentry->d_inode->i_size != 0) { + newattrs.ia_valid |= ATTR_SIZE; + newattrs.ia_size = 0; + } + + err = notify_change(wh_dentry, &newattrs); + + mutex_unlock(&wh_dentry->d_inode->i_mutex); + + if (err) + printk(KERN_WARNING "unionfs: %s:%d: notify_change " + "failed: %d, ignoring..\n", + __FILE__, __LINE__, err); + + new_hidden_dentry = unionfs_lower_dentry(dentry); + dget(new_hidden_dentry); + + hidden_dir_dentry = dget_parent(wh_dentry); + lock_rename(hidden_dir_dentry, hidden_dir_dentry); + + if (!(err = is_robranch_super(dentry->d_sb, bstart))) { + err = vfs_rename(hidden_dir_dentry->d_inode, + wh_dentry, + hidden_dir_dentry->d_inode, + new_hidden_dentry); + } + if (!err) { + fsstack_copy_attr_times(parent, + new_hidden_dentry->d_parent-> + d_inode); + fsstack_copy_inode_size(parent, + new_hidden_dentry->d_parent-> + d_inode); + parent->i_nlink = unionfs_get_nlinks(parent); + } + + unlock_rename(hidden_dir_dentry, hidden_dir_dentry); + dput(hidden_dir_dentry); + + dput(new_hidden_dentry); + + if (err) { + /* exit if the error returned was NOT -EROFS */ + if (!IS_COPYUP_ERR(err)) + goto out; + /* + * We were not able to create the file in this + * branch, so, we try to create it in one branch to + * left + */ + bstart--; + } else { + /* + * reset the unionfs dentry to point to the .wh.foo + * entry. + */ + + /* Discard any old reference. */ + dput(unionfs_lower_dentry(dentry)); + + /* Trade one reference to another. 
*/ + unionfs_set_lower_dentry_idx(dentry, bstart, + wh_dentry); + wh_dentry = NULL; + + err = unionfs_interpose(dentry, parent->i_sb, 0); + goto out; + } + } + + for (bindex = bstart; bindex >= 0; bindex--) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) { + /* + * if hidden_dentry is NULL, create the entire + * dentry directory structure in branch 'bindex'. + * hidden_dentry will NOT be null when bindex == bstart + * because lookup passed as a negative unionfs dentry + * pointing to a lone negative underlying dentry. + */ + hidden_dentry = create_parents(parent, dentry, bindex); + if (!hidden_dentry || IS_ERR(hidden_dentry)) { + if (IS_ERR(hidden_dentry)) + err = PTR_ERR(hidden_dentry); + continue; + } + } + + hidden_parent_dentry = lock_parent(hidden_dentry); + if (IS_ERR(hidden_parent_dentry)) { + err = PTR_ERR(hidden_parent_dentry); + goto out; + } + /* We shouldn't create things in a read-only branch. */ + if (!(err = is_robranch_super(dentry->d_sb, bindex))) + err = vfs_create(hidden_parent_dentry->d_inode, + hidden_dentry, mode, nd); + + if (err || !hidden_dentry->d_inode) { + unlock_dir(hidden_parent_dentry); + + /* break out of for loop if the error wasn't -EROFS */ + if (!IS_COPYUP_ERR(err)) + break; + } else { + err = unionfs_interpose(dentry, parent->i_sb, 0); + if (!err) { + fsstack_copy_attr_times(parent, + hidden_parent_dentry-> + d_inode); + fsstack_copy_inode_size(parent, + hidden_parent_dentry-> + d_inode); + /* update no. 
of links on parent directory */ + parent->i_nlink = unionfs_get_nlinks(parent); + } + unlock_dir(hidden_parent_dentry); + break; + } + } + +out: + dput(wh_dentry); + kfree(name); + + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +static struct dentry *unionfs_lookup(struct inode *parent, + struct dentry *dentry, + struct nameidata *nd) +{ + struct path path_save; + struct dentry *ret; + + unionfs_read_lock(dentry->d_sb); + + /* save the dentry & vfsmnt from namei */ + if (nd) { + path_save.dentry = nd->dentry; + path_save.mnt = nd->mnt; + } + + /* The locking is done by unionfs_lookup_backend. */ + ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP); + + /* restore the dentry & vfsmnt in namei */ + if (nd) { + nd->dentry = path_save.dentry; + nd->mnt = path_save.mnt; + } + + unionfs_read_unlock(dentry->d_sb); + + return ret; +} + +static int unionfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + int err = 0; + struct dentry *hidden_old_dentry = NULL; + struct dentry *hidden_new_dentry = NULL; + struct dentry *hidden_dir_dentry = NULL; + struct dentry *whiteout_dentry; + char *name = NULL; + + unionfs_read_lock(old_dentry->d_sb); + unionfs_double_lock_dentry(new_dentry, old_dentry); + + if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) { + err = -ESTALE; + goto out; + } + if (new_dentry->d_inode && + !__unionfs_d_revalidate_chain(new_dentry, NULL)) { + err = -ESTALE; + goto out; + } + + hidden_new_dentry = unionfs_lower_dentry(new_dentry); + + /* + * check if whiteout exists in the branch of new dentry, i.e. lookup + * .wh.foo first. 
If present, delete it + */ + name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + + whiteout_dentry = lookup_one_len(name, hidden_new_dentry->d_parent, + new_dentry->d_name.len + + UNIONFS_WHLEN); + if (IS_ERR(whiteout_dentry)) { + err = PTR_ERR(whiteout_dentry); + goto out; + } + + if (!whiteout_dentry->d_inode) { + dput(whiteout_dentry); + whiteout_dentry = NULL; + } else { + /* found a .wh.foo entry, unlink it and then call vfs_link() */ + hidden_dir_dentry = lock_parent(whiteout_dentry); + err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry)); + if (!err) + err = vfs_unlink(hidden_dir_dentry->d_inode, + whiteout_dentry); + + fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode); + dir->i_nlink = unionfs_get_nlinks(dir); + unlock_dir(hidden_dir_dentry); + hidden_dir_dentry = NULL; + dput(whiteout_dentry); + if (err) + goto out; + } + + if (dbstart(old_dentry) != dbstart(new_dentry)) { + hidden_new_dentry = + create_parents(dir, new_dentry, dbstart(old_dentry)); + err = PTR_ERR(hidden_new_dentry); + if (IS_COPYUP_ERR(err)) + goto docopyup; + if (!hidden_new_dentry || IS_ERR(hidden_new_dentry)) + goto out; + } + hidden_new_dentry = unionfs_lower_dentry(new_dentry); + hidden_old_dentry = unionfs_lower_dentry(old_dentry); + + BUG_ON(dbstart(old_dentry) != dbstart(new_dentry)); + hidden_dir_dentry = lock_parent(hidden_new_dentry); + if (!(err = is_robranch(old_dentry))) + err = vfs_link(hidden_old_dentry, hidden_dir_dentry->d_inode, + hidden_new_dentry); + unlock_dir(hidden_dir_dentry); + +docopyup: + if (IS_COPYUP_ERR(err)) { + int old_bstart = dbstart(old_dentry); + int bindex; + + for (bindex = old_bstart - 1; bindex >= 0; bindex--) { + err = copyup_dentry(old_dentry->d_parent->d_inode, + old_dentry, old_bstart, + bindex, NULL, + old_dentry->d_inode->i_size); + if (!err) { + hidden_new_dentry = + create_parents(dir, new_dentry, + bindex); + hidden_old_dentry = + 
unionfs_lower_dentry(old_dentry); + hidden_dir_dentry = + lock_parent(hidden_new_dentry); + /* do vfs_link */ + err = vfs_link(hidden_old_dentry, + hidden_dir_dentry->d_inode, + hidden_new_dentry); + unlock_dir(hidden_dir_dentry); + goto check_link; + } + } + goto out; + } + +check_link: + if (err || !hidden_new_dentry->d_inode) + goto out; + + /* Its a hard link, so use the same inode */ + new_dentry->d_inode = igrab(old_dentry->d_inode); + d_instantiate(new_dentry, new_dentry->d_inode); + fsstack_copy_attr_all(dir, hidden_new_dentry->d_parent->d_inode, + unionfs_get_nlinks); + fsstack_copy_inode_size(dir, hidden_new_dentry->d_parent->d_inode); + + /* propagate number of hard-links */ + old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode); + +out: + if (!new_dentry->d_inode) + d_drop(new_dentry); + + kfree(name); + + unionfs_unlock_dentry(new_dentry); + unionfs_unlock_dentry(old_dentry); + + unionfs_read_unlock(old_dentry->d_sb); + + return err; +} + +static int unionfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + int err = 0; + struct dentry *hidden_dentry = NULL; + struct dentry *whiteout_dentry = NULL; + struct dentry *hidden_dir_dentry = NULL; + umode_t mode; + int bindex = 0, bstart; + char *name = NULL; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (dentry->d_inode && + !__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + /* We start out in the leftmost branch. */ + bstart = dbstart(dentry); + + hidden_dentry = unionfs_lower_dentry(dentry); + + /* + * check if whiteout exists in this branch, i.e. lookup .wh.foo + * first. 
If present, delete it + */ + name = alloc_whname(dentry->d_name.name, dentry->d_name.len); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + + whiteout_dentry = + lookup_one_len(name, hidden_dentry->d_parent, + dentry->d_name.len + UNIONFS_WHLEN); + if (IS_ERR(whiteout_dentry)) { + err = PTR_ERR(whiteout_dentry); + goto out; + } + + if (!whiteout_dentry->d_inode) { + dput(whiteout_dentry); + whiteout_dentry = NULL; + } else { + /* + * found a .wh.foo entry, unlink it and then call + * vfs_symlink(). + */ + hidden_dir_dentry = lock_parent(whiteout_dentry); + + if (!(err = is_robranch_super(dentry->d_sb, bstart))) + err = vfs_unlink(hidden_dir_dentry->d_inode, + whiteout_dentry); + dput(whiteout_dentry); + + fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode); + /* propagate number of hard-links */ + dir->i_nlink = unionfs_get_nlinks(dir); + + unlock_dir(hidden_dir_dentry); + + if (err) { + /* exit if the error returned was NOT -EROFS */ + if (!IS_COPYUP_ERR(err)) + goto out; + /* + * should now try to create symlink in the another + * branch. + */ + bstart--; + } + } + + /* + * deleted whiteout if it was present, now do a normal vfs_symlink() + * with possible recursive directory creation + */ + for (bindex = bstart; bindex >= 0; bindex--) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) { + /* + * if hidden_dentry is NULL, create the entire + * dentry directory structure in branch 'bindex'. 
+ * hidden_dentry will NOT be null when bindex == + * bstart because lookup passed as a negative + * unionfs dentry pointing to a lone negative + * underlying dentry + */ + hidden_dentry = create_parents(dir, dentry, bindex); + if (!hidden_dentry || IS_ERR(hidden_dentry)) { + if (IS_ERR(hidden_dentry)) + err = PTR_ERR(hidden_dentry); + + printk(KERN_DEBUG "unionfs: hidden dentry " + "NULL (or error) for bindex = %d\n", + bindex); + continue; + } + } + + hidden_dir_dentry = lock_parent(hidden_dentry); + + if (!(err = is_robranch_super(dentry->d_sb, bindex))) { + mode = S_IALLUGO; + err = + vfs_symlink(hidden_dir_dentry->d_inode, + hidden_dentry, symname, mode); + } + unlock_dir(hidden_dir_dentry); + + if (err || !hidden_dentry->d_inode) { + /* + * break out of for loop if error returned was NOT + * -EROFS. + */ + if (!IS_COPYUP_ERR(err)) + break; + } else { + err = unionfs_interpose(dentry, dir->i_sb, 0); + if (!err) { + fsstack_copy_attr_times(dir, + hidden_dir_dentry-> + d_inode); + fsstack_copy_inode_size(dir, + hidden_dir_dentry-> + d_inode); + /* + * update number of links on parent + * directory. + */ + dir->i_nlink = unionfs_get_nlinks(dir); + } + break; + } + } + +out: + if (!dentry->d_inode) + d_drop(dentry); + + kfree(name); + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode) +{ + int err = 0; + struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL; + struct dentry *hidden_parent_dentry = NULL; + int bindex = 0, bstart; + char *name = NULL; + int whiteout_unlinked = 0; + struct sioq_args args; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (dentry->d_inode && + !__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + bstart = dbstart(dentry); + + hidden_dentry = unionfs_lower_dentry(dentry); + + /* + * check if whiteout exists in this branch, i.e. lookup .wh.foo + * first. 
+ */ + name = alloc_whname(dentry->d_name.name, dentry->d_name.len); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + + whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent, + dentry->d_name.len + UNIONFS_WHLEN); + if (IS_ERR(whiteout_dentry)) { + err = PTR_ERR(whiteout_dentry); + goto out; + } + + if (!whiteout_dentry->d_inode) { + dput(whiteout_dentry); + whiteout_dentry = NULL; + } else { + hidden_parent_dentry = lock_parent(whiteout_dentry); + + /* found a.wh.foo entry, remove it then do vfs_mkdir */ + if (!(err = is_robranch_super(dentry->d_sb, bstart))) { + args.unlink.parent = hidden_parent_dentry->d_inode; + args.unlink.dentry = whiteout_dentry; + run_sioq(__unionfs_unlink, &args); + err = args.err; + } + dput(whiteout_dentry); + + unlock_dir(hidden_parent_dentry); + + if (err) { + /* exit if the error returned was NOT -EROFS */ + if (!IS_COPYUP_ERR(err)) + goto out; + bstart--; + } else + whiteout_unlinked = 1; + } + + for (bindex = bstart; bindex >= 0; bindex--) { + int i; + int bend = dbend(dentry); + + if (is_robranch_super(dentry->d_sb, bindex)) + continue; + + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) { + hidden_dentry = create_parents(parent, dentry, bindex); + if (!hidden_dentry || IS_ERR(hidden_dentry)) { + printk(KERN_DEBUG "unionfs: hidden dentry " + " NULL for bindex = %d\n", bindex); + continue; + } + } + + hidden_parent_dentry = lock_parent(hidden_dentry); + + if (IS_ERR(hidden_parent_dentry)) { + err = PTR_ERR(hidden_parent_dentry); + goto out; + } + + err = vfs_mkdir(hidden_parent_dentry->d_inode, hidden_dentry, + mode); + + unlock_dir(hidden_parent_dentry); + + /* did the mkdir succeed? 
*/ + if (err) + break; + + for (i = bindex + 1; i < bend; i++) { + if (unionfs_lower_dentry_idx(dentry, i)) { + dput(unionfs_lower_dentry_idx(dentry, i)); + unionfs_set_lower_dentry_idx(dentry, i, NULL); + } + } + set_dbend(dentry, bindex); + + err = unionfs_interpose(dentry, parent->i_sb, 0); + if (!err) { + fsstack_copy_attr_times(parent, + hidden_parent_dentry->d_inode); + fsstack_copy_inode_size(parent, + hidden_parent_dentry->d_inode); + + /* update number of links on parent directory */ + parent->i_nlink = unionfs_get_nlinks(parent); + } + + err = make_dir_opaque(dentry, dbstart(dentry)); + if (err) { + printk(KERN_ERR "unionfs: mkdir: error creating " + ".wh.__dir_opaque: %d\n", err); + goto out; + } + + /* we are done! */ + break; + } + +out: + if (!dentry->d_inode) + d_drop(dentry); + + kfree(name); + + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t dev) +{ + int err = 0; + struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL; + struct dentry *hidden_parent_dentry = NULL; + int bindex = 0, bstart; + char *name = NULL; + int whiteout_unlinked = 0; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (dentry->d_inode && + !__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + bstart = dbstart(dentry); + + hidden_dentry = unionfs_lower_dentry(dentry); + + /* + * check if whiteout exists in this branch, i.e. lookup .wh.foo + * first. 
+ */ + name = alloc_whname(dentry->d_name.name, dentry->d_name.len); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + + whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent, + dentry->d_name.len + UNIONFS_WHLEN); + if (IS_ERR(whiteout_dentry)) { + err = PTR_ERR(whiteout_dentry); + goto out; + } + + if (!whiteout_dentry->d_inode) { + dput(whiteout_dentry); + whiteout_dentry = NULL; + } else { + /* found .wh.foo, unlink it */ + hidden_parent_dentry = lock_parent(whiteout_dentry); + + /* found a.wh.foo entry, remove it then do vfs_mkdir */ + if (!(err = is_robranch_super(dentry->d_sb, bstart))) + err = vfs_unlink(hidden_parent_dentry->d_inode, + whiteout_dentry); + dput(whiteout_dentry); + + unlock_dir(hidden_parent_dentry); + + if (err) { + if (!IS_COPYUP_ERR(err)) + goto out; + + bstart--; + } else + whiteout_unlinked = 1; + } + + for (bindex = bstart; bindex >= 0; bindex--) { + if (is_robranch_super(dentry->d_sb, bindex)) + continue; + + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) { + hidden_dentry = create_parents(dir, dentry, bindex); + if (IS_ERR(hidden_dentry)) { + printk(KERN_DEBUG "unionfs: failed to create " + "parents on %d, err = %ld\n", + bindex, PTR_ERR(hidden_dentry)); + continue; + } + } + + hidden_parent_dentry = lock_parent(hidden_dentry); + if (IS_ERR(hidden_parent_dentry)) { + err = PTR_ERR(hidden_parent_dentry); + goto out; + } + + err = vfs_mknod(hidden_parent_dentry->d_inode, + hidden_dentry, mode, dev); + + if (err) { + unlock_dir(hidden_parent_dentry); + break; + } + + err = unionfs_interpose(dentry, dir->i_sb, 0); + if (!err) { + fsstack_copy_attr_times(dir, + hidden_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, + hidden_parent_dentry->d_inode); + /* update number of links on parent directory */ + dir->i_nlink = unionfs_get_nlinks(dir); + } + unlock_dir(hidden_parent_dentry); + + break; + } + +out: + if (!dentry->d_inode) + d_drop(dentry); + + kfree(name); + + 
unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +static int unionfs_readlink(struct dentry *dentry, char __user *buf, + int bufsiz) +{ + int err; + struct dentry *hidden_dentry; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + hidden_dentry = unionfs_lower_dentry(dentry); + + if (!hidden_dentry->d_inode->i_op || + !hidden_dentry->d_inode->i_op->readlink) { + err = -EINVAL; + goto out; + } + + err = hidden_dentry->d_inode->i_op->readlink(hidden_dentry, + buf, bufsiz); + if (err > 0) + fsstack_copy_attr_atime(dentry->d_inode, + hidden_dentry->d_inode); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +/* + * Check if dentry is valid or not, as per our generation numbers. + * @dentry: dentry to check. + * Returns 1 (valid) or 0 (invalid/stale). + */ +static inline int is_valid_dentry(struct dentry *dentry) +{ + BUG_ON(!UNIONFS_D(dentry)); + BUG_ON(!UNIONFS_SB(dentry->d_sb)); + return (atomic_read(&UNIONFS_D(dentry)->generation) == + atomic_read(&UNIONFS_SB(dentry->d_sb)->generation)); +} + +/* We don't lock the dentry here, because readlink does the heavy lifting. */ +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *buf; + int len = PAGE_SIZE, err; + mm_segment_t old_fs; + + /* + * FIXME: Really nasty...we can get called from two distinct places: + * 1) read_link - locks the dentry + * 2) VFS lookup code - does NOT lock the dentry + * + * The proper thing would be to call dentry revalidate. It however + * expects a locked dentry, and we can't cleanly guarantee that. + */ + BUG_ON(!is_valid_dentry(dentry)); + + unionfs_read_lock(dentry->d_sb); + + /* This is freed by the put_link method assuming a successful call. 
*/ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto out; + } + + /* read the symlink, leaving room for the NUL appended below */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len - 1); + set_fs(old_fs); + if (err < 0) { + kfree(buf); + buf = NULL; + goto out; + } + buf[err] = 0; + nd_set_link(nd, buf); + err = 0; + +out: + unionfs_read_unlock(dentry->d_sb); + return ERR_PTR(err); +} + +/* frees the follow_link buffer; FIXME: We may not have to lock here */ +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + unionfs_read_lock(dentry->d_sb); + kfree(nd_get_link(nd)); + unionfs_read_unlock(dentry->d_sb); +} + +/* + * Basically copied from the kernel vfs permission(), but we've changed + * the following: + * (1) the IS_RDONLY check is skipped, and + * (2) if you set the mount option `mode=nfsro', we assume that -EACCES + * means that the export is read-only and we should check standard Unix + * permissions. This means that NFS ACL checks (or other advanced + * permission features) are bypassed. Note however, that we do call + * security_inode_permission, and therefore security inside SELinux, etc. + * are performed. + */ +static int inode_permission(struct inode *inode, int mask, + struct nameidata *nd, int bindex) +{ + int retval, submask; + + if (mask & MAY_WRITE) { + /* The first branch is allowed to be really readonly. */ + if (bindex == 0) { + umode_t mode = inode->i_mode; + if (IS_RDONLY(inode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + } + /* + * Nobody gets write access to an immutable file. + */ + if (IS_IMMUTABLE(inode)) + return -EACCES; + } + + /* Ordinary permission routines do not understand MAY_APPEND. 
*/ + submask = mask & ~MAY_APPEND; + if (inode->i_op && inode->i_op->permission) { + retval = inode->i_op->permission(inode, submask, nd); + if ((retval == -EACCES) && (submask & MAY_WRITE) && + (!strcmp("nfs", (inode)->i_sb->s_type->name)) && + (nd) && (nd->mnt) && (nd->mnt->mnt_sb)) { + int perms; + perms = branchperms(nd->mnt->mnt_sb, bindex); + if (perms & MAY_NFSRO) + retval = generic_permission(inode, submask, + NULL); + } + } else + retval = generic_permission(inode, submask, NULL); + + if (retval && retval != -EROFS) /* ignore EROFS */ + return retval; + + retval = security_inode_permission(inode, mask, nd); + return ((retval == -EROFS) ? 0 : retval); /* ignore EROFS */ +} + +static int unionfs_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct inode *hidden_inode = NULL; + int err = 0; + int bindex, bstart, bend; + const int is_file = !S_ISDIR(inode->i_mode); + const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ); + + unionfs_read_lock(inode->i_sb); + + bstart = ibstart(inode); + bend = ibend(inode); + if (bstart < 0 || bend < 0) { + /* + * With branch-management, we can get a stale inode here. + * If so, we return ESTALE back to link_path_walk, which + * would discard the dcache entry and re-lookup the + * dentry+inode. This should be equivalent to issuing + * __unionfs_d_revalidate_chain on nd.dentry here. + */ + err = -ESTALE; /* force revalidate */ + goto out; + } + + for (bindex = bstart; bindex <= bend; bindex++) { + hidden_inode = unionfs_lower_inode_idx(inode, bindex); + if (!hidden_inode) + continue; + + /* + * check the condition for D-F-D underlying files/directories, + * we don't have to check for files, if we are checking for + * directories. + */ + if (!is_file && !S_ISDIR(hidden_inode->i_mode)) + continue; + + /* + * We use our own special version of permission, such that + * only the first branch returns -EROFS. 
+ */ + err = inode_permission(hidden_inode, mask, nd, bindex); + + /* + * The permissions are an intersection of the overall directory + * permissions, so we fail if one fails. + */ + if (err) + goto out; + + /* only the leftmost file matters. */ + if (is_file || write_mask) { + if (is_file && write_mask) { + err = get_write_access(hidden_inode); + if (!err) + put_write_access(hidden_inode); + } + break; + } + } + +out: + unionfs_read_unlock(inode->i_sb); + return err; +} + +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia) +{ + int err = 0; + struct dentry *hidden_dentry; + struct inode *inode = NULL; + struct inode *hidden_inode = NULL; + int bstart, bend, bindex; + int i; + int copyup = 0; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + bstart = dbstart(dentry); + bend = dbend(dentry); + inode = dentry->d_inode; + + for (bindex = bstart; (bindex <= bend) || (bindex == bstart); + bindex++) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) + continue; + BUG_ON(hidden_dentry->d_inode == NULL); + + /* If the file is on a read only branch */ + if (is_robranch_super(dentry->d_sb, bindex) + || IS_RDONLY(hidden_dentry->d_inode)) { + if (copyup || (bindex != bstart)) + continue; + /* Only if its the leftmost file, copyup the file */ + for (i = bstart - 1; i >= 0; i--) { + loff_t size = dentry->d_inode->i_size; + if (ia->ia_valid & ATTR_SIZE) + size = ia->ia_size; + err = copyup_dentry(dentry->d_parent->d_inode, + dentry, bstart, i, NULL, + size); + + if (!err) { + copyup = 1; + hidden_dentry = + unionfs_lower_dentry(dentry); + break; + } + /* + * if error is in the leftmost branch, pass + * it up. 
+ */ + if (i == 0) + goto out; + } + + } + err = notify_change(hidden_dentry, ia); + if (err) + goto out; + break; + } + + /* for mmap */ + if (ia->ia_valid & ATTR_SIZE) { + if (ia->ia_size != i_size_read(inode)) { + err = vmtruncate(inode, ia->ia_size); + if (err) + printk("unionfs_setattr: vmtruncate failed\n"); + } + } + + /* get the size from the first hidden inode */ + hidden_inode = unionfs_lower_inode(dentry->d_inode); + fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks); + fsstack_copy_inode_size(inode, hidden_inode); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +struct inode_operations unionfs_symlink_iops = { + .readlink = unionfs_readlink, + .permission = unionfs_permission, + .follow_link = unionfs_follow_link, + .setattr = unionfs_setattr, + .put_link = unionfs_put_link, +}; + +struct inode_operations unionfs_dir_iops = { + .create = unionfs_create, + .lookup = unionfs_lookup, + .link = unionfs_link, + .unlink = unionfs_unlink, + .symlink = unionfs_symlink, + .mkdir = unionfs_mkdir, + .rmdir = unionfs_rmdir, + .mknod = unionfs_mknod, + .rename = unionfs_rename, + .permission = unionfs_permission, + .setattr = unionfs_setattr, +#ifdef CONFIG_UNION_FS_XATTR + .setxattr = unionfs_setxattr, + .getxattr = unionfs_getxattr, + .removexattr = unionfs_removexattr, + .listxattr = unionfs_listxattr, +#endif +}; + +struct inode_operations unionfs_main_iops = { + .permission = unionfs_permission, + .setattr = unionfs_setattr, +#ifdef CONFIG_UNION_FS_XATTR + .setxattr = unionfs_setxattr, + .getxattr = unionfs_getxattr, + .removexattr = unionfs_removexattr, + .listxattr = unionfs_listxattr, +#endif +}; diff -Nurb linux-2.6.22-570/fs/unionfs/lookup.c linux-2.6.22-591/fs/unionfs/lookup.c --- linux-2.6.22-570/fs/unionfs/lookup.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/lookup.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * 
Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* is the filename valid == !(whiteout for a file or opaque dir marker) */ +static int is_validname(const char *name) +{ + if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) + return 0; + if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME, + sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1)) + return 0; + return 1; +} + +/* The rest of these are utility functions for lookup. 
*/ +static noinline int is_opaque_dir(struct dentry *dentry, int bindex) +{ + int err = 0; + struct dentry *hidden_dentry; + struct dentry *wh_hidden_dentry; + struct inode *hidden_inode; + struct sioq_args args; + + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + hidden_inode = hidden_dentry->d_inode; + + BUG_ON(!S_ISDIR(hidden_inode->i_mode)); + + mutex_lock(&hidden_inode->i_mutex); + + if (!permission(hidden_inode, MAY_EXEC, NULL)) + wh_hidden_dentry = + lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry, + sizeof(UNIONFS_DIR_OPAQUE) - 1); + else { + args.is_opaque.dentry = hidden_dentry; + run_sioq(__is_opaque_dir, &args); + wh_hidden_dentry = args.ret; + } + + mutex_unlock(&hidden_inode->i_mutex); + + if (IS_ERR(wh_hidden_dentry)) { + err = PTR_ERR(wh_hidden_dentry); + goto out; + } + + /* This is an opaque dir iff wh_hidden_dentry is positive */ + err = !!wh_hidden_dentry->d_inode; + + dput(wh_hidden_dentry); +out: + return err; +} + +/* main (and complex) driver function for Unionfs's lookup */ +struct dentry *unionfs_lookup_backend(struct dentry *dentry, + struct nameidata *nd, int lookupmode) +{ + int err = 0; + struct dentry *hidden_dentry = NULL; + struct dentry *wh_hidden_dentry = NULL; + struct dentry *hidden_dir_dentry = NULL; + struct dentry *parent_dentry = NULL; + int bindex, bstart, bend, bopaque; + int dentry_count = 0; /* Number of positive dentries. */ + int first_dentry_offset = -1; /* -1 is uninitialized */ + struct dentry *first_dentry = NULL; + struct dentry *first_hidden_dentry = NULL; + struct vfsmount *first_hidden_mnt = NULL; + int locked_parent = 0; + int locked_child = 0; + int allocated_new_info = 0; + + int opaque; + char *whname = NULL; + const char *name; + int namelen; + + /* + * We should already have a lock on this dentry in the case of a + * partial lookup, or a revalidation. Otherwise it is returned from + * new_dentry_private_data already locked. 
+ */ + if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL || + lookupmode == INTERPOSE_REVAL_NEG) + verify_locked(dentry); + else { + BUG_ON(UNIONFS_D(dentry) != NULL); + locked_child = 1; + } + + switch(lookupmode) { + case INTERPOSE_PARTIAL: + break; + case INTERPOSE_LOOKUP: + if ((err = new_dentry_private_data(dentry))) + goto out; + allocated_new_info = 1; + break; + default: + if ((err = realloc_dentry_private_data(dentry))) + goto out; + allocated_new_info = 1; + break; + } + + /* must initialize dentry operations */ + dentry->d_op = &unionfs_dops; + + parent_dentry = dget_parent(dentry); + /* We never partial lookup the root directory. */ + if (parent_dentry != dentry) { + unionfs_lock_dentry(parent_dentry); + locked_parent = 1; + } else { + dput(parent_dentry); + parent_dentry = NULL; + goto out; + } + + name = dentry->d_name.name; + namelen = dentry->d_name.len; + + /* No dentries should get created for possible whiteout names. */ + if (!is_validname(name)) { + err = -EPERM; + goto out_free; + } + + /* Now start the actual lookup procedure. */ + bstart = dbstart(parent_dentry); + bend = dbend(parent_dentry); + bopaque = dbopaque(parent_dentry); + BUG_ON(bstart < 0); + + /* + * It would be ideal if we could convert partial lookups to only have + * to do this work when they really need to. It could probably improve + * performance quite a bit, and maybe simplify the rest of the code. 
+ */ + if (lookupmode == INTERPOSE_PARTIAL) { + bstart++; + if ((bopaque != -1) && (bopaque < bend)) + bend = bopaque; + } + + for (bindex = bstart; bindex <= bend; bindex++) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (lookupmode == INTERPOSE_PARTIAL && hidden_dentry) + continue; + BUG_ON(hidden_dentry != NULL); + + hidden_dir_dentry = + unionfs_lower_dentry_idx(parent_dentry, bindex); + + /* if the parent hidden dentry does not exist skip this */ + if (!(hidden_dir_dentry && hidden_dir_dentry->d_inode)) + continue; + + /* also skip it if the parent isn't a directory. */ + if (!S_ISDIR(hidden_dir_dentry->d_inode->i_mode)) + continue; + + /* Reuse the whiteout name because its value doesn't change. */ + if (!whname) { + whname = alloc_whname(name, namelen); + if (IS_ERR(whname)) { + err = PTR_ERR(whname); + goto out_free; + } + } + + /* check if whiteout exists in this branch: lookup .wh.foo */ + wh_hidden_dentry = lookup_one_len(whname, hidden_dir_dentry, + namelen + UNIONFS_WHLEN); + if (IS_ERR(wh_hidden_dentry)) { + dput(first_hidden_dentry); + unionfs_mntput(first_dentry, first_dentry_offset); + err = PTR_ERR(wh_hidden_dentry); + goto out_free; + } + + if (wh_hidden_dentry->d_inode) { + /* We found a whiteout so lets give up. 
*/ + if (S_ISREG(wh_hidden_dentry->d_inode->i_mode)) { + set_dbend(dentry, bindex); + set_dbopaque(dentry, bindex); + dput(wh_hidden_dentry); + break; + } + err = -EIO; + printk(KERN_NOTICE "unionfs: EIO: invalid whiteout " + "entry type %d.\n", + wh_hidden_dentry->d_inode->i_mode); + dput(wh_hidden_dentry); + dput(first_hidden_dentry); + unionfs_mntput(first_dentry, first_dentry_offset); + goto out_free; + } + + dput(wh_hidden_dentry); + wh_hidden_dentry = NULL; + + /* Now do regular lookup; lookup foo */ + nd->dentry = unionfs_lower_dentry_idx(dentry, bindex); + /* FIXME: fix following line for mount point crossing */ + nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex); + + hidden_dentry = lookup_one_len_nd(name, hidden_dir_dentry, + namelen, nd); + if (IS_ERR(hidden_dentry)) { + dput(first_hidden_dentry); + unionfs_mntput(first_dentry, first_dentry_offset); + err = PTR_ERR(hidden_dentry); + goto out_free; + } + + /* + * Store the first negative dentry specially, because if they + * are all negative we need this for future creates. 
+ */ + if (!hidden_dentry->d_inode) { + if (!first_hidden_dentry && (dbstart(dentry) == -1)) { + first_hidden_dentry = hidden_dentry; + /* + * FIXME: following line needs to be changed + * to allow mount-point crossing + */ + first_dentry = parent_dentry; + first_hidden_mnt = + unionfs_mntget(parent_dentry, bindex); + first_dentry_offset = bindex; + } else + dput(hidden_dentry); + + continue; + } + + /* number of positive dentries */ + dentry_count++; + + /* store underlying dentry */ + if (dbstart(dentry) == -1) + set_dbstart(dentry, bindex); + unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry); + /* + * FIXME: the following line needs to get fixed to allow + * mount-point crossing + */ + unionfs_set_lower_mnt_idx(dentry, bindex, + unionfs_mntget(parent_dentry, + bindex)); + set_dbend(dentry, bindex); + + /* update parent directory's atime with the bindex */ + fsstack_copy_attr_atime(parent_dentry->d_inode, + hidden_dir_dentry->d_inode); + + /* We terminate file lookups here. */ + if (!S_ISDIR(hidden_dentry->d_inode->i_mode)) { + if (lookupmode == INTERPOSE_PARTIAL) + continue; + if (dentry_count == 1) + goto out_positive; + /* This can only happen with mixed D-*-F-* */ + BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)-> + d_inode->i_mode)); + continue; + } + + opaque = is_opaque_dir(dentry, bindex); + if (opaque < 0) { + dput(first_hidden_dentry); + unionfs_mntput(first_dentry, first_dentry_offset); + err = opaque; + goto out_free; + } else if (opaque) { + set_dbend(dentry, bindex); + set_dbopaque(dentry, bindex); + break; + } + } + + if (dentry_count) + goto out_positive; + else + goto out_negative; + +out_negative: + if (lookupmode == INTERPOSE_PARTIAL) + goto out; + + /* If we've only got negative dentries, then use the leftmost one. */ + if (lookupmode == INTERPOSE_REVAL) { + if (dentry->d_inode) + UNIONFS_I(dentry->d_inode)->stale = 1; + + goto out; + } + /* This should only happen if we found a whiteout. 
*/ + if (first_dentry_offset == -1) { + nd->dentry = dentry; + /* FIXME: fix following line for mount point crossing */ + nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex); + + first_hidden_dentry = + lookup_one_len_nd(name, hidden_dir_dentry, + namelen, nd); + first_dentry_offset = bindex; + if (IS_ERR(first_hidden_dentry)) { + err = PTR_ERR(first_hidden_dentry); + goto out; + } + + /* + * FIXME: the following line needs to be changed to allow + * mount-point crossing + */ + first_dentry = dentry; + first_hidden_mnt = unionfs_mntget(dentry, bindex); + } + unionfs_set_lower_dentry_idx(dentry, first_dentry_offset, + first_hidden_dentry); + unionfs_set_lower_mnt_idx(dentry, first_dentry_offset, + first_hidden_mnt); + set_dbstart(dentry, first_dentry_offset); + set_dbend(dentry, first_dentry_offset); + + if (lookupmode == INTERPOSE_REVAL_NEG) + BUG_ON(dentry->d_inode != NULL); + else + d_add(dentry, NULL); + goto out; + +/* This part of the code is for positive dentries. */ +out_positive: + BUG_ON(dentry_count <= 0); + + /* + * If we're holding onto the first negative dentry & corresponding + * vfsmount - throw it out. + */ + dput(first_hidden_dentry); + unionfs_mntput(first_dentry, first_dentry_offset); + + /* Partial lookups need to re-interpose, or throw away older negs. */ + if (lookupmode == INTERPOSE_PARTIAL) { + if (dentry->d_inode) { + unionfs_reinterpose(dentry); + goto out; + } + + /* + * This somehow turned positive, so it is as if we had a + * negative revalidation. 
+ */ + lookupmode = INTERPOSE_REVAL_NEG; + + update_bstart(dentry); + bstart = dbstart(dentry); + bend = dbend(dentry); + } + + err = unionfs_interpose(dentry, dentry->d_sb, lookupmode); + if (err) + goto out_drop; + + goto out; + +out_drop: + d_drop(dentry); + +out_free: + /* should dput all the underlying dentries on error condition */ + bstart = dbstart(dentry); + if (bstart >= 0) { + bend = dbend(dentry); + for (bindex = bstart; bindex <= bend; bindex++) { + dput(unionfs_lower_dentry_idx(dentry, bindex)); + unionfs_mntput(dentry, bindex); + } + } + kfree(UNIONFS_D(dentry)->lower_paths); + UNIONFS_D(dentry)->lower_paths = NULL; + set_dbstart(dentry, -1); + set_dbend(dentry, -1); + +out: + if (!err && UNIONFS_D(dentry)) { + BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount); + BUG_ON(dbend(dentry) > sbmax(dentry->d_sb)); + BUG_ON(dbstart(dentry) < 0); + } + kfree(whname); + if (locked_parent) + unionfs_unlock_dentry(parent_dentry); + dput(parent_dentry); + if (locked_child || (err && allocated_new_info)) + unionfs_unlock_dentry(dentry); + return ERR_PTR(err); +} + +/* This is a utility function that fills in a unionfs dentry */ +int unionfs_partial_lookup(struct dentry *dentry) +{ + struct dentry *tmp; + struct nameidata nd = { .flags = 0 }; + + tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL); + if (!tmp) + return 0; + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + /* need to change the interface */ + BUG_ON(tmp != dentry); + return -ENOSYS; +} + +/* The dentry cache is just so we have properly sized dentries. */ +static struct kmem_cache *unionfs_dentry_cachep; +int unionfs_init_dentry_cache(void) +{ + unionfs_dentry_cachep = + kmem_cache_create("unionfs_dentry", + sizeof(struct unionfs_dentry_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); + + return (unionfs_dentry_cachep ? 
0 : -ENOMEM); +} + +void unionfs_destroy_dentry_cache(void) +{ + if (unionfs_dentry_cachep) + kmem_cache_destroy(unionfs_dentry_cachep); +} + +void free_dentry_private_data(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static inline int __realloc_dentry_private_data(struct dentry *dentry) +{ + struct unionfs_dentry_info *info = UNIONFS_D(dentry); + void *p; + int size; + + BUG_ON(!info); + + size = sizeof(struct path) * sbmax(dentry->d_sb); + p = krealloc(info->lower_paths, size, GFP_ATOMIC); + if (!p) + return -ENOMEM; + + info->lower_paths = p; + + info->bstart = -1; + info->bend = -1; + info->bopaque = -1; + info->bcount = sbmax(dentry->d_sb); + atomic_set(&info->generation, + atomic_read(&UNIONFS_SB(dentry->d_sb)->generation)); + + memset(info->lower_paths, 0, size); + + return 0; +} + +/* UNIONFS_D(dentry)->lock must be locked */ +int realloc_dentry_private_data(struct dentry *dentry) +{ + if (!__realloc_dentry_private_data(dentry)) + return 0; + + kfree(UNIONFS_D(dentry)->lower_paths); + free_dentry_private_data(dentry); + return -ENOMEM; +} + +/* allocate new dentry private data */ +int new_dentry_private_data(struct dentry *dentry) +{ + struct unionfs_dentry_info *info = UNIONFS_D(dentry); + + BUG_ON(info); + + info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC); + if (!info) + return -ENOMEM; + + mutex_init(&info->lock); + mutex_lock(&info->lock); + + info->lower_paths = NULL; + + dentry->d_fsdata = info; + + if (!__realloc_dentry_private_data(dentry)) + return 0; + + mutex_unlock(&info->lock); + free_dentry_private_data(dentry); + return -ENOMEM; +} + +/* + * scan through the lower dentry objects, and set bstart to reflect the + * starting branch + */ +void update_bstart(struct dentry *dentry) +{ + int bindex; + int bstart = dbstart(dentry); + int bend = dbend(dentry); + struct dentry *hidden_dentry; + + for (bindex = bstart; 
bindex <= bend; bindex++) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) + continue; + if (hidden_dentry->d_inode) { + set_dbstart(dentry, bindex); + break; + } + dput(hidden_dentry); + unionfs_set_lower_dentry_idx(dentry, bindex, NULL); + } +} diff -Nurb linux-2.6.22-570/fs/unionfs/main.c linux-2.6.22-591/fs/unionfs/main.c --- linux-2.6.22-570/fs/unionfs/main.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/main.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" +#include +#include + +/* + * Connect a unionfs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @sb: unionfs's super_block + */ +int unionfs_interpose(struct dentry *dentry, struct super_block *sb, int flag) +{ + struct inode *hidden_inode; + struct dentry *hidden_dentry; + int err = 0; + struct inode *inode; + int is_negative_dentry = 1; + int bindex, bstart, bend; + + verify_locked(dentry); + + bstart = dbstart(dentry); + bend = dbend(dentry); + + /* Make sure that we didn't get a negative dentry. 
*/ + for (bindex = bstart; bindex <= bend; bindex++) { + if (unionfs_lower_dentry_idx(dentry, bindex) && + unionfs_lower_dentry_idx(dentry, bindex)->d_inode) { + is_negative_dentry = 0; + break; + } + } + BUG_ON(is_negative_dentry); + + /* + * We allocate our new inode below, by calling iget. + * iget will call our read_inode which will initialize some + * of the new inode's fields + */ + + /* + * On revalidate we've already got our own inode and just need + * to fix it up. + */ + if (flag == INTERPOSE_REVAL) { + inode = dentry->d_inode; + UNIONFS_I(inode)->bstart = -1; + UNIONFS_I(inode)->bend = -1; + atomic_set(&UNIONFS_I(inode)->generation, + atomic_read(&UNIONFS_SB(sb)->generation)); + + UNIONFS_I(inode)->lower_inodes = + kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL); + if (!UNIONFS_I(inode)->lower_inodes) { + err = -ENOMEM; + goto out; + } + } else { + /* get unique inode number for unionfs */ + inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO)); + if (!inode) { + err = -EACCES; + goto out; + } + + if (atomic_read(&inode->i_count) > 1) + goto skip; + } + + for (bindex = bstart; bindex <= bend; bindex++) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) { + unionfs_set_lower_inode_idx(inode, bindex, NULL); + continue; + } + + /* Initialize the hidden inode to the new hidden inode. */ + if (!hidden_dentry->d_inode) + continue; + + unionfs_set_lower_inode_idx(inode, bindex, + igrab(hidden_dentry->d_inode)); + } + + ibstart(inode) = dbstart(dentry); + ibend(inode) = dbend(dentry); + + /* Use attributes from the first branch. 
*/ + hidden_inode = unionfs_lower_inode(inode); + + /* Use different set of inode ops for symlinks & directories */ + if (S_ISLNK(hidden_inode->i_mode)) + inode->i_op = &unionfs_symlink_iops; + else if (S_ISDIR(hidden_inode->i_mode)) + inode->i_op = &unionfs_dir_iops; + + /* Use different set of file ops for directories */ + if (S_ISDIR(hidden_inode->i_mode)) + inode->i_fop = &unionfs_dir_fops; + + /* properly initialize special inodes */ + if (S_ISBLK(hidden_inode->i_mode) || S_ISCHR(hidden_inode->i_mode) || + S_ISFIFO(hidden_inode->i_mode) || S_ISSOCK(hidden_inode->i_mode)) + init_special_inode(inode, hidden_inode->i_mode, + hidden_inode->i_rdev); + + /* all well, copy inode attributes */ + fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks); + fsstack_copy_inode_size(inode, hidden_inode); + +skip: + /* only (our) lookup wants to do a d_add */ + switch (flag) { + case INTERPOSE_DEFAULT: + case INTERPOSE_REVAL_NEG: + d_instantiate(dentry, inode); + break; + case INTERPOSE_LOOKUP: + err = PTR_ERR(d_splice_alias(inode, dentry)); + break; + case INTERPOSE_REVAL: + /* Do nothing. 
*/ + break; + default: + printk(KERN_ERR "unionfs: invalid interpose flag passed!\n"); + BUG(); + } + +out: + return err; +} + +/* like interpose above, but for an already existing dentry */ +void unionfs_reinterpose(struct dentry *dentry) +{ + struct dentry *hidden_dentry; + struct inode *inode; + int bindex, bstart, bend; + + verify_locked(dentry); + + /* This is pre-allocated inode */ + inode = dentry->d_inode; + + bstart = dbstart(dentry); + bend = dbend(dentry); + for (bindex = bstart; bindex <= bend; bindex++) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + if (!hidden_dentry) + continue; + + if (!hidden_dentry->d_inode) + continue; + if (unionfs_lower_inode_idx(inode, bindex)) + continue; + unionfs_set_lower_inode_idx(inode, bindex, + igrab(hidden_dentry->d_inode)); + } + ibstart(inode) = dbstart(dentry); + ibend(inode) = dbend(dentry); +} + +/* + * make sure the branch we just looked up (nd) makes sense: + * + * 1) we're not trying to stack unionfs on top of unionfs + * 2) it exists + * 3) is a directory + */ +int check_branch(struct nameidata *nd) +{ + if (!strcmp(nd->dentry->d_sb->s_type->name, "unionfs")) + return -EINVAL; + if (!nd->dentry->d_inode) + return -ENOENT; + if (!S_ISDIR(nd->dentry->d_inode->i_mode)) + return -ENOTDIR; + return 0; +} + +/* returns 1 if either dentry is the other or one of its ancestors */ +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2) +{ + struct dentry *dent = NULL; + + dent = dent1; + while ((dent != dent2) && (dent->d_parent != dent)) + dent = dent->d_parent; + + if (dent == dent2) + return 1; + + dent = dent2; + while ((dent != dent1) && (dent->d_parent != dent)) + dent = dent->d_parent; + + return (dent == dent1); +} + +/* + * Branch mode helper: "ro" is MAY_READ, "rw" adds MAY_WRITE, else 0 + */ +int __parse_branch_mode(const char *name) +{ + if (!name) + return 0; + if (!strcmp(name, "ro")) + return MAY_READ; + if (!strcmp(name, "rw")) + return (MAY_READ | MAY_WRITE); + return 0; +} + +/* + * Parse "ro" or "rw" 
options, but default to "rw" if no mode option (or an + * unrecognized one) was specified. + */ +int parse_branch_mode(const char *name) +{ + int perms = __parse_branch_mode(name); + + if (perms == 0) + perms = MAY_READ | MAY_WRITE; + return perms; +} + +/* + * parse the dirs= mount argument + * + * We don't need to lock the superblock private data's rwsem, as we get + * called only by unionfs_read_super - it is still a long time before anyone + * can even get a reference to us. + * + * Returns 0 on success. On failure, every hidden dentry/vfsmount reference + * taken so far is dropped, both per-branch arrays are freed and their + * pointers reset to NULL, and a negative errno is returned. + */ +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info + *hidden_root_info, char *options) +{ + struct nameidata nd; + char *name; + int err = 0; + int branches = 1; + int bindex = 0; + int i = 0; + int j = 0; + + struct dentry *dent1; + struct dentry *dent2; + + if (options[0] == '\0') { + printk(KERN_WARNING "unionfs: no branches specified\n"); + err = -EINVAL; + goto out; + } + + /* + * Each colon means we have a separator, this is really just a rough + * guess, since strsep will handle empty fields for us.
+ */ + for (i = 0; options[i]; i++) + if (options[i] == ':') + branches++; + + /* allocate space for underlying pointers to hidden dentry */ + UNIONFS_SB(sb)->data = + kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL); + if (!UNIONFS_SB(sb)->data) { + err = -ENOMEM; + goto out; + } + + hidden_root_info->lower_paths = + kcalloc(branches, sizeof(struct path), GFP_KERNEL); + if (!hidden_root_info->lower_paths) { + err = -ENOMEM; + goto out; + } + + /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */ + branches = 0; + while ((name = strsep(&options, ":")) != NULL) { + int perms; + char *mode = strchr(name, '='); + + if (!name || !*name) + continue; + + branches++; + + /* strip off '=' if any */ + if (mode) + *mode++ = '\0'; + + perms = parse_branch_mode(mode); + if (!bindex && !(perms & MAY_WRITE)) { + err = -EINVAL; + goto out; + } + + err = path_lookup(name, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_WARNING "unionfs: error accessing " + "hidden directory '%s' (error %d)\n", + name, err); + goto out; + } + + if ((err = check_branch(&nd))) { + printk(KERN_WARNING "unionfs: hidden directory " + "'%s' is not a valid branch\n", name); + path_release(&nd); + goto out; + } + + hidden_root_info->lower_paths[bindex].dentry = nd.dentry; + hidden_root_info->lower_paths[bindex].mnt = nd.mnt; + + set_branchperms(sb, bindex, perms); + set_branch_count(sb, bindex, 0); + new_branch_id(sb, bindex); + + if (hidden_root_info->bstart < 0) + hidden_root_info->bstart = bindex; + hidden_root_info->bend = bindex; + bindex++; + } + + if (branches == 0) { + printk(KERN_WARNING "unionfs: no branches specified\n"); + err = -EINVAL; + goto out; + } + + BUG_ON(branches != (hidden_root_info->bend + 1)); + + /* + * Ensure that no overlaps exist in the branches. + * + * This test is required because the Linux kernel has no support + * currently for ensuring coherency between stackable layers and + * branches.
If we were to allow overlapping branches, it would be + * possible, for example, to delete a file via one branch, which + * would not be reflected in another branch. Such incoherency could + * lead to inconsistencies and even kernel oopses. Rather than + * implement hacks to work around some of these cache-coherency + * problems, we prevent branch overlapping, for now. A complete + * solution will involve proper kernel/VFS support for cache + * coherency, at which time we could safely remove this + * branch-overlapping test. + */ + for (i = 0; i < branches; i++) { + for (j = i + 1; j < branches; j++) { + dent1 = hidden_root_info->lower_paths[i].dentry; + dent2 = hidden_root_info->lower_paths[j].dentry; + + if (is_branch_overlap(dent1, dent2)) { + printk(KERN_WARNING "unionfs: branches %d and " + "%d overlap\n", i, j); + err = -EINVAL; + goto out; + } + } + } + +out: + if (err) { + /* + * lower_paths is still NULL if we bailed out before (or while) + * allocating it, so only walk the array if it exists. + */ + if (hidden_root_info->lower_paths) { + for (i = 0; i < branches; i++) { + if (!hidden_root_info->lower_paths[i].dentry) + continue; + dput(hidden_root_info->lower_paths[i].dentry); + /* initialize: can't use unionfs_mntput here */ + mntput(hidden_root_info->lower_paths[i].mnt); + } + } + + kfree(hidden_root_info->lower_paths); + kfree(UNIONFS_SB(sb)->data); + + /* + * MUST clear the pointers to prevent potential double free if + * the caller dies later on + */ + hidden_root_info->lower_paths = NULL; + UNIONFS_SB(sb)->data = NULL; + } + return err; +} + +/* + * Parse mount options. See the manual page for usage instructions. + * + * Returns the dentry object of the lower-level (hidden) directory; + * We want to mount our stackable file system on top of that hidden directory.
+ * + * More precisely: on success this returns a newly allocated + * unionfs_dentry_info describing the hidden branch roots; on failure it + * releases whatever was acquired and returns an ERR_PTR() value. + */ +static struct unionfs_dentry_info *unionfs_parse_options( + struct super_block *sb, + char *options) +{ + struct unionfs_dentry_info *hidden_root_info; + char *optname; + int err = 0; + int bindex; + int dirsfound = 0; + + /* allocate private data area */ + err = -ENOMEM; + hidden_root_info = + kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL); + if (!hidden_root_info) + goto out_error; + hidden_root_info->bstart = -1; + hidden_root_info->bend = -1; + hidden_root_info->bopaque = -1; + + while ((optname = strsep(&options, ",")) != NULL) { + char *optarg; + char *endptr; + int intval; + + if (!optname || !*optname) + continue; + + optarg = strchr(optname, '='); + if (optarg) + *optarg++ = '\0'; + + /* + * All of our options take an argument now. Insert ones that + * don't, above this check. + */ + if (!optarg) { + printk(KERN_WARNING + "unionfs: %s requires an argument.\n", optname); + err = -EINVAL; + goto out_error; + } + + if (!strcmp("dirs", optname)) { + if (++dirsfound > 1) { + printk(KERN_WARNING + "unionfs: multiple dirs specified\n"); + err = -EINVAL; + goto out_error; + } + err = parse_dirs_option(sb, hidden_root_info, optarg); + if (err) + goto out_error; + continue; + } + + /* All of these options require an integer argument.
*/ + intval = simple_strtoul(optarg, &endptr, 0); + if (*endptr) { + printk(KERN_WARNING + "unionfs: invalid %s option '%s'\n", + optname, optarg); + err = -EINVAL; + goto out_error; + } + + err = -EINVAL; + printk(KERN_WARNING + "unionfs: unrecognized option '%s'\n", optname); + goto out_error; + } + if (dirsfound != 1) { + printk(KERN_WARNING "unionfs: dirs option required\n"); + err = -EINVAL; + goto out_error; + } + goto out; + +out_error: + /* hidden_root_info is NULL here if its own allocation failed above */ + if (hidden_root_info) { + if (hidden_root_info->lower_paths) { + for (bindex = hidden_root_info->bstart; + bindex >= 0 && bindex <= hidden_root_info->bend; + bindex++) { + struct dentry *d; + struct vfsmount *m; + + d = hidden_root_info->lower_paths[bindex].dentry; + m = hidden_root_info->lower_paths[bindex].mnt; + + dput(d); + /* initializing: can't use unionfs_mntput here */ + mntput(m); + } + } + + kfree(hidden_root_info->lower_paths); + kfree(hidden_root_info); + } + + kfree(UNIONFS_SB(sb)->data); + UNIONFS_SB(sb)->data = NULL; + + hidden_root_info = ERR_PTR(err); +out: + return hidden_root_info; +} + +/* + * our custom d_alloc_root work-alike + * + * we can't use d_alloc_root if we want to use our own interpose function + * unchanged, so we simply call our own "fake" d_alloc_root + */ +static struct dentry *unionfs_d_alloc_root(struct super_block *sb) +{ + struct dentry *ret = NULL; + + if (sb) { + static const struct qstr name = {.name = "/",.len = 1 }; + + ret = d_alloc(NULL, &name); + if (ret) { + ret->d_op = &unionfs_dops; + ret->d_sb = sb; + ret->d_parent = ret; + } + } + return ret; +} + +/* + * There is no need to lock the unionfs_super_info's rwsem as there is no + * way anyone can have a reference to the superblock at this point in time.
+ * + * Fills in a new unionfs superblock from the mount options in raw_data. + * Returns 0 on success, or a negative errno after undoing any partial + * setup. + */ +static int unionfs_read_super(struct super_block *sb, void *raw_data, + int silent) +{ + int err = 0; + + struct unionfs_dentry_info *hidden_root_info = NULL; + int bindex, bstart, bend; + + if (!raw_data) { + printk(KERN_WARNING + "unionfs: read_super: missing data argument\n"); + err = -EINVAL; + goto out; + } + + /* Allocate superblock private data */ + sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL); + if (!UNIONFS_SB(sb)) { + printk(KERN_WARNING "unionfs: read_super: out of memory\n"); + err = -ENOMEM; + goto out; + } + + UNIONFS_SB(sb)->bend = -1; + atomic_set(&UNIONFS_SB(sb)->generation, 1); + init_rwsem(&UNIONFS_SB(sb)->rwsem); + UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */ + + hidden_root_info = unionfs_parse_options(sb, raw_data); + if (IS_ERR(hidden_root_info)) { + printk(KERN_WARNING + "unionfs: read_super: error while parsing options " + "(err = %ld)\n", PTR_ERR(hidden_root_info)); + err = PTR_ERR(hidden_root_info); + hidden_root_info = NULL; + goto out_free; + } + if (hidden_root_info->bstart == -1) { + err = -ENOENT; + goto out_free; + } + + /* set the hidden superblock field of upper superblock */ + bstart = hidden_root_info->bstart; + BUG_ON(bstart != 0); + sbend(sb) = bend = hidden_root_info->bend; + for (bindex = bstart; bindex <= bend; bindex++) { + struct dentry *d = hidden_root_info->lower_paths[bindex].dentry; + unionfs_set_lower_super_idx(sb, bindex, d->d_sb); + } + + /* max Bytes is the maximum bytes from highest priority branch */ + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; + + sb->s_op = &unionfs_sops; + + /* See comment next to the definition of unionfs_d_alloc_root */ + sb->s_root = unionfs_d_alloc_root(sb); + if (!sb->s_root) { + err = -ENOMEM; + goto out_dput; + } + + /* link the upper and lower dentries */ + sb->s_root->d_fsdata = NULL; + if ((err = new_dentry_private_data(sb->s_root))) + goto out_freedpd; + + /* Set the hidden dentries for s_root */ + for (bindex =
bstart; bindex <= bend; bindex++) { + struct dentry *d; + struct vfsmount *m; + + d = hidden_root_info->lower_paths[bindex].dentry; + m = hidden_root_info->lower_paths[bindex].mnt; + + unionfs_set_lower_dentry_idx(sb->s_root, bindex, d); + unionfs_set_lower_mnt_idx(sb->s_root, bindex, m); + } + set_dbstart(sb->s_root, bstart); + set_dbend(sb->s_root, bend); + + /* Set the generation number to one, since this is for the mount. */ + atomic_set(&UNIONFS_D(sb->s_root)->generation, 1); + + /* call interpose to create the upper level inode */ + err = unionfs_interpose(sb->s_root, sb, 0); + unionfs_unlock_dentry(sb->s_root); + if (!err) + goto out; + /* else fall through */ + +out_freedpd: + if (UNIONFS_D(sb->s_root)) { + kfree(UNIONFS_D(sb->s_root)->lower_paths); + free_dentry_private_data(sb->s_root); + } + dput(sb->s_root); + /* + * Don't leave a dangling root behind: after we return an error the + * superblock is torn down through ->kill_sb (generic_shutdown_super), + * which must not touch the dentry we just released. + */ + sb->s_root = NULL; + +out_dput: + if (hidden_root_info && !IS_ERR(hidden_root_info)) { + for (bindex = hidden_root_info->bstart; + bindex <= hidden_root_info->bend; bindex++) { + struct dentry *d; + struct vfsmount *m; + + d = hidden_root_info->lower_paths[bindex].dentry; + m = hidden_root_info->lower_paths[bindex].mnt; + + dput(d); + /* initializing: can't use unionfs_mntput here */ + mntput(m); + } + kfree(hidden_root_info->lower_paths); + kfree(hidden_root_info); + hidden_root_info = NULL; + } + +out_free: + kfree(UNIONFS_SB(sb)->data); + kfree(UNIONFS_SB(sb)); + sb->s_fs_info = NULL; + +out: + if (hidden_root_info && !IS_ERR(hidden_root_info)) { + kfree(hidden_root_info->lower_paths); + kfree(hidden_root_info); + } + return err; +} + +static int unionfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data, struct vfsmount *mnt) +{ + return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt); +} + +static struct file_system_type unionfs_fs_type = { + .owner = THIS_MODULE, + .name = "unionfs", + .get_sb = unionfs_get_sb, + .kill_sb = generic_shutdown_super, + .fs_flags = FS_REVAL_DOT, +}; + +static int __init
init_unionfs_fs(void) +{ + int err; + + printk(KERN_INFO "Registering unionfs " UNIONFS_VERSION "\n"); + + if ((err = unionfs_init_filldir_cache())) + goto out; + if ((err = unionfs_init_inode_cache())) + goto out; + if ((err = unionfs_init_dentry_cache())) + goto out; + if ((err = init_sioq())) + goto out; + err = register_filesystem(&unionfs_fs_type); +out: + if (err) { + stop_sioq(); + unionfs_destroy_filldir_cache(); + unionfs_destroy_inode_cache(); + unionfs_destroy_dentry_cache(); + } + return err; +} + +static void __exit exit_unionfs_fs(void) +{ + stop_sioq(); + unionfs_destroy_filldir_cache(); + unionfs_destroy_inode_cache(); + unionfs_destroy_dentry_cache(); + unregister_filesystem(&unionfs_fs_type); + printk(KERN_INFO "Completed unionfs module unload.\n"); +} + +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" + " (http://www.fsl.cs.sunysb.edu)"); +MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION + " (http://unionfs.filesystems.org)"); +MODULE_LICENSE("GPL"); + +module_init(init_unionfs_fs); +module_exit(exit_unionfs_fs); diff -Nurb linux-2.6.22-570/fs/unionfs/mmap.c linux-2.6.22-591/fs/unionfs/mmap.c --- linux-2.6.22-570/fs/unionfs/mmap.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/mmap.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2006 Shaya Potter + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P.
Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of State University of New York + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* + * Unionfs doesn't implement ->writepages, which is OK with the VFS and + * keeps our code simpler and smaller. Nevertheless, somehow, our own + * ->writepage must be called so we can sync the upper pages with the lower + * pages: otherwise data changed at the upper layer won't get written to the + * lower layer. + * + * Some lower file systems (e.g., NFS) expect the VFS to call its writepages + * only, which in turn will call generic_writepages and invoke each of the + * lower file system's ->writepage. NFS in particular uses the + * wbc->fs_private field in its nfs_writepage, which is set in its + * nfs_writepages. So if we don't call the lower nfs_writepages first, then + * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an + * OOPS. If, however, we implement a unionfs_writepages and then we do call + * the lower nfs_writepages, then we "lose control" over the pages we're + * trying to write to the lower file system: we won't be writing our own + * new/modified data from the upper pages to the lower pages, and any + * mmap-based changes are lost. + * + * This is a fundamental cache-coherency problem in Linux. The kernel isn't + * able to support such stacking abstractions cleanly. One possible clean + * way would be that a lower file system's ->writepage method have some sort + * of a callback to validate if any upper pages for the same file+offset + * exist and have newer content in them.
+ * + * This whole NULL ptr dereference is triggered at the lower file system + * (NFS) because the wbc->for_writepages is set to 1. Therefore, to avoid + * this NULL pointer dereference, we set this flag to 0 and restore it upon + * exit. This probably means that we're slightly less efficient in writing + * pages out, doing them one at a time, but at least we avoid the oops until + * such day as Linux can better support address_space_ops in a stackable + * fashion. + * + * The upper page is always unlocked before returning. Returns the lower + * ->writepage result, or -EIO if the lower page could not be obtained. + */ +int unionfs_writepage(struct page *page, struct writeback_control *wbc) +{ + int err = -EIO; + struct inode *inode; + struct inode *lower_inode; + struct page *lower_page; + char *kaddr, *lower_kaddr; + int saved_for_writepages = wbc->for_writepages; + + inode = page->mapping->host; + lower_inode = unionfs_lower_inode(inode); + + /* find lower page (returns a locked page); fail with -EIO if we can't */ + lower_page = grab_cache_page(lower_inode->i_mapping, page->index); + if (!lower_page) + goto out; + + /* map both pages and copy the upper page's data into the lower page */ + kaddr = kmap(page); + lower_kaddr = kmap(lower_page); + + memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE); + + kunmap(page); + kunmap(lower_page); + + BUG_ON(!lower_inode->i_mapping->a_ops->writepage); + + /* workaround for some lower file systems: see big comment on top */ + if (wbc->for_writepages && !wbc->fs_private) + wbc->for_writepages = 0; + + /* call lower writepage (expects locked page) */ + err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc); + wbc->for_writepages = saved_for_writepages; /* restore value */ + + /* + * update mtime and ctime of lower level file system + * unionfs' mtime and ctime are updated by generic_file_write + */ + lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; + + page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */ + + /* the upper page is only up to date if the lower write succeeded */ + if (err) + ClearPageUptodate(page); + else + SetPageUptodate(page); + +out: + unlock_page(page); + return err; +} + +/* + * readpage is called from generic_page_read and the fault handler.
+ * If your file system uses generic_page_read for the read op, it + * must implement readpage. + * + * Readpage expects a locked page, and must unlock it. + */ +static int unionfs_do_readpage(struct file *file, struct page *page) +{ + int err = -EIO; + struct dentry *dentry; + struct file *lower_file = NULL; + struct inode *inode, *lower_inode; + char *page_data; + struct page *lower_page; + char *lower_page_data; + + dentry = file->f_dentry; + if (UNIONFS_F(file) == NULL) { + err = -ENOENT; + goto out_err; + } + + lower_file = unionfs_lower_file(file); + inode = dentry->d_inode; + lower_inode = unionfs_lower_inode(inode); + + lower_page = NULL; + + /* find lower page (returns a locked page) */ + lower_page = read_cache_page(lower_inode->i_mapping, + page->index, + (filler_t *) lower_inode->i_mapping-> + a_ops->readpage, (void *)lower_file); + + if (IS_ERR(lower_page)) { + err = PTR_ERR(lower_page); + lower_page = NULL; + goto out_release; + } + + /* + * wait for the page data to show up + * (signaled by readpage as unlocking the page) + */ + wait_on_page_locked(lower_page); + if (!PageUptodate(lower_page)) { + /* + * call readpage() again if we returned from wait_on_page + * with a page that's not up-to-date; that can happen when a + * partial page has a few buffers which are ok, but not the + * whole page.
+ */ + lock_page(lower_page); + err = lower_inode->i_mapping->a_ops->readpage(lower_file, + lower_page); + if (err) { + /* + * keep lower_page set: we still hold the reference + * taken by read_cache_page, and out_release must + * drop it or the page is leaked + */ + goto out_release; + } + + wait_on_page_locked(lower_page); + if (!PageUptodate(lower_page)) { + err = -EIO; + goto out_release; + } + } + + /* map pages, get their addresses */ + page_data = (char *)kmap(page); + lower_page_data = (char *)kmap(lower_page); + + memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); + + err = 0; + + kunmap(lower_page); + kunmap(page); + +out_release: + if (lower_page) + page_cache_release(lower_page); /* undo read_cache_page */ + + if (err == 0) + SetPageUptodate(page); + else + ClearPageUptodate(page); + +out_err: + return err; +} + +int unionfs_readpage(struct file *file, struct page *page) +{ + int err; + + unionfs_read_lock(file->f_dentry->d_sb); + + if ((err = unionfs_file_revalidate(file, 0))) + goto out; + + err = unionfs_do_readpage(file, page); + + if (!err) + touch_atime(unionfs_lower_mnt(file->f_path.dentry), + unionfs_lower_dentry(file->f_path.dentry)); + + /* + * we have to unlock our page, b/c we _might_ have gotten a locked + * page.
but we no longer have to wakeup on our page here, b/c + * UnlockPage does it + */ +out: + unlock_page(page); + unionfs_read_unlock(file->f_dentry->d_sb); + + return err; +} + +int unionfs_prepare_write(struct file *file, struct page *page, unsigned from, + unsigned to) +{ + int err; + + unionfs_read_lock(file->f_dentry->d_sb); + + err = unionfs_file_revalidate(file, 1); + + unionfs_read_unlock(file->f_dentry->d_sb); + + return err; +} + +int unionfs_commit_write(struct file *file, struct page *page, unsigned from, + unsigned to) +{ + int err = -ENOMEM; + struct inode *inode, *lower_inode; + struct file *lower_file = NULL; + loff_t pos; + unsigned bytes = to - from; + char *page_data = NULL; + mm_segment_t old_fs; + + BUG_ON(file == NULL); + + unionfs_read_lock(file->f_dentry->d_sb); + + if ((err = unionfs_file_revalidate(file, 1))) + goto out; + + inode = page->mapping->host; + lower_inode = unionfs_lower_inode(inode); + + if (UNIONFS_F(file) != NULL) + lower_file = unionfs_lower_file(file); + + /* FIXME: is this assertion right here?
*/ + BUG_ON(lower_file == NULL); + + page_data = (char *)kmap(page); + /* + * widen to loff_t before shifting: page->index is an unsigned long, + * so the shift would overflow for offsets >= 4GB on 32-bit + */ + lower_file->f_pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + from; + + /* SP: I use vfs_write instead of copying page data and the + * prepare_write/commit_write combo because file system's like + * GFS/OCFS2 don't like things touching those directly, + * calling the underlying write op, while a little bit slower, will + * call all the FS specific code as well + */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = vfs_write(lower_file, page_data + from, bytes, + &lower_file->f_pos); + set_fs(old_fs); + + kunmap(page); + + if (err < 0) + goto out; + + inode->i_blocks = lower_inode->i_blocks; + /* we may have to update i_size */ + pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; + if (pos > i_size_read(inode)) + i_size_write(inode, pos); + + /* + * update mtime and ctime of lower level file system + * unionfs' mtime and ctime are updated by generic_file_write + */ + lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; + + mark_inode_dirty_sync(inode); + +out: + if (err < 0) + ClearPageUptodate(page); + + unionfs_read_unlock(file->f_dentry->d_sb); + return err; /* assume all is ok */ +} + +void unionfs_sync_page(struct page *page) +{ + struct inode *inode; + struct inode *lower_inode; + struct page *lower_page; + struct address_space *mapping; + + inode = page->mapping->host; + lower_inode = unionfs_lower_inode(inode); + + /* find lower page (returns a locked page) */ + lower_page = grab_cache_page(lower_inode->i_mapping, page->index); + if (!lower_page) + goto out; + + /* do the actual sync */ + mapping = lower_page->mapping; + /* + * XXX: can we optimize ala RAIF and set the lower page to be + * discarded after a successful sync_page?
+ */ + if (mapping && mapping->a_ops && mapping->a_ops->sync_page) + mapping->a_ops->sync_page(lower_page); + + unlock_page(lower_page); /* b/c grab_cache_page locked it */ + page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */ + +out: + return; +} + +struct address_space_operations unionfs_aops = { + .writepage = unionfs_writepage, + .readpage = unionfs_readpage, + .prepare_write = unionfs_prepare_write, + .commit_write = unionfs_commit_write, + .sync_page = unionfs_sync_page, +}; diff -Nurb linux-2.6.22-570/fs/unionfs/rdstate.c linux-2.6.22-591/fs/unionfs/rdstate.c --- linux-2.6.22-570/fs/unionfs/rdstate.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/rdstate.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* This file contains the routines for maintaining readdir state. */ + +/* + * There are two structures here, rdstate which is a hash table + * of the second structure which is a filldir_node. + */ + +/* + * This is a struct kmem_cache for filldir nodes, because we allocate a lot + * of them and they shouldn't waste memory. If the node has a small name + * (as defined by the dentry structure), then we use an inline name to + * preserve kmalloc space.
+ */ +static struct kmem_cache *unionfs_filldir_cachep; + +int unionfs_init_filldir_cache(void) +{ + unionfs_filldir_cachep = + kmem_cache_create("unionfs_filldir", + sizeof(struct filldir_node), 0, + SLAB_RECLAIM_ACCOUNT, NULL, NULL); + + return (unionfs_filldir_cachep ? 0 : -ENOMEM); +} + +void unionfs_destroy_filldir_cache(void) +{ + if (unionfs_filldir_cachep) + kmem_cache_destroy(unionfs_filldir_cachep); +} + +/* + * This is a tuning parameter that tells us roughly how big to make the + * hash table in directory entries per page. This isn't perfect, but + * at least we get a hash table size that shouldn't be too overloaded. + * The following averages are based on my home directory. + * 14.44693 Overall + * 12.29 Single Page Directories + * 117.93 Multi-page directories + */ +#define DENTPAGE 4096 +#define DENTPERONEPAGE 12 +#define DENTPERPAGE 118 +#define MINHASHSIZE 1 +static int guesstimate_hash_size(struct inode *inode) +{ + struct inode *hidden_inode; + int bindex; + int hashsize = MINHASHSIZE; + + if (UNIONFS_I(inode)->hashsize > 0) + return UNIONFS_I(inode)->hashsize; + + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { + if (!(hidden_inode = unionfs_lower_inode_idx(inode, bindex))) + continue; + + if (hidden_inode->i_size == DENTPAGE) + hashsize += DENTPERONEPAGE; + else + hashsize += (hidden_inode->i_size / DENTPAGE) * + DENTPERPAGE; + } + + return hashsize; +} + +int init_rdstate(struct file *file) +{ + BUG_ON(sizeof(loff_t) != + (sizeof(unsigned int) + sizeof(unsigned int))); + BUG_ON(UNIONFS_F(file)->rdstate != NULL); + + UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_dentry->d_inode, + fbstart(file)); + + return (UNIONFS_F(file)->rdstate ?
0 : -ENOMEM); +} + +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos) +{ + struct unionfs_dir_state *rdstate = NULL; + struct list_head *pos; + + spin_lock(&UNIONFS_I(inode)->rdlock); + list_for_each(pos, &UNIONFS_I(inode)->readdircache) { + struct unionfs_dir_state *r = + list_entry(pos, struct unionfs_dir_state, cache); + if (fpos == rdstate2offset(r)) { + UNIONFS_I(inode)->rdcount--; + list_del(&r->cache); + rdstate = r; + break; + } + } + spin_unlock(&UNIONFS_I(inode)->rdlock); + return rdstate; +} + +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex) +{ + int i = 0; + int hashsize; + unsigned long mallocsize = sizeof(struct unionfs_dir_state); + struct unionfs_dir_state *rdstate; + + hashsize = guesstimate_hash_size(inode); + mallocsize += hashsize * sizeof(struct list_head); + mallocsize = __roundup_pow_of_two(mallocsize); + + /* This should give us about 500 entries anyway. */ + if (mallocsize > PAGE_SIZE) + mallocsize = PAGE_SIZE; + + hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) / + sizeof(struct list_head); + + rdstate = kmalloc(mallocsize, GFP_KERNEL); + if (!rdstate) + return NULL; + + spin_lock(&UNIONFS_I(inode)->rdlock); + if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1)) + UNIONFS_I(inode)->cookie = 1; + else + UNIONFS_I(inode)->cookie++; + + rdstate->cookie = UNIONFS_I(inode)->cookie; + spin_unlock(&UNIONFS_I(inode)->rdlock); + rdstate->offset = 1; + rdstate->access = jiffies; + rdstate->bindex = bindex; + rdstate->dirpos = 0; + rdstate->hashentries = 0; + rdstate->size = hashsize; + for (i = 0; i < rdstate->size; i++) + INIT_LIST_HEAD(&rdstate->list[i]); + + return rdstate; +} + +static void free_filldir_node(struct filldir_node *node) +{ + if (node->namelen >= DNAME_INLINE_LEN_MIN) + kfree(node->name); + kmem_cache_free(unionfs_filldir_cachep, node); +} + +void free_rdstate(struct unionfs_dir_state *state) +{ + struct filldir_node *tmp; + int i; + + for (i = 0; i < state->size; i++) { +
struct list_head *head = &(state->list[i]); + struct list_head *pos, *n; + + /* traverse the list and deallocate space */ + list_for_each_safe(pos, n, head) { + tmp = list_entry(pos, struct filldir_node, file_list); + list_del(&tmp->file_list); + free_filldir_node(tmp); + } + } + + kfree(state); +} + +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, + const char *name, int namelen) +{ + int index; + unsigned int hash; + struct list_head *head; + struct list_head *pos; + struct filldir_node *cursor = NULL; + int found = 0; + + BUG_ON(namelen <= 0); + + hash = full_name_hash(name, namelen); + index = hash % rdstate->size; + + head = &(rdstate->list[index]); + list_for_each(pos, head) { + cursor = list_entry(pos, struct filldir_node, file_list); + + if (cursor->namelen == namelen && cursor->hash == hash && + !strncmp(cursor->name, name, namelen)) { + /* + * a duplicate exists, and hence no need to create + * entry to the list + */ + found = 1; + + /* + * if the duplicate is in this branch, then the file + * system is corrupted.
+ */ + if (cursor->bindex == rdstate->bindex) { + printk(KERN_DEBUG "unionfs: filldir: possible " + "I/O error: a file is duplicated " + "in the same branch %d: %s\n", + rdstate->bindex, cursor->name); + } + break; + } + } + + if (!found) + cursor = NULL; + + return cursor; +} + +/* + * Insert a new filldir_node for the first namelen bytes of name into + * rdstate's hash table. Names shorter than DNAME_INLINE_LEN_MIN are stored + * inline in the node; longer ones get their own kmalloc'ed buffer. + * + * Returns 0 on success, or -ENOMEM if either allocation fails (in which + * case nothing is added to the table). + */ +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name, + int namelen, int bindex, int whiteout) +{ + struct filldir_node *new; + unsigned int hash; + int index; + int err = 0; + struct list_head *head; + + BUG_ON(namelen <= 0); + + hash = full_name_hash(name, namelen); + index = hash % rdstate->size; + head = &(rdstate->list[index]); + + new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL); + if (!new) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&new->file_list); + new->namelen = namelen; + new->hash = hash; + new->bindex = bindex; + new->whiteout = whiteout; + + if (namelen < DNAME_INLINE_LEN_MIN) + new->name = new->iname; + else { + new->name = kmalloc(namelen + 1, GFP_KERNEL); + if (!new->name) { + kmem_cache_free(unionfs_filldir_cachep, new); + /* report the failure instead of returning success */ + err = -ENOMEM; + goto out; + } + } + + memcpy(new->name, name, namelen); + new->name[namelen] = '\0'; + + rdstate->hashentries++; + + list_add(&(new->file_list), head); +out: + return err; +} diff -Nurb linux-2.6.22-570/fs/unionfs/rename.c linux-2.6.22-591/fs/unionfs/rename.c --- linux-2.6.22-570/fs/unionfs/rename.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/rename.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P.
Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* + * Rename old_dentry to new_dentry within a single branch (bindex). + * + * Creates the hidden parent directories of the destination if it has no + * hidden dentry in this branch, removes a whiteout for the destination name + * if one exists, and then calls vfs_rename on the hidden dentries. If + * wh_old is non-NULL, a dentry for the source's whiteout name is looked up + * and returned through it; the caller creates the actual whiteout and must + * dput(*wh_old). Returns 0 on success or a negative errno. + */ +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + int bindex, struct dentry **wh_old) +{ + int err = 0; + struct dentry *hidden_old_dentry; + struct dentry *hidden_new_dentry; + struct dentry *hidden_old_dir_dentry; + struct dentry *hidden_new_dir_dentry; + struct dentry *hidden_wh_dentry; + struct dentry *hidden_wh_dir_dentry; + char *wh_name = NULL; + + hidden_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); + hidden_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex); + + if (!hidden_new_dentry) { + hidden_new_dentry = + create_parents(new_dentry->d_parent->d_inode, + new_dentry, bindex); + if (IS_ERR(hidden_new_dentry)) { + printk(KERN_DEBUG "unionfs: error creating directory " + "tree for rename, bindex = %d, err = %ld\n", + bindex, PTR_ERR(hidden_new_dentry)); + err = PTR_ERR(hidden_new_dentry); + goto out; + } + } + + wh_name = alloc_whname(new_dentry->d_name.name, + new_dentry->d_name.len); + if (IS_ERR(wh_name)) { + err = PTR_ERR(wh_name); + /* never hand an ERR_PTR to the kfree() at out: */ + wh_name = NULL; + goto out; + } + + hidden_wh_dentry = lookup_one_len(wh_name, hidden_new_dentry->d_parent, + new_dentry->d_name.len + + UNIONFS_WHLEN); + if (IS_ERR(hidden_wh_dentry)) { + err = PTR_ERR(hidden_wh_dentry); + goto out; + } + + if (hidden_wh_dentry->d_inode) { + /* get rid of the whiteout that is existing */ + if (hidden_new_dentry->d_inode) { + printk(KERN_WARNING "unionfs: both a whiteout and a " + "dentry exist when doing a rename!\n"); + err = -EIO; + +
dput(hidden_wh_dentry); + goto out; + } + + hidden_wh_dir_dentry = lock_parent(hidden_wh_dentry); + if (!(err = is_robranch_super(old_dentry->d_sb, bindex))) + err = vfs_unlink(hidden_wh_dir_dentry->d_inode, + hidden_wh_dentry); + + dput(hidden_wh_dentry); + unlock_dir(hidden_wh_dir_dentry); + if (err) + goto out; + } else + dput(hidden_wh_dentry); + + dget(hidden_old_dentry); + hidden_old_dir_dentry = dget_parent(hidden_old_dentry); + hidden_new_dir_dentry = dget_parent(hidden_new_dentry); + + lock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry); + + err = is_robranch_super(old_dentry->d_sb, bindex); + if (err) + goto out_unlock; + + /* + * ready to whiteout for old_dentry. caller will create the actual + * whiteout, and must dput(*wh_old) + */ + if (wh_old) { + char *whname; + whname = alloc_whname(old_dentry->d_name.name, + old_dentry->d_name.len); + err = PTR_ERR(whname); + if (IS_ERR(whname)) + goto out_unlock; + *wh_old = lookup_one_len(whname, hidden_old_dir_dentry, + old_dentry->d_name.len + + UNIONFS_WHLEN); + kfree(whname); + err = PTR_ERR(*wh_old); + if (IS_ERR(*wh_old)) { + *wh_old = NULL; + goto out_unlock; + } + } + + err = vfs_rename(hidden_old_dir_dentry->d_inode, hidden_old_dentry, + hidden_new_dir_dentry->d_inode, hidden_new_dentry); + +out_unlock: + unlock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry); + + dput(hidden_old_dir_dentry); + dput(hidden_new_dir_dentry); + dput(hidden_old_dentry); + +out: + if (!err) { + /* Fixup the new_dentry. */ + if (bindex < dbstart(new_dentry)) + set_dbstart(new_dentry, bindex); + else if (bindex > dbend(new_dentry)) + set_dbend(new_dentry, bindex); + } + + kfree(wh_name); + + return err; +} + +/* + * Main rename code. This is sufficiently complex, that it's documented in + * Documentation/filesystems/unionfs/rename.txt. This routine calls + * __unionfs_rename() above to perform some of the work.
+ */ +static int do_unionfs_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry) +{ + int err = 0; + int bindex, bwh_old; + int old_bstart, old_bend; + int new_bstart, new_bend; + int do_copyup = -1; + struct dentry *parent_dentry; + int local_err = 0; + int eio = 0; + int revert = 0; + struct dentry *wh_old = NULL; + + old_bstart = dbstart(old_dentry); + bwh_old = old_bstart; + old_bend = dbend(old_dentry); + parent_dentry = old_dentry->d_parent; + + new_bstart = dbstart(new_dentry); + new_bend = dbend(new_dentry); + + /* Rename source to destination. */ + err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry, + old_bstart, &wh_old); + if (err) { + if (!IS_COPYUP_ERR(err)) + goto out; + do_copyup = old_bstart - 1; + } else + revert = 1; + + /* + * Unlink all instances of destination that exist to the left of + * bstart of source. On error, revert back, goto out. + */ + for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) { + struct dentry *unlink_dentry; + struct dentry *unlink_dir_dentry; + + unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); + if (!unlink_dentry) + continue; + + unlink_dir_dentry = lock_parent(unlink_dentry); + if (!(err = is_robranch_super(old_dir->i_sb, bindex))) + err = vfs_unlink(unlink_dir_dentry->d_inode, + unlink_dentry); + + fsstack_copy_attr_times(new_dentry->d_parent->d_inode, + unlink_dir_dentry->d_inode); + /* propagate number of hard-links */ + new_dentry->d_parent->d_inode->i_nlink = + unionfs_get_nlinks(new_dentry->d_parent->d_inode); + + unlock_dir(unlink_dir_dentry); + if (!err) { + if (bindex != new_bstart) { + dput(unlink_dentry); + unionfs_set_lower_dentry_idx(new_dentry, + bindex, NULL); + } + } else if (IS_COPYUP_ERR(err)) { + do_copyup = bindex - 1; + } else if (revert) { + dput(wh_old); + goto revert; + } + } + + if (do_copyup != -1) { + for (bindex = do_copyup; bindex >= 0; bindex--) { + /* + * copyup the file into some left 
directory, so that + * you can rename it + */ + err = copyup_dentry(old_dentry->d_parent->d_inode, + old_dentry, old_bstart, bindex, + NULL, old_dentry->d_inode->i_size); + if (!err) { + dput(wh_old); + bwh_old = bindex; + err = __unionfs_rename(old_dir, old_dentry, + new_dir, new_dentry, + bindex, &wh_old); + break; + } + } + } + + /* make it opaque */ + if (S_ISDIR(old_dentry->d_inode->i_mode)) { + err = make_dir_opaque(old_dentry, dbstart(old_dentry)); + if (err) + goto revert; + } + + /* + * Create whiteout for source, only if: + * (1) There is more than one underlying instance of source. + * (2) We did a copy_up + */ + if ((old_bstart != old_bend) || (do_copyup != -1)) { + struct dentry *hidden_parent; + BUG_ON(!wh_old || wh_old->d_inode || bwh_old < 0); + hidden_parent = lock_parent(wh_old); + local_err = vfs_create(hidden_parent->d_inode, wh_old, S_IRUGO, + NULL); + unlock_dir(hidden_parent); + if (!local_err) + set_dbopaque(old_dentry, bwh_old); + else { + /* + * we can't fix anything now, so we cop-out and use + * -EIO. + */ + printk(KERN_ERR "unionfs: can't create a whiteout for " + "the source in rename!\n"); + err = -EIO; + } + } + +out: + dput(wh_old); + return err; + +revert: + /* Do revert here. 
*/ + local_err = unionfs_refresh_hidden_dentry(new_dentry, old_bstart); + if (local_err) { + printk(KERN_WARNING "unionfs: revert failed in rename: " + "the new refresh failed.\n"); + eio = -EIO; + } + + local_err = unionfs_refresh_hidden_dentry(old_dentry, old_bstart); + if (local_err) { + printk(KERN_WARNING "unionfs: revert failed in rename: " + "the old refresh failed.\n"); + eio = -EIO; + goto revert_out; + } + + if (!unionfs_lower_dentry_idx(new_dentry, bindex) || + !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) { + printk(KERN_WARNING "unionfs: revert failed in rename: " + "the object disappeared from under us!\n"); + eio = -EIO; + goto revert_out; + } + + if (unionfs_lower_dentry_idx(old_dentry, bindex) && + unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) { + printk(KERN_WARNING "unionfs: revert failed in rename: " + "the object was created underneath us!\n"); + eio = -EIO; + goto revert_out; + } + + local_err = __unionfs_rename(new_dir, new_dentry, + old_dir, old_dentry, old_bstart, NULL); + + /* If we can't fix it, then we cop-out with -EIO. 
*/ + if (local_err) { + printk(KERN_WARNING "unionfs: revert failed in rename!\n"); + eio = -EIO; + } + + local_err = unionfs_refresh_hidden_dentry(new_dentry, bindex); + if (local_err) + eio = -EIO; + local_err = unionfs_refresh_hidden_dentry(old_dentry, bindex); + if (local_err) + eio = -EIO; + +revert_out: + if (eio) + err = eio; + return err; +} + +static struct dentry *lookup_whiteout(struct dentry *dentry) +{ + char *whname; + int bindex = -1, bstart = -1, bend = -1; + struct dentry *parent, *hidden_parent, *wh_dentry; + + whname = alloc_whname(dentry->d_name.name, dentry->d_name.len); + if (IS_ERR(whname)) + return (void *)whname; + + parent = dget_parent(dentry); + unionfs_lock_dentry(parent); + bstart = dbstart(parent); + bend = dbend(parent); + wh_dentry = ERR_PTR(-ENOENT); + for (bindex = bstart; bindex <= bend; bindex++) { + hidden_parent = unionfs_lower_dentry_idx(parent, bindex); + if (!hidden_parent) + continue; + wh_dentry = lookup_one_len(whname, hidden_parent, + dentry->d_name.len + UNIONFS_WHLEN); + if (IS_ERR(wh_dentry)) + continue; + if (wh_dentry->d_inode) + break; + dput(wh_dentry); + wh_dentry = ERR_PTR(-ENOENT); + } + unionfs_unlock_dentry(parent); + dput(parent); + kfree(whname); + return wh_dentry; +} + +/* + * We can't copyup a directory, because it may involve huge numbers of + * children, etc. Doing that in the kernel would be bad, so instead we + * return EXDEV to the user-space utility that caused this, and let the + * user-space recurse and ask us to copy up each file separately. 
+ */ +static int may_rename_dir(struct dentry *dentry) +{ + int err, bstart; + + err = check_empty(dentry, NULL); + if (err == -ENOTEMPTY) { + if (is_robranch(dentry)) + return -EXDEV; + } else if (err) + return err; + + bstart = dbstart(dentry); + if (dbend(dentry) == bstart || dbopaque(dentry) == bstart) + return 0; + + set_dbstart(dentry, bstart + 1); + err = check_empty(dentry, NULL); + set_dbstart(dentry, bstart); + if (err == -ENOTEMPTY) + err = -EXDEV; + return err; +} + +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int err = 0; + struct dentry *wh_dentry; + + unionfs_read_lock(old_dentry->d_sb); + unionfs_double_lock_dentry(old_dentry, new_dentry); + + if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) { + err = -ESTALE; + goto out; + } + if (!d_deleted(new_dentry) && new_dentry->d_inode && + !__unionfs_d_revalidate_chain(new_dentry, NULL)) { + err = -ESTALE; + goto out; + } + + if (!S_ISDIR(old_dentry->d_inode->i_mode)) + err = unionfs_partial_lookup(old_dentry); + else + err = may_rename_dir(old_dentry); + + if (err) + goto out; + + err = unionfs_partial_lookup(new_dentry); + if (err) + goto out; + + /* + * if new_dentry is already hidden because of whiteout, + * simply override it even if the whited-out dir is not empty. + */ + wh_dentry = lookup_whiteout(new_dentry); + if (!IS_ERR(wh_dentry)) + dput(wh_dentry); + else if (new_dentry->d_inode) { + if (S_ISDIR(old_dentry->d_inode->i_mode) != + S_ISDIR(new_dentry->d_inode->i_mode)) { + err = S_ISDIR(old_dentry->d_inode->i_mode) ? 
+ -ENOTDIR : -EISDIR; + goto out; + } + + if (S_ISDIR(new_dentry->d_inode->i_mode)) { + struct unionfs_dir_state *namelist; + /* check if this unionfs directory is empty or not */ + err = check_empty(new_dentry, &namelist); + if (err) + goto out; + + if (!is_robranch(new_dentry)) + err = delete_whiteouts(new_dentry, + dbstart(new_dentry), + namelist); + + free_rdstate(namelist); + + if (err) + goto out; + } + } + err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry); + +out: + if (err) + /* clear the new_dentry stuff created */ + d_drop(new_dentry); + else + /* + * force re-lookup since the dir on ro branch is not renamed, + * and hidden dentries still indicate the un-renamed ones. + */ + if (S_ISDIR(old_dentry->d_inode->i_mode)) + atomic_dec(&UNIONFS_D(old_dentry)->generation); + + unionfs_unlock_dentry(new_dentry); + unionfs_unlock_dentry(old_dentry); + unionfs_read_unlock(old_dentry->d_sb); + return err; +} diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.c linux-2.6.22-591/fs/unionfs/sioq.c --- linux-2.6.22-570/fs/unionfs/sioq.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/sioq.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* + * Super-user IO work Queue - sometimes we need to perform actions which + * would fail due to the unix permissions on the parent directory (e.g., + * rmdir a directory which appears empty, but in reality contains + * whiteouts). 
+ */ + +static struct workqueue_struct *superio_workqueue; + +int __init init_sioq(void) +{ + int err; + + superio_workqueue = create_workqueue("unionfs_siod"); + if (superio_workqueue && !IS_ERR(superio_workqueue)) + return 0; + /* create_workqueue() signals failure with NULL, which IS_ERR() does not catch */ + err = superio_workqueue ? PTR_ERR(superio_workqueue) : -ENOMEM; + printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err); + superio_workqueue = NULL; + return err; +} + +void stop_sioq(void) +{ + if (superio_workqueue) + destroy_workqueue(superio_workqueue); +} + +void run_sioq(work_func_t func, struct sioq_args *args) +{ + INIT_WORK(&args->work, func); + + init_completion(&args->comp); + while (!queue_work(superio_workqueue, &args->work)) { + /* TODO: do accounting if needed */ + schedule(); + } + wait_for_completion(&args->comp); +} + +void __unionfs_create(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + struct create_args *c = &args->create; + + args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd); + complete(&args->comp); +} + +void __unionfs_mkdir(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + struct mkdir_args *m = &args->mkdir; + + args->err = vfs_mkdir(m->parent, m->dentry, m->mode); + complete(&args->comp); +} + +void __unionfs_mknod(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + struct mknod_args *m = &args->mknod; + + args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev); + complete(&args->comp); +} + +void __unionfs_symlink(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + struct symlink_args *s = &args->symlink; + + args->err = vfs_symlink(s->parent, s->dentry, s->symbuf, s->mode); + complete(&args->comp); +} + +void __unionfs_unlink(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + struct unlink_args *u = &args->unlink; + + args->err = vfs_unlink(u->parent, u->dentry); + 
complete(&args->comp); +} + +void __delete_whiteouts(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + struct deletewh_args *d = &args->deletewh; + + args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist); + complete(&args->comp); +} + +void __is_opaque_dir(struct work_struct *work) +{ + struct sioq_args *args = container_of(work, struct sioq_args, work); + + args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry, + sizeof(UNIONFS_DIR_OPAQUE) - 1); + complete(&args->comp); +} diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.h linux-2.6.22-591/fs/unionfs/sioq.h --- linux-2.6.22-570/fs/unionfs/sioq.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/sioq.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _SIOQ_H +#define _SIOQ_H + +struct deletewh_args { + struct unionfs_dir_state *namelist; + struct dentry *dentry; + int bindex; +}; + +struct is_opaque_args { + struct dentry *dentry; +}; + +struct create_args { + struct inode *parent; + struct dentry *dentry; + umode_t mode; + struct nameidata *nd; +}; + +struct mkdir_args { + struct inode *parent; + struct dentry *dentry; + umode_t mode; +}; + +struct mknod_args { + struct inode *parent; + struct dentry *dentry; + umode_t mode; + dev_t dev; +}; + +struct symlink_args { + struct inode *parent; + struct dentry *dentry; + char *symbuf; + umode_t mode; +}; + +struct unlink_args { + struct inode *parent; + struct dentry *dentry; +}; + + +struct sioq_args { + struct completion comp; + struct work_struct work; + int err; + void *ret; + + union { + struct deletewh_args deletewh; + struct is_opaque_args is_opaque; + struct create_args create; + struct mkdir_args mkdir; + struct mknod_args mknod; + struct symlink_args symlink; + struct unlink_args unlink; + }; +}; + +/* Extern definitions for SIOQ functions */ +extern int __init init_sioq(void); +extern void stop_sioq(void); +extern void run_sioq(work_func_t func, struct sioq_args *args); + +/* Extern definitions for our privilege escalation helpers */ +extern void __unionfs_create(struct work_struct *work); +extern void __unionfs_mkdir(struct work_struct *work); +extern void __unionfs_mknod(struct work_struct *work); +extern void __unionfs_symlink(struct work_struct *work); +extern void __unionfs_unlink(struct work_struct *work); +extern void __delete_whiteouts(struct work_struct *work); +extern void __is_opaque_dir(struct work_struct *work); + +#endif /* _SIOQ_H */ diff -Nurb linux-2.6.22-570/fs/unionfs/subr.c linux-2.6.22-591/fs/unionfs/subr.c --- linux-2.6.22-570/fs/unionfs/subr.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/subr.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * 
Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* + * Pass an unionfs dentry and an index. It will try to create a whiteout + * for the filename in dentry, and will try in branch 'start'. On error, + * it will proceed to a branch to the left. + */ +int create_whiteout(struct dentry *dentry, int start) +{ + int bstart, bend, bindex; + struct dentry *hidden_dir_dentry; + struct dentry *hidden_dentry; + struct dentry *hidden_wh_dentry; + char *name = NULL; + int err = -EINVAL; + + verify_locked(dentry); + + bstart = dbstart(dentry); + bend = dbend(dentry); + + /* create dentry's whiteout equivalent */ + name = alloc_whname(dentry->d_name.name, dentry->d_name.len); + if (IS_ERR(name)) { + /* name is an ERR_PTR here: the kfree() at out: must not see it */ + return PTR_ERR(name); + } + + for (bindex = start; bindex >= 0; bindex--) { + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + + if (!hidden_dentry) { + /* + * if hidden dentry is not present, create the + * entire hidden dentry directory structure and go + * ahead. Since we want to just create whiteout, we + * only want the parent dentry, and hence get rid of + * this dentry. 
+ */ + hidden_dentry = create_parents(dentry->d_inode, + dentry, bindex); + if (!hidden_dentry || IS_ERR(hidden_dentry)) { + printk(KERN_DEBUG "unionfs: create_parents " + "failed for bindex = %d\n", bindex); + continue; + } + } + + hidden_wh_dentry = + lookup_one_len(name, hidden_dentry->d_parent, + dentry->d_name.len + UNIONFS_WHLEN); + if (IS_ERR(hidden_wh_dentry)) + continue; + + /* + * The whiteout already exists. This used to be impossible, + * but now is possible because of opaqueness. + */ + if (hidden_wh_dentry->d_inode) { + dput(hidden_wh_dentry); + err = 0; + goto out; + } + + hidden_dir_dentry = lock_parent(hidden_wh_dentry); + if (!(err = is_robranch_super(dentry->d_sb, bindex))) + err = vfs_create(hidden_dir_dentry->d_inode, + hidden_wh_dentry, + ~current->fs->umask & S_IRWXUGO, + NULL); + unlock_dir(hidden_dir_dentry); + dput(hidden_wh_dentry); + + if (!err || !IS_COPYUP_ERR(err)) + break; + } + + /* set dbopaque so that lookup will not proceed after this branch */ + if (!err) + set_dbopaque(dentry, bindex); + +out: + kfree(name); + return err; +} + +/* + * This is a helper function for rename, which ends up with hosed over + * dentries when it needs to revert. 
+ */ +int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex) +{ + struct dentry *hidden_dentry; + struct dentry *hidden_parent; + int err = 0; + + verify_locked(dentry); + + unionfs_lock_dentry(dentry->d_parent); + hidden_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex); + unionfs_unlock_dentry(dentry->d_parent); + + BUG_ON(!S_ISDIR(hidden_parent->d_inode->i_mode)); + + hidden_dentry = lookup_one_len(dentry->d_name.name, hidden_parent, + dentry->d_name.len); + if (IS_ERR(hidden_dentry)) { + err = PTR_ERR(hidden_dentry); + goto out; + } + + dput(unionfs_lower_dentry_idx(dentry, bindex)); + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex)); + unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL); + + if (!hidden_dentry->d_inode) { + dput(hidden_dentry); + unionfs_set_lower_dentry_idx(dentry, bindex, NULL); + } else { + unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry); + unionfs_set_lower_inode_idx(dentry->d_inode, bindex, + igrab(hidden_dentry->d_inode)); + } + +out: + return err; +} + +int make_dir_opaque(struct dentry *dentry, int bindex) +{ + int err = 0; + struct dentry *hidden_dentry, *diropq; + struct inode *hidden_dir; + + hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); + hidden_dir = hidden_dentry->d_inode; + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) || + !S_ISDIR(hidden_dir->i_mode)); + + mutex_lock(&hidden_dir->i_mutex); + diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry, + sizeof(UNIONFS_DIR_OPAQUE) - 1); + if (IS_ERR(diropq)) { + err = PTR_ERR(diropq); + goto out; + } + + if (!diropq->d_inode) + err = vfs_create(hidden_dir, diropq, S_IRUGO, NULL); + if (!err) + set_dbopaque(dentry, bindex); + + dput(diropq); + +out: + mutex_unlock(&hidden_dir->i_mutex); + return err; +} + +/* + * returns the sum of the n_link values of all the underlying inodes of the + * passed inode + */ +int unionfs_get_nlinks(struct inode *inode) +{ + int sum_nlinks = 0; + int dirs = 0; + int bindex; + struct inode 
*hidden_inode; + + /* don't bother to do all the work since we're unlinked */ + if (inode->i_nlink == 0) + return 0; + + if (!S_ISDIR(inode->i_mode)) + return unionfs_lower_inode(inode)->i_nlink; + + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { + hidden_inode = unionfs_lower_inode_idx(inode, bindex); + + /* ignore files */ + if (!hidden_inode || !S_ISDIR(hidden_inode->i_mode)) + continue; + + BUG_ON(hidden_inode->i_nlink < 0); + + /* A deleted directory. */ + if (hidden_inode->i_nlink == 0) + continue; + dirs++; + + /* + * A broken directory... + * + * Some filesystems don't properly set the number of links + * on empty directories + */ + if (hidden_inode->i_nlink == 1) + sum_nlinks += 2; + else + sum_nlinks += (hidden_inode->i_nlink - 2); + } + + return (!dirs ? 0 : sum_nlinks + 2); +} + +/* construct whiteout filename */ +char *alloc_whname(const char *name, int len) +{ + char *buf; + + buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + strcpy(buf, UNIONFS_WHPFX); + strlcat(buf, name, len + UNIONFS_WHLEN + 1); + + return buf; +} diff -Nurb linux-2.6.22-570/fs/unionfs/super.c linux-2.6.22-591/fs/unionfs/super.c --- linux-2.6.22-570/fs/unionfs/super.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/super.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. 
Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* + * The inode cache is used with alloc_inode for both our inode info and the + * vfs inode. + */ +static struct kmem_cache *unionfs_inode_cachep; + +static void unionfs_read_inode(struct inode *inode) +{ + extern struct address_space_operations unionfs_aops; + int size; + struct unionfs_inode_info *info = UNIONFS_I(inode); + + unionfs_read_lock(inode->i_sb); + + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); + info->bstart = -1; + info->bend = -1; + atomic_set(&info->generation, + atomic_read(&UNIONFS_SB(inode->i_sb)->generation)); + spin_lock_init(&info->rdlock); + info->rdcount = 1; + info->hashsize = -1; + INIT_LIST_HEAD(&info->readdircache); + + size = sbmax(inode->i_sb) * sizeof(struct inode *); + info->lower_inodes = kzalloc(size, GFP_KERNEL); + if (!info->lower_inodes) { + printk(KERN_ERR "unionfs: no kernel memory when allocating " + "lower-pointer array!\n"); + BUG(); + } + + inode->i_version++; + inode->i_op = &unionfs_main_iops; + inode->i_fop = &unionfs_main_fops; + + inode->i_mapping->a_ops = &unionfs_aops; + + unionfs_read_unlock(inode->i_sb); +} + +/* + * we now define delete_inode, because there are two VFS paths that may + * destroy an inode: one of them calls clear inode before doing everything + * else that's needed, and the other is fine. This way we truncate the inode + * size (and its pages) and then clear our own inode, which will do an iput + * on our and the lower inode. + * + * No need to lock sb info's rwsem. 
+ */ +static void unionfs_delete_inode(struct inode *inode) +{ + inode->i_size = 0; /* every f/s seems to do that */ + + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + + clear_inode(inode); +} + +/* + * final actions when unmounting a file system + * + * No need to lock rwsem. + */ +static void unionfs_put_super(struct super_block *sb) +{ + int bindex, bstart, bend; + struct unionfs_sb_info *spd; + int leaks = 0; + + spd = UNIONFS_SB(sb); + if (!spd) + return; + + bstart = sbstart(sb); + bend = sbend(sb); + + /* Make sure we have no leaks of branchget/branchput. */ + for (bindex = bstart; bindex <= bend; bindex++) + if (branch_count(sb, bindex) != 0) { + printk("unionfs: branch %d has %d references left!\n", + bindex, branch_count(sb, bindex)); + leaks = 1; + } + BUG_ON(leaks != 0); + + kfree(spd->data); + kfree(spd); + sb->s_fs_info = NULL; +} + +/* + * Since people use this to answer the "How big of a file can I write?" + * question, we report the size of the highest priority branch as the size of + * the union. + */ +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err = 0; + struct super_block *sb; + struct dentry *lower_dentry; + + sb = dentry->d_sb; + + unionfs_read_lock(sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + lower_dentry = unionfs_lower_dentry(sb->s_root); + err = vfs_statfs(lower_dentry, buf); + + /* set return buf to our f/s to avoid confusing user-level utils */ + buf->f_type = UNIONFS_SUPER_MAGIC; + + /* + * Our maximum file name can is shorter by a few bytes because every + * file name could potentially be whited-out. 
+ */ + buf->f_namelen -= UNIONFS_WHLEN; + + memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t)); + memset(&buf->f_spare, 0, sizeof(buf->f_spare)); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(sb); + return err; +} + +/* handle mode changing during remount */ +static noinline int do_remount_mode_option(char *optarg, int cur_branches, + struct unionfs_data *new_data, + struct path *new_lower_paths) +{ + int err = -EINVAL; + int perms, idx; + char *modename = strchr(optarg, '='); + struct nameidata nd; + + /* by now, optarg contains the branch name */ + if (!*optarg) { + printk("unionfs: no branch specified for mode change.\n"); + goto out; + } + if (!modename) { + printk("unionfs: branch \"%s\" requires a mode.\n", optarg); + goto out; + } + *modename++ = '\0'; + perms = __parse_branch_mode(modename); + if (perms == 0) { + printk("unionfs: invalid mode \"%s\" for \"%s\".\n", + modename, optarg); + goto out; + } + + /* + * Find matching branch index. For now, this assumes that nothing + * has been mounted on top of this Unionfs stack. Once we have /odf + * and cache-coherency resolved, we'll address the branch-path + * uniqueness. + */ + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_WARNING "unionfs: error accessing " + "hidden directory \"%s\" (error %d)\n", + optarg, err); + goto out; + } + for (idx=0; idx 0) { + err = -EBUSY; + goto out; + } + + /* + * Now we have to delete the branch. First, release any handles it + * has. Then, move the remaining array indexes past "idx" in + * new_data and new_lower_paths one to the left. Finally, adjust + * cur_branches. 
+ */ + pathput(&new_lower_paths[idx]); + + if (idx < cur_branches - 1) { + /* if idx==cur_branches-1, we delete last branch: easy */ + memmove(&new_data[idx], &new_data[idx+1], + (cur_branches - 1 - idx) * + sizeof(struct unionfs_data)); + memmove(&new_lower_paths[idx], &new_lower_paths[idx+1], + (cur_branches - 1 - idx) * sizeof(struct path)); + } + + err = 0; +out: + return err; +} + +/* handle branch insertion during remount */ +static noinline int do_remount_add_option(char *optarg, int cur_branches, + struct unionfs_data *new_data, + struct path *new_lower_paths, + int *high_branch_id) +{ + int err = -EINVAL; + int perms; + int idx = 0; /* default: insert at beginning */ + char *new_branch , *modename = NULL; + struct nameidata nd; + + /* + * optarg can be of several forms: + * + * /bar:/foo insert /foo before /bar + * /bar:/foo=ro insert /foo in ro mode before /bar + * /foo insert /foo in the beginning (prepend) + * :/foo insert /foo at the end (append) + */ + if (*optarg == ':') { /* append? */ + new_branch = optarg + 1; /* skip ':' */ + idx = cur_branches; + goto found_insertion_point; + } + new_branch = strchr(optarg, ':'); + if (!new_branch) { /* prepend? */ + new_branch = optarg; + goto found_insertion_point; + } + *new_branch++ = '\0'; /* holds path+mode of new branch */ + + /* + * Find matching branch index. For now, this assumes that nothing + * has been mounted on top of this Unionfs stack. Once we have /odf + * and cache-coherency resolved, we'll address the branch-path + * uniqueness. 
+ */ + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_WARNING "unionfs: error accessing " + "hidden directory \"%s\" (error %d)\n", + optarg, err); + goto out; + } + for (idx=0; idx < cur_branches; idx++) + if (nd.mnt == new_lower_paths[idx].mnt && + nd.dentry == new_lower_paths[idx].dentry) + break; + path_release(&nd); /* no longer needed */ + if (idx == cur_branches) { + printk(KERN_WARNING "unionfs: branch \"%s\" " + "not found\n", optarg); + err = -ENOENT; + goto out; + } + + /* + * At this point idx will hold the index where the new branch should + * be inserted before. + */ +found_insertion_point: + /* find the mode for the new branch */ + if (new_branch) + modename = strchr(new_branch, '='); + if (modename) + *modename++ = '\0'; + perms = parse_branch_mode(modename); + + if (!new_branch || !*new_branch) { + printk(KERN_WARNING "unionfs: null new branch\n"); + err = -EINVAL; + goto out; + } + err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_WARNING "unionfs: error accessing " + "hidden directory \"%s\" (error %d)\n", + new_branch, err); + goto out; + } + /* + * It's probably safe to check_mode the new branch to insert. Note: + * we don't allow inserting branches which are unionfs's by + * themselves (check_branch returns EINVAL in that case). This is + * because this code base doesn't support stacking unionfs: the ODF + * code base supports that correctly. + */ + if ((err = check_branch(&nd))) { + printk(KERN_WARNING "unionfs: hidden directory " + "\"%s\" is not a valid branch\n", optarg); + path_release(&nd); + goto out; + } + + /* + * Now we have to insert the new branch. But first, move the bits + * to make space for the new branch, if needed. Finally, adjust + * cur_branches. + * We don't release nd here; it's kept until umount/remount. 
+ */ + if (idx < cur_branches) { + /* if idx==cur_branches, we append: easy */ + memmove(&new_data[idx+1], &new_data[idx], + (cur_branches - idx) * sizeof(struct unionfs_data)); + memmove(&new_lower_paths[idx+1], &new_lower_paths[idx], + (cur_branches - idx) * sizeof(struct path)); + } + new_lower_paths[idx].dentry = nd.dentry; + new_lower_paths[idx].mnt = nd.mnt; + + new_data[idx].sb = nd.dentry->d_sb; + atomic_set(&new_data[idx].open_files, 0); + new_data[idx].branchperms = perms; + new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */ + + err = 0; +out: + return err; +} + + +/* + * Support branch management options on remount. + * + * See Documentation/filesystems/unionfs/ for details. + * + * @flags: numeric mount options + * @options: mount options string + * + * This function can rearrange a mounted union dynamically, adding and + * removing branches, including changing branch modes. Clearly this has to + * be done safely and atomically. Luckily, the VFS already calls this + * function with lock_super(sb) and lock_kernel() held, preventing + * concurrent mixing of new mounts, remounts, and unmounts. Moreover, + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb) + * to purge dentries/inodes from our superblock, and also called + * fsync_super(sb) to purge any dirty pages. So we're good. + * + * XXX: however, our remount code may also need to invalidate mapped pages + * so as to force them to be re-gotten from the (newly reconfigured) lower + * branches. This has to wait for proper mmap and cache coherency support + * in the VFS. + * + */ +static int unionfs_remount_fs(struct super_block *sb, int *flags, + char *options) +{ + int err = 0; + int i; + char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */ + char *optname; + int cur_branches = 0; /* no. of current branches */ + int new_branches = 0; /* no. of branches actually left in the end */ + int add_branches; /* est. no. 
of branches to add */ + int del_branches; /* est. no. of branches to del */ + int max_branches; /* max possible no. of branches */ + struct unionfs_data *new_data = NULL, *tmp_data = NULL; + struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL; + struct inode **new_lower_inodes = NULL; + int new_high_branch_id; /* new high branch ID */ + int size; /* memory allocation size, temp var */ + int old_ibstart, old_ibend; + + unionfs_write_lock(sb); + + /* + * The VFS will take care of "ro" and "rw" flags, and we can safely + * ignore MS_SILENT, but anything else left over is an error. So we + * need to check if any other flags may have been passed (none are + * allowed/supported as of now). + */ + if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) { + printk(KERN_WARNING + "unionfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + goto out_error; + } + + /* + * If 'options' is NULL, it's probably because the user just changed + * the union to a "ro" or "rw" and the VFS took care of it. So + * nothing to do and we're done. + */ + if (!options || options[0] == '\0') + goto out_error; + + /* + * Find out how many branches we will have in the end, counting + * "add" and "del" commands. Copy the "options" string because + * strsep modifies the string and we need it later. + */ + optionstmp = tmp_to_free = kstrdup(options, GFP_KERNEL); + if (!optionstmp) { + err = -ENOMEM; + goto out_free; + } + new_branches = cur_branches = sbmax(sb); /* current no. branches */ + add_branches = del_branches = 0; + new_high_branch_id = sbhbid(sb); /* save current high_branch_id */ + while ((optname = strsep(&optionstmp, ",")) != NULL) { + char *optarg; + + if (!optname || !*optname) + continue; + + optarg = strchr(optname, '='); + if (optarg) + *optarg++ = '\0'; + + if (!strcmp("add", optname)) + add_branches++; + else if (!strcmp("del", optname)) + del_branches++; + } + kfree(tmp_to_free); + /* after all changes, will we have at least one branch left? 
*/ + if ((new_branches + add_branches - del_branches) < 1) { + printk(KERN_WARNING + "unionfs: no branches left after remount\n"); + err = -EINVAL; + goto out_free; + } + + /* + * Since we haven't actually parsed all the add/del options, nor + * have we checked them for errors, we don't know for sure how many + * branches we will have after all changes have taken place. In + * fact, the total number of branches left could be less than what + * we have now. So we need to allocate space for a temporary + * placeholder that is at least as large as the maximum number of + * branches we *could* have, which is the current number plus all + * the additions. Once we're done with these temp placeholders, we + * may have to re-allocate the final size, copy over from the temp, + * and then free the temps (done near the end of this function). + */ + max_branches = cur_branches + add_branches; + /* allocate space for new pointers to hidden dentry */ + tmp_data = kcalloc(max_branches, + sizeof(struct unionfs_data), GFP_KERNEL); + if (!tmp_data) { + err = -ENOMEM; + goto out_free; + } + /* allocate space for new pointers to lower paths */ + tmp_lower_paths = kcalloc(max_branches, + sizeof(struct path), GFP_KERNEL); + if (!tmp_lower_paths) { + err = -ENOMEM; + goto out_free; + } + /* copy current info into new placeholders, incrementing refcnts */ + memcpy(tmp_data, UNIONFS_SB(sb)->data, + cur_branches * sizeof(struct unionfs_data)); + memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths, + cur_branches * sizeof(struct path)); + for (i=0; i UNIONFS_MAX_BRANCHES) { + printk("unionfs: command exceeds " + "%d branches\n", UNIONFS_MAX_BRANCHES); + err = -E2BIG; + goto out_release; + } + continue; + } + if (!strcmp("del", optname)) { + err = do_remount_del_option(optarg, new_branches, + tmp_data, + tmp_lower_paths); + if (err) + goto out_release; + new_branches--; + continue; + } + if (!strcmp("mode", optname)) { + err = do_remount_mode_option(optarg, new_branches, + tmp_data, + 
tmp_lower_paths); + if (err) + goto out_release; + continue; + } + + /* + * When you use "mount -o remount,ro", mount(8) will + * reportedly pass the original dirs= string from + * /proc/mounts. So for now, we have to ignore dirs= and + * not consider it an error, unless we want to allow users + * to pass dirs= in remount. Note that to allow the VFS to + * actually process the ro/rw remount options, we have to + * return 0 from this function. + */ + if (!strcmp("dirs", optname)) { + printk(KERN_WARNING + "unionfs: remount ignoring option \"%s\".\n", + optname); + continue; + } + + err = -EINVAL; + printk(KERN_WARNING + "unionfs: unrecognized option \"%s\"\n", optname); + goto out_release; + } + +out_no_change: + + /****************************************************************** + * WE'RE ALMOST DONE: check if leftmost branch might be read-only, + * see if we need to allocate a small-sized new vector, copy the + * vectors to their correct place, release the refcnt of the older + * ones, and return. Also handle invalidating any pages that will + * have to be re-read. 
+ *******************************************************************/ + + if (!(tmp_data[0].branchperms & MAY_WRITE)) { + printk("unionfs: leftmost branch cannot be read-only " + "(use \"remount,ro\" to create a read-only union)\n"); + err = -EINVAL; + goto out_release; + } + + /* (re)allocate space for new pointers to hidden dentry */ + size = new_branches * sizeof(struct unionfs_data); + new_data = krealloc(tmp_data, size, GFP_KERNEL); + if (!new_data) { + err = -ENOMEM; + goto out_release; + } + + /* allocate space for new pointers to lower paths */ + size = new_branches * sizeof(struct path); + new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL); + if (!new_lower_paths) { + err = -ENOMEM; + goto out_release; + } + + /* allocate space for new pointers to lower inodes */ + new_lower_inodes = kcalloc(new_branches, + sizeof(struct inode *), GFP_KERNEL); + if (!new_lower_inodes) { + err = -ENOMEM; + goto out_release; + } + + /* + * OK, just before we actually put the new set of branches in place, + * we need to ensure that our own f/s has no dirty objects left. + * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and + * fsync_super(sb), taking care of dentries, inodes, and dirty + * pages. So all that's left is for us to invalidate any leftover + * (non-dirty) pages to ensure that they will be re-read from the + * new lower branches (and to support mmap). + */ + + /* + * No we call drop_pagecache_sb() to invalidate all pages in this + * super. This function calls invalidate_inode_pages(mapping), + * which calls invalidate_mapping_pages(): the latter, however, will + * not invalidate pages which are dirty, locked, under writeback, or + * mapped into page tables. We shouldn't have to worry about dirty + * or under-writeback pages, because do_remount_sb() called + * fsync_super() which would not have returned until all dirty pages + * were flushed. + * + * But do we have to worry about locked pages? 
Is there any chance + * that in here we'll get locked pages? + * + * XXX: what about pages mapped into pagetables? Are these pages + * which user processes may have mmap(2)'ed? If so, then we need to + * invalidate those too, no? Maybe we'll have to write our own + * version of invalidate_mapping_pages() which also handled mapped + * pages. + * + * XXX: Alternatively, maybe we should call truncate_inode_pages(), + * which use two passes over the pages list, and will truncate all + * pages. + */ + drop_pagecache_sb(sb); + + /* copy new vectors into their correct place */ + tmp_data = UNIONFS_SB(sb)->data; + UNIONFS_SB(sb)->data = new_data; + new_data = NULL; /* so don't free good pointers below */ + tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths; + UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths; + new_lower_paths = NULL; /* so don't free good pointers below */ + + /* update our unionfs_sb_info and root dentry index of last branch */ + i = sbmax(sb); /* save no. of branches to release at end */ + sbend(sb) = new_branches - 1; + set_dbend(sb->s_root, new_branches - 1); + old_ibstart = ibstart(sb->s_root->d_inode); + old_ibend = ibend(sb->s_root->d_inode); + ibend(sb->s_root->d_inode) = new_branches - 1; + UNIONFS_D(sb->s_root)->bcount = new_branches; + new_branches = i; /* no. of branches to release below */ + + /* + * Update lower inodes: 3 steps + * 1. grab ref on all new lower inodes + */ + for (i=dbstart(sb->s_root); i<=dbend(sb->s_root); i++) { + struct dentry *lower_dentry = + unionfs_lower_dentry_idx(sb->s_root, i); + atomic_inc(&lower_dentry->d_inode->i_count); + new_lower_inodes[i] = lower_dentry->d_inode; + } + /* 2. release reference on all older lower inodes */ + for (i=old_ibstart; i<=old_ibend; i++) { + iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i)); + unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL); + } + kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes); + /* 3. 
update root dentry's inode to new lower_inodes array */ + UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes; + new_lower_inodes = NULL; + + /* maxbytes may have changed */ + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; + /* update high branch ID */ + sbhbid(sb) = new_high_branch_id; + + /* update our sb->generation for revalidating objects */ + i = atomic_inc_return(&UNIONFS_SB(sb)->generation); + atomic_set(&UNIONFS_D(sb->s_root)->generation, i); + atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i); + + err = 0; /* reset to success */ + + if (!(*flags & MS_SILENT)) + printk("unionfs: new generation number %d\n", i); + + /* + * The code above falls through to the next label, and releases the + * refcnts of the older ones (stored in tmp_*): if we fell through + * here, it means success. However, if we jump directly to this + * label from any error above, then an error occurred after we + * grabbed various refcnts, and so we have to release the + * temporarily constructed structures. + */ +out_release: + /* no need to cleanup/release anything in tmp_data */ + if (tmp_lower_paths) + for (i=0; ireaddircache) { + rdstate = list_entry(pos, struct unionfs_dir_state, cache); + list_del(&rdstate->cache); + free_rdstate(rdstate); + } + + /* + * Decrement a reference to a hidden_inode, which was incremented + * by our read_inode when it was created initially. 
+	 */
+	bstart = ibstart(inode);
+	bend = ibend(inode);
+	if (bstart >= 0) {
+		for (bindex = bstart; bindex <= bend; bindex++) {
+			hidden_inode = unionfs_lower_inode_idx(inode, bindex);
+			/* slots without a lower inode are simply skipped */
+			if (!hidden_inode)
+				continue;
+			iput(hidden_inode);
+		}
+	}
+
+	/* free the lower-inode pointer array and clear the stale pointer */
+	kfree(UNIONFS_I(inode)->lower_inodes);
+	UNIONFS_I(inode)->lower_inodes = NULL;
+}
+
+/*
+ * Allocate a unionfs_inode_info from unionfs_inode_cachep and return the
+ * VFS inode embedded in it.  Only the unionfs-private fields that precede
+ * vfs_inode are zeroed here.  Returns NULL if the slab allocation fails.
+ */
+static struct inode *unionfs_alloc_inode(struct super_block *sb)
+{
+	struct unionfs_inode_info *i;
+
+	i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
+	if (!i)
+		return NULL;
+
+	/* memset everything up to the inode to 0 */
+	memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
+
+	i->vfs_inode.i_version = 1;
+	return &i->vfs_inode;
+}
+
+/*
+ * Return the unionfs_inode_info that UNIONFS_I() yields for @inode to
+ * unionfs_inode_cachep.
+ */
+static void unionfs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
+}
+
+/* unionfs inode cache constructor */
+static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags)
+{
+	struct unionfs_inode_info *i = v;
+
+	inode_init_once(&i->vfs_inode);
+}
+
+/*
+ * Create the slab cache for unionfs_inode_info objects, with init_once()
+ * as its constructor.  Returns 0 on success, or -ENOMEM if the cache
+ * could not be created.
+ */
+int unionfs_init_inode_cache(void)
+{
+	int err = 0;
+
+	unionfs_inode_cachep =
+		kmem_cache_create("unionfs_inode_cache",
+				  sizeof(struct unionfs_inode_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, init_once, NULL);
+	if (!unionfs_inode_cachep)
+		err = -ENOMEM;
+	return err;
+}
+
+/* unionfs inode cache destructor */
+void unionfs_destroy_inode_cache(void)
+{
+	/* nothing to do if the cache was never created */
+	if (unionfs_inode_cachep)
+		kmem_cache_destroy(unionfs_inode_cachep);
+}
+
+/*
+ * Called when we have a dirty inode, right here we only throw out
+ * parts of our readdir list that are too old.
+ *
+ * No need to grab sb info's rwsem.
+ */
+static int unionfs_write_inode(struct inode *inode, int sync)
+{
+	struct list_head *pos, *n;
+	struct unionfs_dir_state *rdstate;
+
+	/* rdlock protects the per-inode readdir cache list walked below */
+	spin_lock(&UNIONFS_I(inode)->rdlock);
+	list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
+		rdstate = list_entry(pos, struct unionfs_dir_state, cache);
+		/* We keep this list in LRU order.
*/
+		/*
+		 * The first entry that has not yet aged RDCACHE_JIFFIES ends
+		 * the scan.  time_after() keeps the test correct when the
+		 * jiffies counter wraps around.
+		 */
+		if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
+			break;
+		UNIONFS_I(inode)->rdcount--;
+		list_del(&rdstate->cache);
+		free_rdstate(rdstate);
+	}
+	spin_unlock(&UNIONFS_I(inode)->rdlock);
+
+	return 0;
+}
+
+/*
+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
+ * code can actually succeed and won't leave tasks that need handling.
+ *
+ * @mnt:   the unionfs mount being unmounted
+ * @flags: umount flags; nothing is done unless MNT_FORCE is set
+ *
+ * Forwards the call to the umount_begin operation of every lower branch
+ * that provides one, holding the superblock rwsem for reading.
+ */
+static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
+{
+	struct super_block *sb, *hidden_sb;
+	struct vfsmount *hidden_mnt;
+	int bindex, bstart, bend;
+
+	if (!(flags & MNT_FORCE))
+		/*
+		 * we are not being MNT_FORCE'd, therefore we should emulate
+		 * old behavior
+		 */
+		return;
+
+	sb = mnt->mnt_sb;
+
+	unionfs_read_lock(sb);
+
+	bstart = sbstart(sb);
+	bend = sbend(sb);
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		hidden_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
+		hidden_sb = unionfs_lower_super_idx(sb, bindex);
+
+		/* only branches that implement umount_begin are notified */
+		if (hidden_mnt && hidden_sb && hidden_sb->s_op &&
+		    hidden_sb->s_op->umount_begin)
+			hidden_sb->s_op->umount_begin(hidden_mnt, flags);
+	}
+
+	unionfs_read_unlock(sb);
+}
+
+/*
+ * Print the branch configuration of the union into @m in the form
+ * ",dirs=<path>=<rw|ro>[:<path>=<rw|ro>...]", one entry per branch from
+ * sbstart() to sbend().
+ *
+ * Returns 0 on success, -ENOMEM if the scratch page cannot be allocated,
+ * or the error reported by d_path().
+ */
+static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct super_block *sb = mnt->mnt_sb;
+	int ret = 0;
+	char *tmp_page;
+	char *path;
+	int bindex, bstart, bend;
+	int perms;
+
+	unionfs_read_lock(sb);
+
+	unionfs_lock_dentry(sb->s_root);
+
+	/* one page of scratch space for d_path() to build each name in */
+	tmp_page = (char*) __get_free_page(GFP_KERNEL);
+	if (!tmp_page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	bstart = sbstart(sb);
+	bend = sbend(sb);
+
+	seq_printf(m, ",dirs=");
+	for (bindex = bstart; bindex <= bend; bindex++) {
+		path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex),
+			      unionfs_lower_mnt_idx(sb->s_root, bindex),
+			      tmp_page, PAGE_SIZE);
+		if (IS_ERR(path)) {
+			ret = PTR_ERR(path);
+			goto out;
+		}
+
+		perms = branchperms(sb, bindex);
+
+		seq_printf(m, "%s=%s", path,
+			   perms & MAY_WRITE ?
"rw" : "ro"); + if (bindex != bend) + seq_printf(m, ":"); + } + +out: + free_page((unsigned long) tmp_page); + + unionfs_unlock_dentry(sb->s_root); + + unionfs_read_unlock(sb); + + return ret; +} + +struct super_operations unionfs_sops = { + .read_inode = unionfs_read_inode, + .delete_inode = unionfs_delete_inode, + .put_super = unionfs_put_super, + .statfs = unionfs_statfs, + .remount_fs = unionfs_remount_fs, + .clear_inode = unionfs_clear_inode, + .umount_begin = unionfs_umount_begin, + .show_options = unionfs_show_options, + .write_inode = unionfs_write_inode, + .alloc_inode = unionfs_alloc_inode, + .destroy_inode = unionfs_destroy_inode, +}; diff -Nurb linux-2.6.22-570/fs/unionfs/union.h linux-2.6.22-591/fs/unionfs/union.h --- linux-2.6.22-570/fs/unionfs/union.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/union.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _UNION_H_ +#define _UNION_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* the file system name */ +#define UNIONFS_NAME "unionfs" + +/* unionfs root inode number */ +#define UNIONFS_ROOT_INO 1 + +/* number of times we try to get a unique temporary file name */ +#define GET_TMPNAM_MAX_RETRY 5 + +/* maximum number of branches we support, to avoid memory blowup */ +#define UNIONFS_MAX_BRANCHES 128 + +/* Operations vectors defined in specific files. */ +extern struct file_operations unionfs_main_fops; +extern struct file_operations unionfs_dir_fops; +extern struct inode_operations unionfs_main_iops; +extern struct inode_operations unionfs_dir_iops; +extern struct inode_operations unionfs_symlink_iops; +extern struct super_operations unionfs_sops; +extern struct dentry_operations unionfs_dops; + +/* How long should an entry be allowed to persist */ +#define RDCACHE_JIFFIES (5*HZ) + +/* file private data. */ +struct unionfs_file_info { + int bstart; + int bend; + atomic_t generation; + + struct unionfs_dir_state *rdstate; + struct file **lower_files; + int *saved_branch_ids; /* IDs of branches when file was opened */ +}; + +/* unionfs inode data in memory */ +struct unionfs_inode_info { + int bstart; + int bend; + atomic_t generation; + int stale; + /* Stuff for readdir over NFS. 
*/
+	spinlock_t rdlock;		/* protects readdircache and rdcount */
+	struct list_head readdircache;	/* cached unionfs_dir_state entries */
+	int rdcount;			/* number of entries on readdircache */
+	int hashsize;
+	int cookie;
+
+	/* The hidden inodes */
+	struct inode **lower_inodes;
+	/* to keep track of reads/writes for unlinks before closes */
+	atomic_t totalopens;
+
+	/* embedded VFS inode; fields above it are zeroed at allocation time */
+	struct inode vfs_inode;
+};
+
+/* unionfs dentry data in memory */
+struct unionfs_dentry_info {
+	/*
+	 * The mutex is used to lock the dentry as soon as we get into a
+	 * unionfs function from the VFS.  Our lock ordering is that children
+	 * go before their parents.
+	 */
+	struct mutex lock;
+	int bstart;	/* presumably index of the first populated branch - confirm */
+	int bend;	/* presumably index of the last populated branch - confirm */
+	int bopaque;
+	int bcount;	/* number of branches (set to the branch count on remount) */
+	atomic_t generation;
+	struct path *lower_paths;	/* per-branch lower dentry/vfsmount pairs */
+};
+
+/* These are the pointers to our various objects. */
+struct unionfs_data {
+	struct super_block *sb;	/* super block of the lower branch */
+	atomic_t open_files;	/* number of open files on branch */
+	int branchperms;	/* permission mask; MAY_WRITE set means writable */
+	int branch_id;		/* unique branch ID at re/mount time */
+};
+
+/* unionfs super-block data in memory */
+struct unionfs_sb_info {
+	int bend;	/* presumably index of the last branch - confirm against sbend() */
+
+	/* incremented by a successful remount so objects get revalidated */
+	atomic_t generation;
+
+	/*
+	 * This rwsem is used to make sure that a branch management
+	 * operation...
+	 * 1) will not begin before all currently in-flight operations
+	 *    complete
+	 * 2) any new operations do not execute until the currently
+	 *    running branch management operation completes
+	 */
+	struct rw_semaphore rwsem;
+	int high_branch_id;	/* last unique branch ID given */
+	struct unionfs_data *data;	/* per-branch data, indexed by branch */
+};
+
+/*
+ * structure for making the linked list of entries by readdir on left branch
+ * to compare with entries on right branch
+ */
+struct filldir_node {
+	struct list_head file_list;	/* list for directory entries */
+	char *name;		/* name entry */
+	int hash;		/* name hash */
+	int namelen;		/* name len since name is not 0 terminated */
+
+	/*
+	 * we can check for duplicate whiteouts and files in the same branch
+	 * in order to return -EIO.
+	 */
+	int bindex;
+
+	/* is this a whiteout entry?
*/ + int whiteout; + + /* Inline name, so we don't need to separately kmalloc small ones */ + char iname[DNAME_INLINE_LEN_MIN]; +}; + +/* Directory hash table. */ +struct unionfs_dir_state { + unsigned int cookie; /* the cookie, based off of rdversion */ + unsigned int offset; /* The entry we have returned. */ + int bindex; + loff_t dirpos; /* offset within the lower level directory */ + int size; /* How big is the hash table? */ + int hashentries; /* How many entries have been inserted? */ + unsigned long access; + + /* This cache list is used when the inode keeps us around. */ + struct list_head cache; + struct list_head list[0]; +}; + +/* include miscellaneous macros */ +#include "fanout.h" +#include "sioq.h" + +/* externs for cache creation/deletion routines */ +extern void unionfs_destroy_filldir_cache(void); +extern int unionfs_init_filldir_cache(void); +extern int unionfs_init_inode_cache(void); +extern void unionfs_destroy_inode_cache(void); +extern int unionfs_init_dentry_cache(void); +extern void unionfs_destroy_dentry_cache(void); + +/* Initialize and free readdir-specific state. */ +extern int init_rdstate(struct file *file); +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex); +extern struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos); +extern void free_rdstate(struct unionfs_dir_state *state); +extern int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name, + int namelen, int bindex, int whiteout); +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, + const char *name, int namelen); + +extern struct dentry **alloc_new_dentries(int objs); +extern struct unionfs_data *alloc_new_data(int objs); + +/* We can only use 32-bits of offset for rdstate --- blech! */ +#define DIREOF (0xfffff) +#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */ +#define MAXRDCOOKIE (0xfff) +/* Turn an rdstate into an offset. 
*/ +static inline off_t rdstate2offset(struct unionfs_dir_state *buf) +{ + off_t tmp; + + tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS) + | (buf->offset & DIREOF); + return tmp; +} + +#define unionfs_read_lock(sb) down_read(&UNIONFS_SB(sb)->rwsem) +#define unionfs_read_unlock(sb) up_read(&UNIONFS_SB(sb)->rwsem) +#define unionfs_write_lock(sb) down_write(&UNIONFS_SB(sb)->rwsem) +#define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem) + +static inline void unionfs_double_lock_dentry(struct dentry *d1, + struct dentry *d2) +{ + if (d2 < d1) { + struct dentry *tmp = d1; + d1 = d2; + d2 = tmp; + } + unionfs_lock_dentry(d1); + unionfs_lock_dentry(d2); +} + +extern int realloc_dentry_private_data(struct dentry *dentry); +extern int new_dentry_private_data(struct dentry *dentry); +extern void free_dentry_private_data(struct dentry *dentry); +extern void update_bstart(struct dentry *dentry); + +/* + * EXTERNALS: + */ + +/* replicates the directory structure up to given dentry in given branch */ +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry, + int bindex); +extern int make_dir_opaque(struct dentry *dir, int bindex); + +/* partial lookup */ +extern int unionfs_partial_lookup(struct dentry *dentry); + +/* + * Pass an unionfs dentry and an index and it will try to create a whiteout + * in branch 'index'. 
+ * + * On error, it will proceed to a branch to the left + */ +extern int create_whiteout(struct dentry *dentry, int start); +/* copies a file from dbstart to newbindex branch */ +extern int copyup_file(struct inode *dir, struct file *file, int bstart, + int newbindex, loff_t size); +extern int copyup_named_file(struct inode *dir, struct file *file, + char *name, int bstart, int new_bindex, + loff_t len); +/* copies a dentry from dbstart to newbindex branch */ +extern int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart, + int new_bindex, struct file **copyup_file, + loff_t len); + +extern int remove_whiteouts(struct dentry *dentry, + struct dentry *hidden_dentry, int bindex); + +extern int do_delete_whiteouts(struct dentry *dentry, int bindex, + struct unionfs_dir_state *namelist); + +extern int unionfs_get_nlinks(struct inode *inode); + +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */ +extern int check_empty(struct dentry *dentry, + struct unionfs_dir_state **namelist); +/* Delete whiteouts from this directory in branch bindex. */ +extern int delete_whiteouts(struct dentry *dentry, int bindex, + struct unionfs_dir_state *namelist); + +/* Re-lookup a hidden dentry. */ +extern int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex); + +extern void unionfs_reinterpose(struct dentry *this_dentry); +extern struct super_block *unionfs_duplicate_super(struct super_block *sb); + +/* Locking functions. */ +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl); +extern int unionfs_getlk(struct file *file, struct file_lock *fl); + +/* Common file operations. 
*/ +extern int unionfs_file_revalidate(struct file *file, int willwrite); +extern int unionfs_open(struct inode *inode, struct file *file); +extern int unionfs_file_release(struct inode *inode, struct file *file); +extern int unionfs_flush(struct file *file, fl_owner_t id); +extern long unionfs_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); + +/* Inode operations */ +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry); +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry); + +extern int __unionfs_d_revalidate_chain(struct dentry *dentry, + struct nameidata *nd); + +/* The values for unionfs_interpose's flag. */ +#define INTERPOSE_DEFAULT 0 +#define INTERPOSE_LOOKUP 1 +#define INTERPOSE_REVAL 2 +#define INTERPOSE_REVAL_NEG 3 +#define INTERPOSE_PARTIAL 4 + +extern int unionfs_interpose(struct dentry *this_dentry, + struct super_block *sb, int flag); + +#ifdef CONFIG_UNION_FS_XATTR +/* Extended attribute functions. */ +extern void *unionfs_xattr_alloc(size_t size, size_t limit); +extern void unionfs_xattr_free(void *ptr, size_t size); + +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size); +extern int unionfs_removexattr(struct dentry *dentry, const char *name); +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list, + size_t size); +extern int unionfs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +#endif /* CONFIG_UNION_FS_XATTR */ + +/* The root directory is unhashed, but isn't deleted. 
*/ +static inline int d_deleted(struct dentry *d) +{ + return d_unhashed(d) && (d != d->d_sb->s_root); +} + +struct dentry *unionfs_lookup_backend(struct dentry *dentry, + struct nameidata *nd, int lookupmode); + +/* unionfs_permission, check if we should bypass error to facilitate copyup */ +#define IS_COPYUP_ERR(err) ((err) == -EROFS) + +/* unionfs_open, check if we need to copyup the file */ +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND) +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS) + +static inline int branchperms(const struct super_block *sb, int index) +{ + BUG_ON(index < 0); + + return UNIONFS_SB(sb)->data[index].branchperms; +} + +static inline int set_branchperms(struct super_block *sb, int index, int perms) +{ + BUG_ON(index < 0); + + UNIONFS_SB(sb)->data[index].branchperms = perms; + + return perms; +} + +/* Is this file on a read-only branch? */ +static inline int is_robranch_super(const struct super_block *sb, int index) +{ + int ret; + + ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0; + return ret; +} + +/* Is this file on a read-only branch? */ +static inline int is_robranch_idx(const struct dentry *dentry, int index) +{ + int err = 0; + + BUG_ON(index < 0); + + if ((!(branchperms(dentry->d_sb, index) & MAY_WRITE)) || + IS_RDONLY(unionfs_lower_dentry_idx(dentry, index)->d_inode)) + err = -EROFS; + return err; +} + +static inline int is_robranch(const struct dentry *dentry) +{ + int index; + + index = UNIONFS_D(dentry)->bstart; + BUG_ON(index < 0); + + return is_robranch_idx(dentry, index); +} + +/* What do we use for whiteouts. */ +#define UNIONFS_WHPFX ".wh." +#define UNIONFS_WHLEN 4 +/* + * If a directory contains this file, then it is opaque. We start with the + * .wh. flag so that it is blocked by lookup. 
+ */
+#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
+#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
+
+#ifndef DEFAULT_POLLMASK
+#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
+#endif
+
+/*
+ * EXTERNALS:
+ */
+extern char *alloc_whname(const char *name, int len);
+extern int check_branch(struct nameidata *nd);
+extern int __parse_branch_mode(const char *name);
+extern int parse_branch_mode(const char *name);
+
+/*
+ * These two functions are here because it is kind of daft to copy and paste
+ * the contents of the two functions to 32+ places in unionfs
+ */
+
+/*
+ * Take a reference on the parent of @dentry and lock the parent inode's
+ * i_mutex.  Returns the parent; release it with unlock_dir().
+ */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *dir = dget(dentry->d_parent);
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	return dir;
+}
+
+/* Undo lock_parent(): unlock the inode's i_mutex, then drop the reference. */
+static inline void unlock_dir(struct dentry *dir)
+{
+	mutex_unlock(&dir->d_inode->i_mutex);
+	dput(dir);
+}
+
+/*
+ * Take a reference (mntget) on the lower vfsmount of @dentry in branch
+ * @bindex and return it.  A NULL dentry or negative index is a BUG.
+ */
+static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
+					      int bindex)
+{
+	BUG_ON(!dentry || bindex < 0);
+
+	return mntget(unionfs_lower_mnt_idx(dentry, bindex));
+}
+
+/*
+ * Drop a reference (mntput) on the lower vfsmount of @dentry in branch
+ * @bindex.  Unlike unionfs_mntget(), a NULL dentry is tolerated and is a
+ * no-op; a negative index is still a BUG.
+ */
+static inline void unionfs_mntput(struct dentry *dentry, int bindex)
+{
+	if (!dentry)
+		return;
+
+	BUG_ON(bindex < 0);
+
+	mntput(unionfs_lower_mnt_idx(dentry, bindex));
+}
+#endif	/* not _UNION_H_ */
diff -Nurb linux-2.6.22-570/fs/unionfs/unlink.c linux-2.6.22-591/fs/unionfs/unlink.c
--- linux-2.6.22-570/fs/unionfs/unlink.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-591/fs/unionfs/unlink.c	2007-12-21 15:36:12.000000000 -0500
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2003-2006 Charles P. Wright
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2005 Arun M. Krishnakumar
+ * Copyright (c) 2004-2006 David P.
Quigley
+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
+ * Copyright (c) 2003 Puja Gupta
+ * Copyright (c) 2003 Harikesavan Krishnan
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "union.h"
+
+/*
+ * unlink a file by creating a whiteout
+ *
+ * The lower object in the dentry's first branch is unlinked (unless that
+ * branch is read-only), and then a whiteout is created.  Returns 0 on
+ * success or a negative errno.
+ */
+static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
+{
+	struct dentry *hidden_dentry;
+	struct dentry *hidden_dir_dentry;
+	int bindex;
+	int err = 0;
+
+	if ((err = unionfs_partial_lookup(dentry)))
+		goto out;
+
+	bindex = dbstart(dentry);
+
+	hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+	/* NOTE(review): err is still 0 here, so this path reports success */
+	if (!hidden_dentry)
+		goto out;
+
+	hidden_dir_dentry = lock_parent(hidden_dentry);
+
+	/* avoid destroying the hidden inode if the file is in use */
+	dget(hidden_dentry);
+	/* a read-only branch yields -EROFS and the lower unlink is skipped */
+	if (!(err = is_robranch_super(dentry->d_sb, bindex)))
+		err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
+	dput(hidden_dentry);
+	fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
+	unlock_dir(hidden_dir_dentry);
+
+	/* any error other than -EROFS is final */
+	if (err && !IS_COPYUP_ERR(err))
+		goto out;
+
+	if (err) {
+		/* -EROFS on branch 0: there is no branch to the left to use */
+		if (dbstart(dentry) == 0)
+			goto out;
+
+		/* read-only branch: put the whiteout one branch to the left */
+		err = create_whiteout(dentry, dbstart(dentry) - 1);
+	} else if (dbopaque(dentry) != -1)
+		/* There is a hidden lower-priority file with the same name. */
+		err = create_whiteout(dentry, dbopaque(dentry));
+	else
+		err = create_whiteout(dentry, dbstart(dentry));
+
+out:
+	if (!err)
+		dentry->d_inode->i_nlink--;
+
+	/* We don't want to leave negative leftover dentries for revalidate.
*/
+	if (!err && (dbopaque(dentry) != -1))
+		update_bstart(dentry);
+
+	return err;
+}
+
+/*
+ * Unlink @dentry from directory @dir.
+ *
+ * Runs under the superblock read lock and the dentry lock.  Returns
+ * -ESTALE if the dentry chain fails revalidation, otherwise the result of
+ * unionfs_unlink_whiteout(); on success the dentry is dropped.
+ */
+int unionfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err = 0;
+
+	unionfs_read_lock(dentry->d_sb);
+	unionfs_lock_dentry(dentry);
+
+	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
+		err = -ESTALE;
+		goto out;
+	}
+
+	err = unionfs_unlink_whiteout(dir, dentry);
+	/* call d_drop so the system "forgets" about us */
+	if (!err)
+		d_drop(dentry);
+
+out:
+	unionfs_unlock_dentry(dentry);
+	unionfs_read_unlock(dentry->d_sb);
+	return err;
+}
+
+/*
+ * Remove the lower directory of @dentry in its first branch.
+ *
+ * Whiteouts listed in @namelist are deleted first; the lower rmdir is then
+ * attempted unless is_robranch() reports the branch as read-only.  Returns
+ * 0 on success or a negative errno.
+ */
+static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
+			       struct unionfs_dir_state *namelist)
+{
+	int err;
+	struct dentry *hidden_dentry;
+	struct dentry *hidden_dir_dentry = NULL;
+
+	/* Here we need to remove whiteout entries. */
+	err = delete_whiteouts(dentry, dbstart(dentry), namelist);
+	if (err)
+		goto out;
+
+	hidden_dentry = unionfs_lower_dentry(dentry);
+
+	hidden_dir_dentry = lock_parent(hidden_dentry);
+
+	/* avoid destroying the hidden inode if the file is in use */
+	dget(hidden_dentry);
+	if (!(err = is_robranch(dentry)))
+		err = vfs_rmdir(hidden_dir_dentry->d_inode, hidden_dentry);
+	dput(hidden_dentry);
+
+	fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
+	/* propagate number of hard-links */
+	dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
+
+out:
+	/* still NULL when delete_whiteouts() failed before the parent lock */
+	if (hidden_dir_dentry)
+		unlock_dir(hidden_dir_dentry);
+	return err;
+}
+
+/*
+ * Remove directory @dentry from @dir: the union directory must be empty
+ * (check_empty), then the lower directory is removed and a whiteout is
+ * created.
+ */
+int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int err = 0;
+	struct unionfs_dir_state *namelist = NULL;
+
+	unionfs_read_lock(dentry->d_sb);
+	unionfs_lock_dentry(dentry);
+
+	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
+		err = -ESTALE;
+		goto out;
+	}
+
+	/* check if this unionfs directory is empty or not */
+	err = check_empty(dentry, &namelist);
+	if (err)
+		goto out;
+
+	err = unionfs_rmdir_first(dir, dentry, namelist);
+	/* create whiteout */
+	if (!err)
+		err = create_whiteout(dentry, dbstart(dentry));
+
else { + int new_err; + + if (dbstart(dentry) == 0) + goto out; + + /* exit if the error returned was NOT -EROFS */ + if (!IS_COPYUP_ERR(err)) + goto out; + + new_err = create_whiteout(dentry, dbstart(dentry) - 1); + if (new_err != -EEXIST) + err = new_err; + } + +out: + /* call d_drop so the system "forgets" about us */ + if (!err) + d_drop(dentry); + + if (namelist) + free_rdstate(namelist); + + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} diff -Nurb linux-2.6.22-570/fs/unionfs/xattr.c linux-2.6.22-591/fs/unionfs/xattr.c --- linux-2.6.22-570/fs/unionfs/xattr.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/fs/unionfs/xattr.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "union.h" + +/* This is lifted from fs/xattr.c */ +void *unionfs_xattr_alloc(size_t size, size_t limit) +{ + void *ptr; + + if (size > limit) + return ERR_PTR(-E2BIG); + + if (!size) /* size request, no buffer is needed */ + return NULL; + else if (size <= PAGE_SIZE) + ptr = kmalloc(size, GFP_KERNEL); + else + ptr = vmalloc(size); + if (!ptr) + return ERR_PTR(-ENOMEM); + return ptr; +} + +void unionfs_xattr_free(void *ptr, size_t size) +{ + if (!size) /* size request, no buffer was needed */ + return; + else if (size <= PAGE_SIZE) + kfree(ptr); + else + vfree(ptr); +} + +/* + * BKL held by caller. + * dentry->d_inode->i_mutex locked + */ +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value, + size_t size) +{ + struct dentry *hidden_dentry = NULL; + int err = -EOPNOTSUPP; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + hidden_dentry = unionfs_lower_dentry(dentry); + + err = vfs_getxattr(hidden_dentry, (char*) name, value, size); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +/* + * BKL held by caller. + * dentry->d_inode->i_mutex locked + */ +int unionfs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct dentry *hidden_dentry = NULL; + int err = -EOPNOTSUPP; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + hidden_dentry = unionfs_lower_dentry(dentry); + + err = vfs_setxattr(hidden_dentry, (char*) name, (void*) value, + size, flags); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +/* + * BKL held by caller. 
+ * dentry->d_inode->i_mutex locked + */ +int unionfs_removexattr(struct dentry *dentry, const char *name) +{ + struct dentry *hidden_dentry = NULL; + int err = -EOPNOTSUPP; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + hidden_dentry = unionfs_lower_dentry(dentry); + + err = vfs_removexattr(hidden_dentry, (char*) name); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} + +/* + * BKL held by caller. + * dentry->d_inode->i_mutex locked + */ +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct dentry *hidden_dentry = NULL; + int err = -EOPNOTSUPP; + char *encoded_list = NULL; + + unionfs_read_lock(dentry->d_sb); + unionfs_lock_dentry(dentry); + + if (!__unionfs_d_revalidate_chain(dentry, NULL)) { + err = -ESTALE; + goto out; + } + + hidden_dentry = unionfs_lower_dentry(dentry); + + encoded_list = list; + err = vfs_listxattr(hidden_dentry, encoded_list, size); + +out: + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(dentry->d_sb); + return err; +} diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c linux-2.6.22-591/fs/xfs/linux-2.6/xfs_file.c --- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/fs/xfs/linux-2.6/xfs_file.c 2007-12-21 15:36:12.000000000 -0500 @@ -246,18 +246,19 @@ #ifdef CONFIG_XFS_DMAPI STATIC struct page * -xfs_vm_nopage( - struct vm_area_struct *area, - unsigned long address, - int *type) +xfs_vm_fault( + struct vm_area_struct *vma, + struct fault_data *fdata) { - struct inode *inode = area->vm_file->f_path.dentry->d_inode; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; bhv_vnode_t *vp = vn_from_inode(inode); ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); - if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0)) + if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) { + fdata->type = 
VM_FAULT_SIGBUS; return NULL; - return filemap_nopage(area, address, type); + } + return filemap_fault(vma, fdata); } #endif /* CONFIG_XFS_DMAPI */ @@ -343,6 +344,7 @@ struct vm_area_struct *vma) { vma->vm_ops = &xfs_file_vm_ops; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; #ifdef CONFIG_XFS_DMAPI if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) @@ -501,14 +503,12 @@ }; static struct vm_operations_struct xfs_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, + .fault = filemap_fault, }; #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct xfs_dmapi_file_vm_ops = { - .nopage = xfs_vm_nopage, - .populate = filemap_populate, + .fault = xfs_vm_fault, #ifdef HAVE_VMOP_MPROTECT .mprotect = xfs_vm_mprotect, #endif diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c linux-2.6.22-591/fs/xfs/linux-2.6/xfs_super.c --- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/fs/xfs/linux-2.6/xfs_super.c 2007-12-21 15:36:12.000000000 -0500 @@ -570,6 +570,7 @@ bhv_vfs_sync_work_t *work, *n; LIST_HEAD (tmp); + set_freezable(); timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); for (;;) { timeleft = schedule_timeout_interruptible(timeleft); diff -Nurb linux-2.6.22-570/include/acpi/acmacros.h linux-2.6.22-591/include/acpi/acmacros.h --- linux-2.6.22-570/include/acpi/acmacros.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/acpi/acmacros.h 2007-12-21 15:36:12.000000000 -0500 @@ -486,6 +486,8 @@ #define ACPI_FUNCTION_NAME(name) #endif +#ifdef DEBUG_FUNC_TRACE + #define ACPI_FUNCTION_TRACE(a) ACPI_FUNCTION_NAME(a) \ acpi_ut_trace(ACPI_DEBUG_PARAMETERS) #define ACPI_FUNCTION_TRACE_PTR(a,b) ACPI_FUNCTION_NAME(a) \ @@ -563,6 +565,27 @@ #endif /* ACPI_SIMPLE_RETURN_MACROS */ +#else /* !DEBUG_FUNC_TRACE */ + +#define ACPI_FUNCTION_TRACE(a) +#define ACPI_FUNCTION_TRACE_PTR(a,b) +#define ACPI_FUNCTION_TRACE_U32(a,b) +#define 
ACPI_FUNCTION_TRACE_STR(a,b) +#define ACPI_FUNCTION_EXIT +#define ACPI_FUNCTION_STATUS_EXIT(s) +#define ACPI_FUNCTION_VALUE_EXIT(s) +#define ACPI_FUNCTION_TRACE(a) +#define ACPI_FUNCTION_ENTRY() + +#define return_VOID return +#define return_ACPI_STATUS(s) return(s) +#define return_VALUE(s) return(s) +#define return_UINT8(s) return(s) +#define return_UINT32(s) return(s) +#define return_PTR(s) return(s) + +#endif /* DEBUG_FUNC_TRACE */ + /* Conditional execution */ #define ACPI_DEBUG_EXEC(a) a diff -Nurb linux-2.6.22-570/include/acpi/acoutput.h linux-2.6.22-591/include/acpi/acoutput.h --- linux-2.6.22-570/include/acpi/acoutput.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/acpi/acoutput.h 2007-12-21 15:36:12.000000000 -0500 @@ -178,8 +178,8 @@ /* Defaults for debug_level, debug and normal */ -#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT) -#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT) +#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR) +#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR) #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) #endif /* __ACOUTPUT_H__ */ diff -Nurb linux-2.6.22-570/include/acpi/platform/acenv.h linux-2.6.22-591/include/acpi/platform/acenv.h --- linux-2.6.22-570/include/acpi/platform/acenv.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/acpi/platform/acenv.h 2007-12-21 15:36:12.000000000 -0500 @@ -136,7 +136,7 @@ /*! 
[Begin] no source code translation */ -#if defined(__linux__) +#if defined(_LINUX) || defined(__linux__) #include "aclinux.h" #elif defined(_AED_EFI) diff -Nurb linux-2.6.22-570/include/acpi/platform/aclinux.h linux-2.6.22-591/include/acpi/platform/aclinux.h --- linux-2.6.22-570/include/acpi/platform/aclinux.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/acpi/platform/aclinux.h 2007-12-21 15:36:12.000000000 -0500 @@ -91,7 +91,10 @@ #define ACPI_USE_NATIVE_DIVIDE #endif +#ifndef __cdecl #define __cdecl +#endif + #define ACPI_FLUSH_CPU_CACHE() #endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/acpi/processor.h linux-2.6.22-591/include/acpi/processor.h --- linux-2.6.22-570/include/acpi/processor.h 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/include/acpi/processor.h 2007-12-21 15:36:12.000000000 -0500 @@ -21,6 +21,8 @@ #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */ #define ACPI_PSD_REV0_ENTRIES 5 +#define ACPI_TSD_REV0_REVISION 0 /* Support for _TSD as in ACPI 3.0 */ +#define ACPI_TSD_REV0_ENTRIES 5 /* * Types of coordination defined in ACPI 3.0.
Same macros can be used across * P, C and T states @@ -125,17 +127,53 @@ /* Throttling Control */ +struct acpi_tsd_package { + acpi_integer num_entries; + acpi_integer revision; + acpi_integer domain; + acpi_integer coord_type; + acpi_integer num_processors; +} __attribute__ ((packed)); + +struct acpi_ptc_register { + u8 descriptor; + u16 length; + u8 space_id; + u8 bit_width; + u8 bit_offset; + u8 reserved; + u64 address; +} __attribute__ ((packed)); + +struct acpi_processor_tx_tss { + acpi_integer freqpercentage; /* */ + acpi_integer power; /* milliWatts */ + acpi_integer transition_latency; /* microseconds */ + acpi_integer control; /* control value */ + acpi_integer status; /* success indicator */ +}; struct acpi_processor_tx { u16 power; u16 performance; }; +struct acpi_processor; struct acpi_processor_throttling { - int state; + unsigned int state; + unsigned int platform_limit; + struct acpi_pct_register control_register; + struct acpi_pct_register status_register; + unsigned int state_count; + struct acpi_processor_tx_tss *states_tss; + struct acpi_tsd_package domain_info; + cpumask_t shared_cpu_map; + int (*acpi_processor_get_throttling) (struct acpi_processor * pr); + int (*acpi_processor_set_throttling) (struct acpi_processor * pr, + int state); + u32 address; u8 duty_offset; u8 duty_width; - int state_count; struct acpi_processor_tx states[ACPI_PROCESSOR_MAX_THROTTLING]; }; @@ -161,6 +199,7 @@ u8 bm_check:1; u8 has_cst:1; u8 power_setup_done:1; + u8 bm_rld_set:1; }; struct acpi_processor { @@ -169,6 +208,9 @@ u32 id; u32 pblk; int performance_platform_limit; + int throttling_platform_limit; + /* 0 - states 0..n-th state available */ + struct acpi_processor_flags flags; struct acpi_processor_power power; struct acpi_processor_performance *performance; @@ -270,7 +312,7 @@ /* in processor_throttling.c */ int acpi_processor_get_throttling_info(struct acpi_processor *pr); -int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +extern int 
acpi_processor_set_throttling(struct acpi_processor *pr, int state); extern struct file_operations acpi_processor_throttling_fops; /* in processor_idle.c */ @@ -279,6 +321,9 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr); int acpi_processor_power_exit(struct acpi_processor *pr, struct acpi_device *device); + +extern struct cpuidle_driver acpi_idle_driver; +void acpi_max_cstate_changed(void); int acpi_processor_suspend(struct acpi_device * device, pm_message_t state); int acpi_processor_resume(struct acpi_device * device); diff -Nurb linux-2.6.22-570/include/asm-alpha/page.h linux-2.6.22-591/include/asm-alpha/page.h --- linux-2.6.22-570/include/asm-alpha/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-alpha/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -17,7 +17,8 @@ extern void clear_page(void *page); #define clear_user_page(page, vaddr, pg) clear_page(page) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) /* vaddr is this macro's own address parameter */ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE extern void copy_page(void * _to, void * _from); diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h linux-2.6.22-591/include/asm-arm/arch-iop13xx/adma.h --- linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-arm/arch-iop13xx/adma.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,544 @@ +/* + * Copyright(c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation.
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _ADMA_H +#define _ADMA_H +#include <linux/types.h> +#include <asm/io.h> +#include <asm/hardware.h> +#include <asm/hardware/iop_adma.h> + +#define ADMA_ACCR(chan) (chan->mmr_base + 0x0) +#define ADMA_ACSR(chan) (chan->mmr_base + 0x4) +#define ADMA_ADAR(chan) (chan->mmr_base + 0x8) +#define ADMA_IIPCR(chan) (chan->mmr_base + 0x18) +#define ADMA_IIPAR(chan) (chan->mmr_base + 0x1c) +#define ADMA_IIPUAR(chan) (chan->mmr_base + 0x20) +#define ADMA_ANDAR(chan) (chan->mmr_base + 0x24) +#define ADMA_ADCR(chan) (chan->mmr_base + 0x28) +#define ADMA_CARMD(chan) (chan->mmr_base + 0x2c) +#define ADMA_ABCR(chan) (chan->mmr_base + 0x30) +#define ADMA_DLADR(chan) (chan->mmr_base + 0x34) +#define ADMA_DUADR(chan) (chan->mmr_base + 0x38) +#define ADMA_SLAR(src, chan) (chan->mmr_base + (0x3c + (src << 3))) +#define ADMA_SUAR(src, chan) (chan->mmr_base + (0x40 + (src << 3))) + +struct iop13xx_adma_src { + u32 src_addr; + union { + u32 upper_src_addr; + struct { + unsigned int pq_upper_src_addr:24; + unsigned int pq_dmlt:8; + }; + }; +}; + +struct iop13xx_adma_desc_ctrl { + unsigned int int_en:1; + unsigned int xfer_dir:2; + unsigned int src_select:4; + unsigned int zero_result:1; + unsigned int block_fill_en:1; + unsigned int crc_gen_en:1; + unsigned int crc_xfer_dis:1; + unsigned int crc_seed_fetch_dis:1; + unsigned int status_write_back_en:1; + unsigned int endian_swap_en:1; + unsigned int reserved0:2; + unsigned int pq_update_xfer_en:1; + unsigned int dual_xor_en:1; + unsigned int pq_xfer_en:1; + unsigned int p_xfer_dis:1; + unsigned int reserved1:10; + unsigned int
relax_order_en:1; + unsigned int no_snoop_en:1; +}; + +struct iop13xx_adma_byte_count { + unsigned int byte_count:24; + unsigned int host_if:3; + unsigned int reserved:2; + unsigned int zero_result_err_q:1; + unsigned int zero_result_err:1; + unsigned int tx_complete:1; +}; + +struct iop13xx_adma_desc_hw { + u32 next_desc; + union { + u32 desc_ctrl; + struct iop13xx_adma_desc_ctrl desc_ctrl_field; + }; + union { + u32 crc_addr; + u32 block_fill_data; + u32 q_dest_addr; + }; + union { + u32 byte_count; + struct iop13xx_adma_byte_count byte_count_field; + }; + union { + u32 dest_addr; + u32 p_dest_addr; + }; + union { + u32 upper_dest_addr; + u32 pq_upper_dest_addr; + }; + struct iop13xx_adma_src src[1]; +}; + +struct iop13xx_adma_desc_dual_xor { + u32 next_desc; + u32 desc_ctrl; + u32 reserved; + u32 byte_count; + u32 h_dest_addr; + u32 h_upper_dest_addr; + u32 src0_addr; + u32 upper_src0_addr; + u32 src1_addr; + u32 upper_src1_addr; + u32 h_src_addr; + u32 h_upper_src_addr; + u32 d_src_addr; + u32 d_upper_src_addr; + u32 d_dest_addr; + u32 d_upper_dest_addr; +}; + +struct iop13xx_adma_desc_pq_update { + u32 next_desc; + u32 desc_ctrl; + u32 reserved; + u32 byte_count; + u32 p_dest_addr; + u32 p_upper_dest_addr; + u32 src0_addr; + u32 upper_src0_addr; + u32 src1_addr; + u32 upper_src1_addr; + u32 p_src_addr; + u32 p_upper_src_addr; + u32 q_src_addr; + struct { + unsigned int q_upper_src_addr:24; + unsigned int q_dmlt:8; + }; + u32 q_dest_addr; + u32 q_upper_dest_addr; +}; + +static inline int iop_adma_get_max_xor(void) +{ + return 16; +} + +static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) +{ + return __raw_readl(ADMA_ADAR(chan)); +} + +static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, + u32 next_desc_addr) +{ + __raw_writel(next_desc_addr, ADMA_ANDAR(chan)); +} + +#define ADMA_STATUS_BUSY (1 << 13) + +static inline char iop_chan_is_busy(struct iop_adma_chan *chan) +{ + if (__raw_readl(ADMA_ACSR(chan)) & + 
ADMA_STATUS_BUSY) + return 1; + else + return 0; +} + +static inline int +iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots) +{ + return 1; +} +#define iop_desc_is_aligned(x, y) 1 + +static inline int +iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) +{ + *slots_per_op = 1; + return 1; +} + +#define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s) + +static inline int +iop_chan_memset_slot_count(size_t len, int *slots_per_op) +{ + *slots_per_op = 1; + return 1; +} + +static inline int +iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) +{ + int num_slots; + /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1 + * (1 source => 8 bytes) (1 slot => 32 bytes) + */ + num_slots = 1 + (((src_cnt - 1) << 3) >> 5); + if (((src_cnt - 1) << 3) & 0x1f) + num_slots++; + + *slots_per_op = num_slots; + + return num_slots; +} + +#define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) +#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT +#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT +#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT +#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) + +static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + return hw_desc->dest_addr; +} + +static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + return hw_desc->byte_count_field.byte_count; +} + +static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan, + int src_idx) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + return hw_desc->src[src_idx].src_addr; +} + +static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + struct iop13xx_adma_desc_hw *hw_desc = 
desc->hw_desc; + return hw_desc->desc_ctrl_field.src_select + 1; +} + +static inline void +iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; + hw_desc->crc_addr = 0; +} + +static inline void +iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.block_fill_en = 1; + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; + hw_desc->crc_addr = 0; +} + +/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */ +static inline void +iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.src_select = src_cnt - 1; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; + hw_desc->crc_addr = 0; + +} +#define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i) + +/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */ +static inline int +iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.src_select = src_cnt - 1; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus 
*/ + u_desc_ctrl.field.zero_result = 1; + u_desc_ctrl.field.status_write_back_en = 1; + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; + hw_desc->crc_addr = 0; + + return 1; +} + +static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan, + u32 byte_count) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + hw_desc->byte_count = byte_count; +} + +static inline void +iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) +{ + int slots_per_op = desc->slots_per_op; + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; + int i = 0; + + if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { + hw_desc->byte_count = len; + } else { + do { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; + i += slots_per_op; + } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); + + if (len) { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iter->byte_count = len; + } + } +} + + +static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan, + dma_addr_t addr) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + hw_desc->dest_addr = addr; + hw_desc->upper_dest_addr = 0; +} + +static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, + dma_addr_t addr) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + hw_desc->src[0].src_addr = addr; + hw_desc->src[0].upper_src_addr = 0; +} + +static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, + int src_idx, dma_addr_t addr) +{ + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; + int i = 0; + + do { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iter->src[src_idx].src_addr = addr; + iter->src[src_idx].upper_src_addr = 0; + slot_cnt -= slots_per_op; + if (slot_cnt) { + i += 
slots_per_op; + addr += IOP_ADMA_XOR_MAX_BYTE_COUNT; + } + } while (slot_cnt); +} + +static inline void +iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + iop_desc_init_memcpy(desc, 1); + iop_desc_set_byte_count(desc, chan, 0); + iop_desc_set_dest_addr(desc, chan, 0); + iop_desc_set_memcpy_src_addr(desc, 0); +} + +#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr + +static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, + u32 next_desc_addr) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + BUG_ON(hw_desc->next_desc); + hw_desc->next_desc = next_desc_addr; +} + +static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + return hw_desc->next_desc; +} + +static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + hw_desc->next_desc = 0; +} + +static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, + u32 val) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + hw_desc->block_fill_data = val; +} + +static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; + struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + + BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); + + if (desc_ctrl.pq_xfer_en) + return byte_count.zero_result_err_q; + else + return byte_count.zero_result_err; +} + +static inline void iop_chan_append(struct iop_adma_chan *chan) +{ + u32 adma_accr; + + adma_accr = __raw_readl(ADMA_ACCR(chan)); + adma_accr |= 0x2; + __raw_writel(adma_accr, ADMA_ACCR(chan)); +} + +static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan) +{ + do { } while (0); +} + +static inline u32 iop_chan_get_status(struct 
iop_adma_chan *chan) +{ + return __raw_readl(ADMA_ACSR(chan)); +} + +static inline void iop_chan_disable(struct iop_adma_chan *chan) +{ + u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan)); + adma_chan_ctrl &= ~0x1; + __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan)); +} + +static inline void iop_chan_enable(struct iop_adma_chan *chan) +{ + u32 adma_chan_ctrl; + + adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan)); + adma_chan_ctrl |= 0x1; + __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan)); +} + +static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(ADMA_ACSR(chan)); + status &= (1 << 12); + __raw_writel(status, ADMA_ACSR(chan)); +} + +static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(ADMA_ACSR(chan)); + status &= (1 << 11); + __raw_writel(status, ADMA_ACSR(chan)); +} + +static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(ADMA_ACSR(chan)); + status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3); + __raw_writel(status, ADMA_ACSR(chan)); +} + +static inline int +iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) +{ + return test_bit(9, &status); +} + +static inline int +iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) +{ + return test_bit(5, &status); +} + +static inline int +iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) +{ + return test_bit(4, &status); +} + +static inline int +iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) +{ + return test_bit(3, &status); +} + +static inline int +iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) +{ + return 0; +} + +static inline int +iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) +{ + return 0; +} + +static inline int +iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) +{ + return 0; +} + +#endif /* _ADMA_H */ diff 
-Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h linux-2.6.22-591/include/asm-arm/arch-iop13xx/iop13xx.h --- linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-arm/arch-iop13xx/iop13xx.h 2007-12-21 15:36:12.000000000 -0500 @@ -166,12 +166,22 @@ #define IOP13XX_INIT_I2C_1 (1 << 1) #define IOP13XX_INIT_I2C_2 (1 << 2) +/* ADMA selection flags */ +/* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */ +#define IOP13XX_INIT_ADMA_DEFAULT (0) +#define IOP13XX_INIT_ADMA_0 (1 << 0) +#define IOP13XX_INIT_ADMA_1 (1 << 1) +#define IOP13XX_INIT_ADMA_2 (1 << 2) + +/* Platform devices */ #define IQ81340_NUM_UART 2 #define IQ81340_NUM_I2C 3 #define IQ81340_NUM_PHYS_MAP_FLASH 1 -#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\ - IQ81340_NUM_I2C +\ - IQ81340_NUM_PHYS_MAP_FLASH) +#define IQ81340_NUM_ADMA 3 +#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \ + IQ81340_NUM_I2C + \ + IQ81340_NUM_PHYS_MAP_FLASH + \ + IQ81340_NUM_ADMA) /*========================== PMMR offsets for key registers ============*/ #define IOP13XX_ATU0_PMMR_OFFSET 0x00048000 @@ -444,22 +454,6 @@ /*==============================ADMA UNITS===============================*/ #define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9)) #define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0) -#define IOP13XX_ADMA_OFFSET(chan, ofs) IOP13XX_REG_ADDR32((chan << 9) + (ofs)) - -#define IOP13XX_ADMA_ACCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x0) -#define IOP13XX_ADMA_ACSR(chan) IOP13XX_ADMA_OFFSET(chan, 0x4) -#define IOP13XX_ADMA_ADAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x8) -#define IOP13XX_ADMA_IIPCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x18) -#define IOP13XX_ADMA_IIPAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x1c) -#define IOP13XX_ADMA_IIPUAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x20) -#define IOP13XX_ADMA_ANDAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x24) -#define IOP13XX_ADMA_ADCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x28) -#define 
IOP13XX_ADMA_CARMD(chan) IOP13XX_ADMA_OFFSET(chan, 0x2c) -#define IOP13XX_ADMA_ABCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x30) -#define IOP13XX_ADMA_DLADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x34) -#define IOP13XX_ADMA_DUADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x38) -#define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3)) -#define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3)) /*==============================XSI BRIDGE===============================*/ #define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c) diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h linux-2.6.22-591/include/asm-arm/arch-iop32x/adma.h --- linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-arm/arch-iop32x/adma.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,5 @@ +#ifndef IOP32X_ADMA_H +#define IOP32X_ADMA_H +#include <asm/hardware/iop3xx-adma.h> +#endif + diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h linux-2.6.22-591/include/asm-arm/arch-iop33x/adma.h --- linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-arm/arch-iop33x/adma.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,5 @@ +#ifndef IOP33X_ADMA_H +#define IOP33X_ADMA_H +#include <asm/hardware/iop3xx-adma.h> +#endif + diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h linux-2.6.22-591/include/asm-arm/hardware/iop3xx-adma.h --- linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-arm/hardware/iop3xx-adma.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,891 @@ +/* + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation.
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _ADMA_H +#define _ADMA_H +#include <linux/types.h> +#include <asm/io.h> +#include <asm/hardware.h> +#include <asm/hardware/iop_adma.h> + +/* Memory copy units */ +#define DMA_CCR(chan) (chan->mmr_base + 0x0) +#define DMA_CSR(chan) (chan->mmr_base + 0x4) +#define DMA_DAR(chan) (chan->mmr_base + 0xc) +#define DMA_NDAR(chan) (chan->mmr_base + 0x10) +#define DMA_PADR(chan) (chan->mmr_base + 0x14) +#define DMA_PUADR(chan) (chan->mmr_base + 0x18) +#define DMA_LADR(chan) (chan->mmr_base + 0x1c) +#define DMA_BCR(chan) (chan->mmr_base + 0x20) +#define DMA_DCR(chan) (chan->mmr_base + 0x24) + +/* Application accelerator unit */ +#define AAU_ACR(chan) (chan->mmr_base + 0x0) +#define AAU_ASR(chan) (chan->mmr_base + 0x4) +#define AAU_ADAR(chan) (chan->mmr_base + 0x8) +#define AAU_ANDAR(chan) (chan->mmr_base + 0xc) +#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2))) +#define AAU_DAR(chan) (chan->mmr_base + 0x20) +#define AAU_ABCR(chan) (chan->mmr_base + 0x24) +#define AAU_ADCR(chan) (chan->mmr_base + 0x28) +#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2))) +#define AAU_EDCR0_IDX 8 +#define AAU_EDCR1_IDX 17 +#define AAU_EDCR2_IDX 26 + +#define DMA0_ID 0 +#define DMA1_ID 1 +#define AAU_ID 2 + +struct iop3xx_aau_desc_ctrl { + unsigned int int_en:1; + unsigned int blk1_cmd_ctrl:3; + unsigned int blk2_cmd_ctrl:3; + unsigned int blk3_cmd_ctrl:3; + unsigned int blk4_cmd_ctrl:3; + unsigned int blk5_cmd_ctrl:3; + unsigned int blk6_cmd_ctrl:3; + unsigned int blk7_cmd_ctrl:3; + unsigned int blk8_cmd_ctrl:3; + unsigned int blk_ctrl:2; +
unsigned int dual_xor_en:1; + unsigned int tx_complete:1; + unsigned int zero_result_err:1; + unsigned int zero_result_en:1; + unsigned int dest_write_en:1; +}; + +struct iop3xx_aau_e_desc_ctrl { + unsigned int reserved:1; + unsigned int blk1_cmd_ctrl:3; + unsigned int blk2_cmd_ctrl:3; + unsigned int blk3_cmd_ctrl:3; + unsigned int blk4_cmd_ctrl:3; + unsigned int blk5_cmd_ctrl:3; + unsigned int blk6_cmd_ctrl:3; + unsigned int blk7_cmd_ctrl:3; + unsigned int blk8_cmd_ctrl:3; + unsigned int reserved2:7; +}; + +struct iop3xx_dma_desc_ctrl { + unsigned int pci_transaction:4; + unsigned int int_en:1; + unsigned int dac_cycle_en:1; + unsigned int mem_to_mem_en:1; + unsigned int crc_data_tx_en:1; + unsigned int crc_gen_en:1; + unsigned int crc_seed_dis:1; + unsigned int reserved:21; + unsigned int crc_tx_complete:1; +}; + +struct iop3xx_desc_dma { + u32 next_desc; + union { + u32 pci_src_addr; + u32 pci_dest_addr; + u32 src_addr; + }; + union { + u32 upper_pci_src_addr; + u32 upper_pci_dest_addr; + }; + union { + u32 local_pci_src_addr; + u32 local_pci_dest_addr; + u32 dest_addr; + }; + u32 byte_count; + union { + u32 desc_ctrl; + struct iop3xx_dma_desc_ctrl desc_ctrl_field; + }; + u32 crc_addr; +}; + +struct iop3xx_desc_aau { + u32 next_desc; + u32 src[4]; + u32 dest_addr; + u32 byte_count; + union { + u32 desc_ctrl; + struct iop3xx_aau_desc_ctrl desc_ctrl_field; + }; + union { + u32 src_addr; + u32 e_desc_ctrl; + struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; + } src_edc[31]; +}; + +struct iop3xx_aau_gfmr { + unsigned int gfmr1:8; + unsigned int gfmr2:8; + unsigned int gfmr3:8; + unsigned int gfmr4:8; +}; + +struct iop3xx_desc_pq_xor { + u32 next_desc; + u32 src[3]; + union { + u32 data_mult1; + struct iop3xx_aau_gfmr data_mult1_field; + }; + u32 dest_addr; + u32 byte_count; + union { + u32 desc_ctrl; + struct iop3xx_aau_desc_ctrl desc_ctrl_field; + }; + union { + u32 src_addr; + u32 e_desc_ctrl; + struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; + u32 
data_multiplier; + struct iop3xx_aau_gfmr data_mult_field; + u32 reserved; + } src_edc_gfmr[19]; +}; + +struct iop3xx_desc_dual_xor { + u32 next_desc; + u32 src0_addr; + u32 src1_addr; + u32 h_src_addr; + u32 d_src_addr; + u32 h_dest_addr; + u32 byte_count; + union { + u32 desc_ctrl; + struct iop3xx_aau_desc_ctrl desc_ctrl_field; + }; + u32 d_dest_addr; +}; + +union iop3xx_desc { + struct iop3xx_desc_aau *aau; + struct iop3xx_desc_dma *dma; + struct iop3xx_desc_pq_xor *pq_xor; + struct iop3xx_desc_dual_xor *dual_xor; + void *ptr; +}; + +static inline int iop_adma_get_max_xor(void) +{ + return 32; +} + +static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) +{ + int id = chan->device->id; + + switch (id) { + case DMA0_ID: + case DMA1_ID: + return __raw_readl(DMA_DAR(chan)); + case AAU_ID: + return __raw_readl(AAU_ADAR(chan)); + default: + BUG(); + } + return 0; +} + +static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, + u32 next_desc_addr) +{ + int id = chan->device->id; + + switch (id) { + case DMA0_ID: + case DMA1_ID: + __raw_writel(next_desc_addr, DMA_NDAR(chan)); + break; + case AAU_ID: + __raw_writel(next_desc_addr, AAU_ANDAR(chan)); + break; + } + +} + +#define IOP_ADMA_STATUS_BUSY (1 << 10) +#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024) +#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024) +#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) + +static inline int iop_chan_is_busy(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(DMA_CSR(chan)); + return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0; +} + +static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc, + int num_slots) +{ + /* num_slots will only ever be 1, 2, 4, or 8 */ + return (desc->idx & (num_slots - 1)) ? 0 : 1; +} + +/* to do: support large (i.e. > hw max) buffer sizes */ +static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) +{ + *slots_per_op = 1; + return 1; +} + +/* to do: support large (i.e. 
> hw max) buffer sizes */ +static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) +{ + *slots_per_op = 1; + return 1; +} + +static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, + int *slots_per_op) +{ + const static int slot_count_table[] = { 0, + 1, 1, 1, 1, /* 01 - 04 */ + 2, 2, 2, 2, /* 05 - 08 */ + 4, 4, 4, 4, /* 09 - 12 */ + 4, 4, 4, 4, /* 13 - 16 */ + 8, 8, 8, 8, /* 17 - 20 */ + 8, 8, 8, 8, /* 21 - 24 */ + 8, 8, 8, 8, /* 25 - 28 */ + 8, 8, 8, 8, /* 29 - 32 */ + }; + *slots_per_op = slot_count_table[src_cnt]; + return *slots_per_op; +} + +static inline int +iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan) +{ + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return iop_chan_memcpy_slot_count(0, slots_per_op); + case AAU_ID: + return iop3xx_aau_xor_slot_count(0, 2, slots_per_op); + default: + BUG(); + } + return 0; +} + +static inline int iop_chan_xor_slot_count(size_t len, int src_cnt, + int *slots_per_op) +{ + int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); + + if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT) + return slot_cnt; + + len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; + while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) { + len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; + slot_cnt += *slots_per_op; + } + + if (len) + slot_cnt += *slots_per_op; + + return slot_cnt; +} + +/* zero sum on iop3xx is limited to 1k at a time so it requires multiple + * descriptors + */ +static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, + int *slots_per_op) +{ + int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); + + if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) + return slot_cnt; + + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; + while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; + slot_cnt += *slots_per_op; + } + + if (len) + slot_cnt += *slots_per_op; + + return slot_cnt; +} + +static inline u32 iop_desc_get_dest_addr(struct 
iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return hw_desc.dma->dest_addr; + case AAU_ID: + return hw_desc.aau->dest_addr; + default: + BUG(); + } + return 0; +} + +static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return hw_desc.dma->byte_count; + case AAU_ID: + return hw_desc.aau->byte_count; + default: + BUG(); + } + return 0; +} + +/* translate the src_idx to a descriptor word index */ +static inline int __desc_idx(int src_idx) +{ + const static int desc_idx_table[] = { 0, 0, 0, 0, + 0, 1, 2, 3, + 5, 6, 7, 8, + 9, 10, 11, 12, + 14, 15, 16, 17, + 18, 19, 20, 21, + 23, 24, 25, 26, + 27, 28, 29, 30, + }; + + return desc_idx_table[src_idx]; +} + +static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan, + int src_idx) +{ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return hw_desc.dma->src_addr; + case AAU_ID: + break; + default: + BUG(); + } + + if (src_idx < 4) + return hw_desc.aau->src[src_idx]; + else + return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr; +} + +static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc, + int src_idx, dma_addr_t addr) +{ + if (src_idx < 4) + hw_desc->src[src_idx] = addr; + else + hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr; +} + +static inline void +iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en) +{ + struct iop3xx_desc_dma *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop3xx_dma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.mem_to_mem_en = 1; + u_desc_ctrl.field.pci_transaction = 0xe; /* memory 
read block */ + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; + hw_desc->upper_pci_src_addr = 0; + hw_desc->crc_addr = 0; +} + +static inline void +iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en) +{ + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop3xx_aau_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */ + u_desc_ctrl.field.dest_write_en = 1; + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; +} + +static inline u32 +iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en) +{ + int i, shift; + u32 edcr; + union { + u32 value; + struct iop3xx_aau_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + switch (src_cnt) { + case 25 ... 32: + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ + edcr = 0; + shift = 1; + for (i = 24; i < src_cnt; i++) { + edcr |= (1 << shift); + shift += 3; + } + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr; + src_cnt = 24; + /* fall through */ + case 17 ... 24: + if (!u_desc_ctrl.field.blk_ctrl) { + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ + } + edcr = 0; + shift = 1; + for (i = 16; i < src_cnt; i++) { + edcr |= (1 << shift); + shift += 3; + } + hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr; + src_cnt = 16; + /* fall through */ + case 9 ... 16: + if (!u_desc_ctrl.field.blk_ctrl) + u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ + edcr = 0; + shift = 1; + for (i = 8; i < src_cnt; i++) { + edcr |= (1 << shift); + shift += 3; + } + hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr; + src_cnt = 8; + /* fall through */ + case 2 ... 
8: + shift = 1; + for (i = 0; i < src_cnt; i++) { + u_desc_ctrl.value |= (1 << shift); + shift += 3; + } + + if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) + u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ + } + + u_desc_ctrl.field.dest_write_en = 1; + u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */ + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; + + return u_desc_ctrl.value; +} + +static inline void +iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) +{ + iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en); +} + +/* return the number of operations */ +static inline int +iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) +{ + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; + struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter; + union { + u32 value; + struct iop3xx_aau_desc_ctrl field; + } u_desc_ctrl; + int i, j; + + hw_desc = desc->hw_desc; + + for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0; + i += slots_per_op, j++) { + iter = iop_hw_desc_slot_idx(hw_desc, i); + u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en); + u_desc_ctrl.field.dest_write_en = 0; + u_desc_ctrl.field.zero_result_en = 1; + u_desc_ctrl.field.int_en = int_en; + iter->desc_ctrl = u_desc_ctrl.value; + + /* for the subsequent descriptors preserve the store queue + * and chain them together + */ + if (i) { + prev_hw_desc = + iop_hw_desc_slot_idx(hw_desc, i - slots_per_op); + prev_hw_desc->next_desc = (u32) (desc->phys + (i << 5)); + } + } + + return j; +} + +static inline void +iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) +{ + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop3xx_aau_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + switch (src_cnt) { + case 25 ... 
32: + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; + /* fall through */ + case 17 ... 24: + if (!u_desc_ctrl.field.blk_ctrl) { + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ + } + hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0; + /* fall through */ + case 9 ... 16: + if (!u_desc_ctrl.field.blk_ctrl) + u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ + hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0; + /* fall through */ + case 1 ... 8: + if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) + u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ + } + + u_desc_ctrl.field.dest_write_en = 0; + u_desc_ctrl.field.int_en = int_en; + hw_desc->desc_ctrl = u_desc_ctrl.value; +} + +static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan, + u32 byte_count) +{ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + hw_desc.dma->byte_count = byte_count; + break; + case AAU_ID: + hw_desc.aau->byte_count = byte_count; + break; + default: + BUG(); + } +} + +static inline void +iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + iop_desc_init_memcpy(desc, 1); + hw_desc.dma->byte_count = 0; + hw_desc.dma->dest_addr = 0; + hw_desc.dma->src_addr = 0; + break; + case AAU_ID: + iop_desc_init_null_xor(desc, 2, 1); + hw_desc.aau->byte_count = 0; + hw_desc.aau->dest_addr = 0; + hw_desc.aau->src[0] = 0; + hw_desc.aau->src[1] = 0; + break; + default: + BUG(); + } +} + +static inline void +iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) +{ + int slots_per_op = desc->slots_per_op; + struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; + int i = 0; + + if (len <= 
IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { + hw_desc->byte_count = len; + } else { + do { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; + i += slots_per_op; + } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); + + if (len) { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iter->byte_count = len; + } + } +} + +static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan, + dma_addr_t addr) +{ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + hw_desc.dma->dest_addr = addr; + break; + case AAU_ID: + hw_desc.aau->dest_addr = addr; + break; + default: + BUG(); + } +} + +static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, + dma_addr_t addr) +{ + struct iop3xx_desc_dma *hw_desc = desc->hw_desc; + hw_desc->src_addr = addr; +} + +static inline void +iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx, + dma_addr_t addr) +{ + + struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; + int i; + + for (i = 0; (slot_cnt -= slots_per_op) >= 0; + i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); + } +} + +static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, + int src_idx, dma_addr_t addr) +{ + + struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; + int i; + + for (i = 0; (slot_cnt -= slots_per_op) >= 0; + i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) { + iter = iop_hw_desc_slot_idx(hw_desc, i); + iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); + } +} + +static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, + u32 next_desc_addr) 
+{ + /* hw_desc->next_desc is the same location for all channels */ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + BUG_ON(hw_desc.dma->next_desc); + hw_desc.dma->next_desc = next_desc_addr; +} + +static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) +{ + /* hw_desc->next_desc is the same location for all channels */ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + return hw_desc.dma->next_desc; +} + +static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) +{ + /* hw_desc->next_desc is the same location for all channels */ + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; + hw_desc.dma->next_desc = 0; +} + +static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, + u32 val) +{ + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; + hw_desc->src[0] = val; +} + +static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +{ + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; + struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; + + BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); + return desc_ctrl.zero_result_err; +} + +static inline void iop_chan_append(struct iop_adma_chan *chan) +{ + u32 dma_chan_ctrl; + /* workaround dropped interrupts on 3xx */ + mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3)); + + dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); + dma_chan_ctrl |= 0x2; + __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); +} + +static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan) +{ + if (!busy) + del_timer(&chan->cleanup_watchdog); +} + +static inline u32 iop_chan_get_status(struct iop_adma_chan *chan) +{ + return __raw_readl(DMA_CSR(chan)); +} + +static inline void iop_chan_disable(struct iop_adma_chan *chan) +{ + u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); + dma_chan_ctrl &= ~1; + __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); +} + +static inline void iop_chan_enable(struct iop_adma_chan *chan) +{ + u32 
dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); + + dma_chan_ctrl |= 1; + __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); +} + +static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(DMA_CSR(chan)); + status &= (1 << 9); + __raw_writel(status, DMA_CSR(chan)); +} + +static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(DMA_CSR(chan)); + status &= (1 << 8); + __raw_writel(status, DMA_CSR(chan)); +} + +static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) +{ + u32 status = __raw_readl(DMA_CSR(chan)); + + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1); + break; + case AAU_ID: + status &= (1 << 5); + break; + default: + BUG(); + } + + __raw_writel(status, DMA_CSR(chan)); +} + +static inline int +iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) +{ + return 0; +} + +static inline int +iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) +{ + return 0; +} + +static inline int +iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) +{ + return 0; +} + +static inline int +iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) +{ + return test_bit(5, &status); +} + +static inline int +iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) +{ + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return test_bit(2, &status); + default: + return 0; + } +} + +static inline int +iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) +{ + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return test_bit(3, &status); + default: + return 0; + } +} + +static inline int +iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) +{ + switch (chan->device->id) { + case DMA0_ID: + case DMA1_ID: + return test_bit(1, &status); + default: + return 0; + } +} 
+#endif /* _ADMA_H */ diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h linux-2.6.22-591/include/asm-arm/hardware/iop3xx.h --- linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-arm/hardware/iop3xx.h 2007-12-21 15:36:12.000000000 -0500 @@ -144,24 +144,9 @@ #define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380) /* DMA Controller */ -#define IOP3XX_DMA0_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0400) -#define IOP3XX_DMA0_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0404) -#define IOP3XX_DMA0_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x040c) -#define IOP3XX_DMA0_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0410) -#define IOP3XX_DMA0_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0414) -#define IOP3XX_DMA0_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0418) -#define IOP3XX_DMA0_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x041c) -#define IOP3XX_DMA0_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0420) -#define IOP3XX_DMA0_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0424) -#define IOP3XX_DMA1_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0440) -#define IOP3XX_DMA1_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0444) -#define IOP3XX_DMA1_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x044c) -#define IOP3XX_DMA1_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0450) -#define IOP3XX_DMA1_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0454) -#define IOP3XX_DMA1_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0458) -#define IOP3XX_DMA1_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x045c) -#define IOP3XX_DMA1_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0460) -#define IOP3XX_DMA1_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0464) +#define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \ + (0x400 + (chan << 6))) +#define IOP3XX_DMA_UPPER_PA(chan) (IOP3XX_DMA_PHYS_BASE(chan) + 0x27) /* Peripheral bus interface */ #define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680) @@ -210,48 +195,8 @@ #define IOP_TMR_RATIO_1_1 0x00 /* Application accelerator unit */ -#define IOP3XX_AAU_ACR (volatile u32 
*)IOP3XX_REG_ADDR(0x0800) -#define IOP3XX_AAU_ASR (volatile u32 *)IOP3XX_REG_ADDR(0x0804) -#define IOP3XX_AAU_ADAR (volatile u32 *)IOP3XX_REG_ADDR(0x0808) -#define IOP3XX_AAU_ANDAR (volatile u32 *)IOP3XX_REG_ADDR(0x080c) -#define IOP3XX_AAU_SAR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0810) -#define IOP3XX_AAU_SAR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0814) -#define IOP3XX_AAU_SAR3 (volatile u32 *)IOP3XX_REG_ADDR(0x0818) -#define IOP3XX_AAU_SAR4 (volatile u32 *)IOP3XX_REG_ADDR(0x081c) -#define IOP3XX_AAU_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x0820) -#define IOP3XX_AAU_ABCR (volatile u32 *)IOP3XX_REG_ADDR(0x0824) -#define IOP3XX_AAU_ADCR (volatile u32 *)IOP3XX_REG_ADDR(0x0828) -#define IOP3XX_AAU_SAR5 (volatile u32 *)IOP3XX_REG_ADDR(0x082c) -#define IOP3XX_AAU_SAR6 (volatile u32 *)IOP3XX_REG_ADDR(0x0830) -#define IOP3XX_AAU_SAR7 (volatile u32 *)IOP3XX_REG_ADDR(0x0834) -#define IOP3XX_AAU_SAR8 (volatile u32 *)IOP3XX_REG_ADDR(0x0838) -#define IOP3XX_AAU_EDCR0 (volatile u32 *)IOP3XX_REG_ADDR(0x083c) -#define IOP3XX_AAU_SAR9 (volatile u32 *)IOP3XX_REG_ADDR(0x0840) -#define IOP3XX_AAU_SAR10 (volatile u32 *)IOP3XX_REG_ADDR(0x0844) -#define IOP3XX_AAU_SAR11 (volatile u32 *)IOP3XX_REG_ADDR(0x0848) -#define IOP3XX_AAU_SAR12 (volatile u32 *)IOP3XX_REG_ADDR(0x084c) -#define IOP3XX_AAU_SAR13 (volatile u32 *)IOP3XX_REG_ADDR(0x0850) -#define IOP3XX_AAU_SAR14 (volatile u32 *)IOP3XX_REG_ADDR(0x0854) -#define IOP3XX_AAU_SAR15 (volatile u32 *)IOP3XX_REG_ADDR(0x0858) -#define IOP3XX_AAU_SAR16 (volatile u32 *)IOP3XX_REG_ADDR(0x085c) -#define IOP3XX_AAU_EDCR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0860) -#define IOP3XX_AAU_SAR17 (volatile u32 *)IOP3XX_REG_ADDR(0x0864) -#define IOP3XX_AAU_SAR18 (volatile u32 *)IOP3XX_REG_ADDR(0x0868) -#define IOP3XX_AAU_SAR19 (volatile u32 *)IOP3XX_REG_ADDR(0x086c) -#define IOP3XX_AAU_SAR20 (volatile u32 *)IOP3XX_REG_ADDR(0x0870) -#define IOP3XX_AAU_SAR21 (volatile u32 *)IOP3XX_REG_ADDR(0x0874) -#define IOP3XX_AAU_SAR22 (volatile u32 *)IOP3XX_REG_ADDR(0x0878) -#define 
IOP3XX_AAU_SAR23 (volatile u32 *)IOP3XX_REG_ADDR(0x087c) -#define IOP3XX_AAU_SAR24 (volatile u32 *)IOP3XX_REG_ADDR(0x0880) -#define IOP3XX_AAU_EDCR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0884) -#define IOP3XX_AAU_SAR25 (volatile u32 *)IOP3XX_REG_ADDR(0x0888) -#define IOP3XX_AAU_SAR26 (volatile u32 *)IOP3XX_REG_ADDR(0x088c) -#define IOP3XX_AAU_SAR27 (volatile u32 *)IOP3XX_REG_ADDR(0x0890) -#define IOP3XX_AAU_SAR28 (volatile u32 *)IOP3XX_REG_ADDR(0x0894) -#define IOP3XX_AAU_SAR29 (volatile u32 *)IOP3XX_REG_ADDR(0x0898) -#define IOP3XX_AAU_SAR30 (volatile u32 *)IOP3XX_REG_ADDR(0x089c) -#define IOP3XX_AAU_SAR31 (volatile u32 *)IOP3XX_REG_ADDR(0x08a0) -#define IOP3XX_AAU_SAR32 (volatile u32 *)IOP3XX_REG_ADDR(0x08a4) +#define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800) +#define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7) /* I2C bus interface unit */ #define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680) @@ -329,6 +274,9 @@ asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val)); } +extern struct platform_device iop3xx_dma_0_channel; +extern struct platform_device iop3xx_dma_1_channel; +extern struct platform_device iop3xx_aau_channel; extern struct platform_device iop3xx_i2c0_device; extern struct platform_device iop3xx_i2c1_device; diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h linux-2.6.22-591/include/asm-arm/hardware/iop_adma.h --- linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-arm/hardware/iop_adma.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,120 @@ +/* + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef IOP_ADMA_H +#define IOP_ADMA_H +#include +#include +#include + +#define IOP_ADMA_SLOT_SIZE 32 +#define IOP_ADMA_THRESHOLD 4 + +/** + * struct iop_adma_device - internal representation of an ADMA device + * @pdev: Platform device + * @id: HW ADMA Device selector + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @common: embedded struct dma_device + */ +struct iop_adma_device { + struct platform_device *pdev; + int id; + dma_addr_t dma_desc_pool; + void *dma_desc_pool_virt; + struct dma_device common; +}; + +/** + * struct iop_adma_chan - internal representation of an ADMA channel + * @pending: allows batching of hardware operations + * @completed_cookie: identifier for the most recently completed operation + * @lock: serializes enqueue/dequeue operations to the slot pool + * @mmr_base: memory mapped register base + * @chain: device chain view of the descriptors + * @device: parent device + * @common: common dmaengine channel object members + * @last_used: place holder for allocation to continue from where it left off + * @all_slots: complete domain of slots usable by the channel + * @cleanup_watchdog: workaround missed interrupts on iop3xx + * @slots_allocated: records the actual size of the descriptor slot pool + * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs + */ +struct iop_adma_chan { + int pending; + dma_cookie_t completed_cookie; + spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base; + struct list_head chain; + struct iop_adma_device *device; + struct dma_chan common; + struct iop_adma_desc_slot *last_used; + struct list_head all_slots; + struct timer_list cleanup_watchdog; + int slots_allocated; + struct tasklet_struct irq_tasklet; +}; + +/** + * struct iop_adma_desc_slot - IOP-ADMA software descriptor + * @slot_node: node on the iop_adma_chan.all_slots list + * @chain_node: node on the iop_adma_chan.chain list + * @hw_desc: virtual address of the hardware descriptor chain + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @slot_cnt: total slots used in a transaction (group of operations) + * @slots_per_op: number of slots per operation + * @idx: pool index + * @unmap_src_cnt: number of xor sources + * @unmap_len: transaction bytecount + * @async_tx: support for the async_tx api + * @group_list: list of slots that make up a multi-descriptor transaction + * for example transfer lengths larger than the supported hw max + * @xor_check_result: result of zero sum + * @crc32_result: result of crc calculation + */ +struct iop_adma_desc_slot { + struct list_head slot_node; + struct list_head chain_node; + void *hw_desc; + dma_addr_t phys; + struct iop_adma_desc_slot *group_head; + u16 slot_cnt; + u16 slots_per_op; + u16 idx; + u16 unmap_src_cnt; + size_t unmap_len; + struct dma_async_tx_descriptor async_tx; + struct list_head group_list; + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +}; + +struct iop_adma_platform_data { + int hw_id; + dma_cap_mask_t cap_mask; + size_t pool_size; +}; + +#define to_iop_sw_desc(addr_hw_desc) \ + container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc) +#define iop_hw_desc_slot_idx(hw_desc, idx) \ + ( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) ) +#endif diff -Nurb linux-2.6.22-570/include/asm-arm/kgdb.h linux-2.6.22-591/include/asm-arm/kgdb.h --- linux-2.6.22-570/include/asm-arm/kgdb.h 1969-12-31
19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-arm/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,103 @@ +/* + * include/asm-arm/kgdb.h + * + * ARM KGDB support + * + * Author: Deepak Saxena + * + * Copyright (C) 2002 MontaVista Software Inc. + * + */ + +#ifndef __ASM_KGDB_H__ +#define __ASM_KGDB_H__ + +#include +#include + + +/* + * GDB assumes that we're a user process being debugged, so + * it will send us an SWI command to write into memory as the + * debug trap. When an SWI occurs, the next instruction addr is + * placed into R14_svc before jumping to the vector trap. + * This doesn't work for kernel debugging as we are already in SVC + * mode and would lose the kernel's LR, which + * is a bad thing. + * + * By doing this as an undefined instruction trap, we force a mode + * switch from SVC to UND mode, allowing us to save full kernel state. + * + * We also define a KGDB_COMPILED_BREAK which can be used to compile + * in breakpoints. This is important for things like sysrq-G and for + * the initial breakpoint from trap_init(). + * + * Note to ARM HW designers: Add real trap support like SH && PPC to + * make our lives much much simpler. :) + */ +#define BREAK_INSTR_SIZE 4 +#define GDB_BREAKINST 0xef9f0001 +#define KGDB_BREAKINST 0xe7ffdefe +#define KGDB_COMPILED_BREAK 0xe7ffdeff +#define CACHE_FLUSH_IS_SAFE 1 + +#ifndef __ASSEMBLY__ + +#define BREAKPOINT() asm(".word 0xe7ffdeff") + + +extern void kgdb_handle_bus_error(void); +extern int kgdb_fault_expected; +#endif /* !__ASSEMBLY__ */ + +/* + * From Kevin Hilman: + * + * gdb is expecting the following registers layout. + * + * r0-r15: 1 long word each + * f0-f7: unused, 3 long words each !! + * fps: unused, 1 long word + * cpsr: 1 long word + * + * Even though f0-f7 and fps are not used, they need to be + * present in the registers sent for correct processing in + * the host-side gdb.
+ * + * In particular, it is crucial that CPSR is in the right place, + * otherwise gdb will not be able to correctly interpret stepping over + * conditional branches. + */ +#define _GP_REGS 16 +#define _FP_REGS 8 +#define _EXTRA_REGS 2 +#define GDB_MAX_REGS (_GP_REGS + (_FP_REGS * 3) + _EXTRA_REGS) + +#define KGDB_MAX_NO_CPUS 1 +#define BUFMAX 400 +#define NUMREGBYTES (GDB_MAX_REGS << 2) +#define NUMCRITREGBYTES (32 << 2) + +#define _R0 0 +#define _R1 1 +#define _R2 2 +#define _R3 3 +#define _R4 4 +#define _R5 5 +#define _R6 6 +#define _R7 7 +#define _R8 8 +#define _R9 9 +#define _R10 10 +#define _FP 11 +#define _IP 12 +#define _SP 13 +#define _LR 14 +#define _PC 15 +#define _CPSR (GDB_MAX_REGS - 1) + +/* So that we can denote the end of a frame for tracing, in the simple + * case. */ +#define CFI_END_FRAME(func) __CFI_END_FRAME(_PC,_SP,func) + +#endif /* __ASM_KGDB_H__ */ diff -Nurb linux-2.6.22-570/include/asm-arm/system.h linux-2.6.22-591/include/asm-arm/system.h --- linux-2.6.22-570/include/asm-arm/system.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-arm/system.h 2007-12-21 15:36:12.000000000 -0500 @@ -360,6 +360,41 @@ extern void disable_hlt(void); extern void enable_hlt(void); +#ifndef CONFIG_SMP +/* + * Atomic compare and exchange. 
+ */ +#define __HAVE_ARCH_CMPXCHG 1 + +extern unsigned long wrong_size_cmpxchg(volatile void *ptr); + +static inline unsigned long __cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long flags, prev; + volatile unsigned long *p = ptr; + + if (size == 4) { + local_irq_save(flags); + if ((prev = *p) == old) + *p = new; + local_irq_restore(flags); + return(prev); + } else + return wrong_size_cmpxchg(ptr); +} + +#define cmpxchg(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ +}) + +#endif + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) diff -Nurb linux-2.6.22-570/include/asm-cris/page.h linux-2.6.22-591/include/asm-cris/page.h --- linux-2.6.22-570/include/asm-cris/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-cris/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -20,7 +20,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* diff -Nurb linux-2.6.22-570/include/asm-generic/kgdb.h linux-2.6.22-591/include/asm-generic/kgdb.h --- linux-2.6.22-570/include/asm-generic/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-generic/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,100 @@ +/* + * include/asm-generic/kgdb.h + * + * This provides the assembly level information so that KGDB can provide + * a GDB that has been patched with enough information to know to stop + * trying to unwind the function. + * + * Author: Tom Rini + * + * 2005 (c) MontaVista Software, Inc. 
+ * 2006 (c) Embedded Alley Solutions, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_GENERIC_KGDB_H__ +#define __ASM_GENERIC_KGDB_H__ + +#ifdef CONFIG_X86 +/** + * kgdb_skipexception - Bail out of KGDB when we've been triggered. + * @exception: Exception vector number + * @regs: Current &struct pt_regs. + * + * On some architectures we need to skip a breakpoint exception when + * it occurs after a breakpoint has been removed. + */ +int kgdb_skipexception(int exception, struct pt_regs *regs); +#else +#define kgdb_skipexception(exception, regs) 0 +#endif + +#if defined(CONFIG_X86) +/** + * kgdb_post_master_code - Save error vector/code numbers. + * @regs: Original pt_regs. + * @e_vector: Original error vector. + * @err_code: Original error code. + * + * This is needed on architectures which support SMP and KGDB. + * This function is called after all the slave cpus have been put + * to a known spin state and the master CPU has control over KGDB. + */ +extern void kgdb_post_master_code(struct pt_regs *regs, int e_vector, + int err_code); + +/** + * kgdb_disable_hw_debug - Disable hardware debugging while we are in kgdb. + * @regs: Current &struct pt_regs. + * + * This function will be called if the particular architecture must + * disable hardware debugging while it is processing gdb packets or + * handling an exception. + */ +extern void kgdb_disable_hw_debug(struct pt_regs *regs); +#else +#define kgdb_disable_hw_debug(regs) do { } while (0) +#define kgdb_post_master_code(regs, v, c) do { } while (0) +#endif + +#ifdef CONFIG_KGDB_ARCH_HAS_SHADOW_INFO +/** + * kgdb_shadowinfo - Get shadowed information on @threadid. + * @regs: The &struct pt_regs of the current process. + * @buffer: A buffer of %BUFMAX size. + * @threadid: The thread id of the shadowed process to get information on.
+ */ +extern void kgdb_shadowinfo(struct pt_regs *regs, char *buffer, + unsigned threadid); + +/** + * kgdb_get_shadow_thread - Get the shadowed &task_struct of @threadid. + * @regs: The &struct pt_regs of the current thread. + * @threadid: The thread id of the shadowed process to get information on. + * + * RETURN: + * This returns a pointer to the &struct task_struct of the shadowed + * thread, @threadid. + */ +extern struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs, + int threadid); + +/** + * kgdb_shadow_regs - Return the shadowed registers of @threadid. + * @regs: The &struct pt_regs of the current thread. + * @threadid: The thread id we want the &struct pt_regs for. + * + * RETURN: + * A pointer to the &struct pt_regs of the shadowed thread @threadid. + */ +extern struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid); +#else +#define kgdb_shadowinfo(regs, buf, threadid) do { } while (0) +#define kgdb_get_shadow_thread(regs, threadid) NULL +#define kgdb_shadow_regs(regs, threadid) NULL +#endif + +#endif /* __ASM_GENERIC_KGDB_H__ */ diff -Nurb linux-2.6.22-570/include/asm-generic/vmlinux.lds.h linux-2.6.22-591/include/asm-generic/vmlinux.lds.h --- linux-2.6.22-570/include/asm-generic/vmlinux.lds.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-generic/vmlinux.lds.h 2007-12-21 15:36:12.000000000 -0500 @@ -127,6 +127,8 @@ *(__ksymtab_strings) \ } \ \ + EH_FRAME \ + \ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___param) = .; \ @@ -177,6 +179,26 @@ *(.kprobes.text) \ VMLINUX_SYMBOL(__kprobes_text_end) = .; +#ifdef CONFIG_STACK_UNWIND +#define EH_FRAME \ + /* Unwind data binary search table */ \ + . = ALIGN(8); \ + .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ + *(.eh_frame_hdr) \ + VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } \ + /* Unwind data */ \ + .
= ALIGN(8); \ + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind) = .; \ + *(.eh_frame) \ + VMLINUX_SYMBOL(__end_unwind) = .; \ + } +#else +#define EH_FRAME +#endif + /* DWARF debug sections. Symbols in the DWARF debugging sections are relative to the beginning of the section so we begin them at 0. */ diff -Nurb linux-2.6.22-570/include/asm-h8300/page.h linux-2.6.22-591/include/asm-h8300/page.h --- linux-2.6.22-570/include/asm-h8300/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-h8300/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -22,7 +22,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* diff -Nurb linux-2.6.22-570/include/asm-i386/kdebug.h linux-2.6.22-591/include/asm-i386/kdebug.h --- linux-2.6.22-570/include/asm-i386/kdebug.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-i386/kdebug.h 2007-12-21 15:36:12.000000000 -0500 @@ -28,6 +28,7 @@ DIE_CALL, DIE_NMI_IPI, DIE_PAGE_FAULT, + DIE_PAGE_FAULT_NO_CONTEXT, }; #endif diff -Nurb linux-2.6.22-570/include/asm-i386/kgdb.h linux-2.6.22-591/include/asm-i386/kgdb.h --- linux-2.6.22-570/include/asm-i386/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-i386/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,51 @@ +#ifdef __KERNEL__ +#ifndef _ASM_KGDB_H_ +#define _ASM_KGDB_H_ + +#include + +/* + * Copyright (C) 2001-2004 Amit S. 
Kale + */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 1024 + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* Number of bytes of registers we need to save for a setjmp/longjmp. */ +#define NUMCRITREGBYTES 24 + +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC, /* 8 also known as eip */ + _PS, /* 9 also known as eflags */ + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +#define BREAKPOINT() asm(" int $3"); +#define BREAK_INSTR_SIZE 1 +#define CACHE_FLUSH_IS_SAFE 1 +#endif /* _ASM_KGDB_H_ */ +#endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/asm-i386/page.h linux-2.6.22-591/include/asm-i386/page.h --- linux-2.6.22-570/include/asm-i386/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-i386/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -34,7 +34,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* diff -Nurb linux-2.6.22-570/include/asm-i386/unistd.h linux-2.6.22-591/include/asm-i386/unistd.h --- 
linux-2.6.22-570/include/asm-i386/unistd.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-i386/unistd.h 2007-12-21 15:36:12.000000000 -0500 @@ -329,10 +329,13 @@ #define __NR_signalfd 321 #define __NR_timerfd 322 #define __NR_eventfd 323 +#define __NR_revokeat 324 +#define __NR_frevoke 325 +#define __NR_fallocate 326 #ifdef __KERNEL__ -#define NR_syscalls 324 +#define NR_syscalls 327 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff -Nurb linux-2.6.22-570/include/asm-i386/unwind.h linux-2.6.22-591/include/asm-i386/unwind.h --- linux-2.6.22-570/include/asm-i386/unwind.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-i386/unwind.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,6 +1,95 @@ #ifndef _ASM_I386_UNWIND_H #define _ASM_I386_UNWIND_H +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + */ + +#ifdef CONFIG_STACK_UNWIND + +#include +#include +#include +#include + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define UNW_PC(frame) (frame)->regs.eip +#define UNW_SP(frame) (frame)->regs.esp +#ifdef CONFIG_FRAME_POINTER +#define UNW_FP(frame) (frame)->regs.ebp +#define FRAME_RETADDR_OFFSET 4 +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0) +#define STACK_TOP(tsk) ((tsk)->thread.esp0) +#else +#define UNW_FP(frame) ((void)(frame), 0) +#endif +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(eax), \ + PTREGS_INFO(ecx), \ + PTREGS_INFO(edx), \ + PTREGS_INFO(ebx), \ + PTREGS_INFO(esp), \ + PTREGS_INFO(ebp), \ + PTREGS_INFO(esi), \ + PTREGS_INFO(edi), \ + PTREGS_INFO(eip) + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 4)) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct 
pt_regs *regs) +{ + if (user_mode_vm(regs)) + info->regs = *regs; + else { + memcpy(&info->regs, regs, offsetof(struct pt_regs, esp)); + info->regs.esp = (unsigned long)&regs->esp; + info->regs.xss = __KERNEL_DS; + } +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.eip = info->task->thread.eip; + info->regs.xcs = __KERNEL_CS; + __get_user(info->regs.ebp, (long *)info->task->thread.esp); + info->regs.esp = info->task->thread.esp; + info->regs.xss = __KERNEL_DS; + info->regs.xds = __USER_DS; + info->regs.xes = __USER_DS; + info->regs.xfs = __KERNEL_PERCPU; +} + +extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) +{ + return user_mode_vm(&info->regs) + || info->regs.eip < PAGE_OFFSET + || (info->regs.eip >= __fix_to_virt(FIX_VDSO) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + || info->regs.esp < PAGE_OFFSET; +} + +#else + #define UNW_PC(frame) ((void)(frame), 0) #define UNW_SP(frame) ((void)(frame), 0) #define UNW_FP(frame) ((void)(frame), 0) @@ -10,4 +99,6 @@ return 0; } +#endif + #endif /* _ASM_I386_UNWIND_H */ diff -Nurb linux-2.6.22-570/include/asm-ia64/kdebug.h linux-2.6.22-591/include/asm-ia64/kdebug.h --- linux-2.6.22-570/include/asm-ia64/kdebug.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-ia64/kdebug.h 2007-12-21 15:36:12.000000000 -0500 @@ -69,6 +69,7 @@ DIE_KDEBUG_LEAVE, DIE_KDUMP_ENTER, DIE_KDUMP_LEAVE, + DIE_PAGE_FAULT_NO_CONTEXT, }; #endif diff -Nurb linux-2.6.22-570/include/asm-ia64/kgdb.h linux-2.6.22-591/include/asm-ia64/kgdb.h --- linux-2.6.22-570/include/asm-ia64/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-ia64/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,37 @@ +#ifdef __KERNEL__ +#ifndef
_ASM_KGDB_H_ +#define _ASM_KGDB_H_ + +/* + * Copyright (C) 2001-2004 Amit S. Kale + */ + +#include +#include + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 1024 + +/* Number of bytes of registers. We set this to 0 so that certain GDB + * packets will fail, forcing the use of others, which are more friendly + * on ia64. */ +#define NUMREGBYTES 0 + +#define NUMCRITREGBYTES (70*8) +#define JMP_REGS_ALIGNMENT __attribute__ ((aligned (16))) + +#define BREAKNUM 0x00003333300LL +#define KGDBBREAKNUM 0x6665UL +#define BREAKPOINT() asm volatile ("break.m 0x6665") +#define BREAK_INSTR_SIZE 16 +#define CACHE_FLUSH_IS_SAFE 1 + +struct pt_regs; +extern volatile int kgdb_hwbreak_sstep[NR_CPUS]; +extern void smp_send_nmi_allbutself(void); +extern void kgdb_wait_ipi(struct pt_regs *); +#endif /* _ASM_KGDB_H_ */ +#endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/asm-ia64/page.h linux-2.6.22-591/include/asm-ia64/page.h --- linux-2.6.22-570/include/asm-ia64/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-ia64/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -87,9 +87,10 @@ } while (0) -#define alloc_zeroed_user_highpage(vma, vaddr) \ +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ({ \ - struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \ + struct page *page = alloc_page_vma( \ + GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr); \ if (page) \ flush_dcache_page(page); \ page; \ diff -Nurb linux-2.6.22-570/include/asm-ia64/processor.h linux-2.6.22-591/include/asm-ia64/processor.h --- linux-2.6.22-570/include/asm-ia64/processor.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-ia64/processor.h 2007-12-21 15:36:12.000000000 -0500 @@ -295,9 +295,9 
@@ regs->ar_bspstore = current->thread.rbs_bot; \ regs->ar_fpsr = FPSR_DEFAULT; \ regs->loadrs = 0; \ - regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \ + regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ - if (unlikely(!current->mm->dumpable)) { \ + if (unlikely(!get_dumpable(current->mm))) { \ /* \ * Zap scratch regs to avoid leaking bits between processes with different \ * uid/privileges. \ diff -Nurb linux-2.6.22-570/include/asm-m32r/page.h linux-2.6.22-591/include/asm-m32r/page.h --- linux-2.6.22-570/include/asm-m32r/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-m32r/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -15,7 +15,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* diff -Nurb linux-2.6.22-570/include/asm-m68knommu/page.h linux-2.6.22-591/include/asm-m68knommu/page.h --- linux-2.6.22-570/include/asm-m68knommu/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-m68knommu/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -22,7 +22,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-32.h 
linux-2.6.22-591/include/asm-mips/asmmacro-32.h --- linux-2.6.22-570/include/asm-mips/asmmacro-32.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-mips/asmmacro-32.h 2007-12-21 15:36:12.000000000 -0500 @@ -11,6 +11,28 @@ #include #include #include +#include + + .macro fpu_save_double_kgdb stack status tmp1=t0 + cfc1 \tmp1, fcr31 + sdc1 $f0, GDB_FR_FPR0(\stack) + sdc1 $f2, GDB_FR_FPR2(\stack) + sdc1 $f4, GDB_FR_FPR4(\stack) + sdc1 $f6, GDB_FR_FPR6(\stack) + sdc1 $f8, GDB_FR_FPR8(\stack) + sdc1 $f10, GDB_FR_FPR10(\stack) + sdc1 $f12, GDB_FR_FPR12(\stack) + sdc1 $f14, GDB_FR_FPR14(\stack) + sdc1 $f16, GDB_FR_FPR16(\stack) + sdc1 $f18, GDB_FR_FPR18(\stack) + sdc1 $f20, GDB_FR_FPR20(\stack) + sdc1 $f22, GDB_FR_FPR22(\stack) + sdc1 $f24, GDB_FR_FPR24(\stack) + sdc1 $f26, GDB_FR_FPR26(\stack) + sdc1 $f28, GDB_FR_FPR28(\stack) + sdc1 $f30, GDB_FR_FPR30(\stack) + sw \tmp1, GDB_FR_FSR(\stack) + .endm .macro fpu_save_double thread status tmp1=t0 cfc1 \tmp1, fcr31 @@ -91,6 +113,27 @@ ctc1 \tmp, fcr31 .endm + .macro fpu_restore_double_kgdb stack status tmp=t0 + lw \tmp, GDB_FR_FSR(\stack) + ldc1 $f0, GDB_FR_FPR0(\stack) + ldc1 $f2, GDB_FR_FPR2(\stack) + ldc1 $f4, GDB_FR_FPR4(\stack) + ldc1 $f6, GDB_FR_FPR6(\stack) + ldc1 $f8, GDB_FR_FPR8(\stack) + ldc1 $f10, GDB_FR_FPR10(\stack) + ldc1 $f12, GDB_FR_FPR12(\stack) + ldc1 $f14, GDB_FR_FPR14(\stack) + ldc1 $f16, GDB_FR_FPR16(\stack) + ldc1 $f18, GDB_FR_FPR18(\stack) + ldc1 $f20, GDB_FR_FPR20(\stack) + ldc1 $f22, GDB_FR_FPR22(\stack) + ldc1 $f24, GDB_FR_FPR24(\stack) + ldc1 $f26, GDB_FR_FPR26(\stack) + ldc1 $f28, GDB_FR_FPR28(\stack) + ldc1 $f30, GDB_FR_FPR30(\stack) + ctc1 \tmp, fcr31 + .endm + .macro fpu_restore_single thread tmp=t0 lw \tmp, THREAD_FCR31(\thread) lwc1 $f0, THREAD_FPR0(\thread) diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-64.h linux-2.6.22-591/include/asm-mips/asmmacro-64.h --- linux-2.6.22-570/include/asm-mips/asmmacro-64.h 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/include/asm-mips/asmmacro-64.h 2007-12-21 15:36:12.000000000 -0500 @@ -12,6 +12,7 @@ #include #include #include +#include .macro fpu_save_16even thread tmp=t0 cfc1 \tmp, fcr31 @@ -53,6 +54,46 @@ sdc1 $f31, THREAD_FPR31(\thread) .endm + .macro fpu_save_16odd_kgdb stack + sdc1 $f1, GDB_FR_FPR1(\stack) + sdc1 $f3, GDB_FR_FPR3(\stack) + sdc1 $f5, GDB_FR_FPR5(\stack) + sdc1 $f7, GDB_FR_FPR7(\stack) + sdc1 $f9, GDB_FR_FPR9(\stack) + sdc1 $f11, GDB_FR_FPR11(\stack) + sdc1 $f13, GDB_FR_FPR13(\stack) + sdc1 $f15, GDB_FR_FPR15(\stack) + sdc1 $f17, GDB_FR_FPR17(\stack) + sdc1 $f19, GDB_FR_FPR19(\stack) + sdc1 $f21, GDB_FR_FPR21(\stack) + sdc1 $f23, GDB_FR_FPR23(\stack) + sdc1 $f25, GDB_FR_FPR25(\stack) + sdc1 $f27, GDB_FR_FPR27(\stack) + sdc1 $f29, GDB_FR_FPR29(\stack) + sdc1 $f31, GDB_FR_FPR31(\stack) + .endm + + .macro fpu_save_16even_kgdb stack tmp=t0 + cfc1 \tmp, fcr31 + sdc1 $f0, GDB_FR_FPR0(\stack) + sdc1 $f2, GDB_FR_FPR2(\stack) + sdc1 $f4, GDB_FR_FPR4(\stack) + sdc1 $f6, GDB_FR_FPR6(\stack) + sdc1 $f8, GDB_FR_FPR8(\stack) + sdc1 $f10, GDB_FR_FPR10(\stack) + sdc1 $f12, GDB_FR_FPR12(\stack) + sdc1 $f14, GDB_FR_FPR14(\stack) + sdc1 $f16, GDB_FR_FPR16(\stack) + sdc1 $f18, GDB_FR_FPR18(\stack) + sdc1 $f20, GDB_FR_FPR20(\stack) + sdc1 $f22, GDB_FR_FPR22(\stack) + sdc1 $f24, GDB_FR_FPR24(\stack) + sdc1 $f26, GDB_FR_FPR26(\stack) + sdc1 $f28, GDB_FR_FPR28(\stack) + sdc1 $f30, GDB_FR_FPR30(\stack) + sw \tmp, GDB_FR_FSR(\stack) + .endm + .macro fpu_save_double thread status tmp sll \tmp, \status, 5 bgez \tmp, 2f @@ -61,6 +102,15 @@ fpu_save_16even \thread \tmp .endm + .macro fpu_save_double_kgdb stack status tmp + sll \tmp, \status, 5 + bgez \tmp, 2f + nop + fpu_save_16odd_kgdb \stack +2: + fpu_save_16even_kgdb \stack \tmp + .endm + .macro fpu_restore_16even thread tmp=t0 lw \tmp, THREAD_FCR31(\thread) ldc1 $f0, THREAD_FPR0(\thread) @@ -101,6 +151,46 @@ ldc1 $f31, THREAD_FPR31(\thread) .endm + .macro fpu_restore_16even_kgdb stack tmp=t0 + lw \tmp, 
GDB_FR_FSR(\stack) + ldc1 $f0, GDB_FR_FPR0(\stack) + ldc1 $f2, GDB_FR_FPR2(\stack) + ldc1 $f4, GDB_FR_FPR4(\stack) + ldc1 $f6, GDB_FR_FPR6(\stack) + ldc1 $f8, GDB_FR_FPR8(\stack) + ldc1 $f10, GDB_FR_FPR10(\stack) + ldc1 $f12, GDB_FR_FPR12(\stack) + ldc1 $f14, GDB_FR_FPR14(\stack) + ldc1 $f16, GDB_FR_FPR16(\stack) + ldc1 $f18, GDB_FR_FPR18(\stack) + ldc1 $f20, GDB_FR_FPR20(\stack) + ldc1 $f22, GDB_FR_FPR22(\stack) + ldc1 $f24, GDB_FR_FPR24(\stack) + ldc1 $f26, GDB_FR_FPR26(\stack) + ldc1 $f28, GDB_FR_FPR28(\stack) + ldc1 $f30, GDB_FR_FPR30(\stack) + ctc1 \tmp, fcr31 + .endm + + .macro fpu_restore_16odd_kgdb stack + ldc1 $f1, GDB_FR_FPR1(\stack) + ldc1 $f3, GDB_FR_FPR3(\stack) + ldc1 $f5, GDB_FR_FPR5(\stack) + ldc1 $f7, GDB_FR_FPR7(\stack) + ldc1 $f9, GDB_FR_FPR9(\stack) + ldc1 $f11, GDB_FR_FPR11(\stack) + ldc1 $f13, GDB_FR_FPR13(\stack) + ldc1 $f15, GDB_FR_FPR15(\stack) + ldc1 $f17, GDB_FR_FPR17(\stack) + ldc1 $f19, GDB_FR_FPR19(\stack) + ldc1 $f21, GDB_FR_FPR21(\stack) + ldc1 $f23, GDB_FR_FPR23(\stack) + ldc1 $f25, GDB_FR_FPR25(\stack) + ldc1 $f27, GDB_FR_FPR27(\stack) + ldc1 $f29, GDB_FR_FPR29(\stack) + ldc1 $f31, GDB_FR_FPR31(\stack) + .endm + .macro fpu_restore_double thread status tmp sll \tmp, \status, 5 bgez \tmp, 1f # 16 register mode? @@ -109,6 +199,15 @@ 1: fpu_restore_16even \thread \tmp .endm + .macro fpu_restore_double_kgdb stack status tmp + sll \tmp, \status, 5 + bgez \tmp, 1f # 16 register mode? + nop + + fpu_restore_16odd_kgdb \stack +1: fpu_restore_16even_kgdb \stack \tmp + .endm + .macro cpu_save_nonscratch thread LONG_S s0, THREAD_REG16(\thread) LONG_S s1, THREAD_REG17(\thread) diff -Nurb linux-2.6.22-570/include/asm-mips/kdebug.h linux-2.6.22-591/include/asm-mips/kdebug.h --- linux-2.6.22-570/include/asm-mips/kdebug.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-mips/kdebug.h 2007-12-21 15:36:12.000000000 -0500 @@ -1 +1,30 @@ -#include +/* + * + * Copyright (C) 2004 MontaVista Software Inc. 
+ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef _MIPS_KDEBUG_H +#define _MIPS_KDEBUG_H + +#include + +struct pt_regs; + +extern struct atomic_notifier_head mips_die_head; + +enum die_val { + DIE_OOPS = 1, + DIE_PANIC, + DIE_DIE, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_PAGE_FAULT, +}; + +#endif /* _MIPS_KDEBUG_H */ diff -Nurb linux-2.6.22-570/include/asm-mips/kgdb.h linux-2.6.22-591/include/asm-mips/kgdb.h --- linux-2.6.22-570/include/asm-mips/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-mips/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,41 @@ +#ifdef __KERNEL__ +#ifndef _ASM_KGDB_H_ +#define _ASM_KGDB_H_ + +#include +#include + +#ifndef __ASSEMBLY__ +#if (_MIPS_ISA == _MIPS_ISA_MIPS1) || (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS32) + +typedef u32 gdb_reg_t; + +#elif (_MIPS_ISA == _MIPS_ISA_MIPS3) || (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS64) + +#ifdef CONFIG_32BIT +typedef u32 gdb_reg_t; +#else /* CONFIG_CPU_32BIT */ +typedef u64 gdb_reg_t; +#endif +#else +#error "Need to set typedef for gdb_reg_t" +#endif /* _MIPS_ISA */ + +#define BUFMAX 2048 +#define NUMREGBYTES (90*sizeof(gdb_reg_t)) +#define NUMCRITREGBYTES (12*sizeof(gdb_reg_t)) +#define BREAK_INSTR_SIZE 4 +#define BREAKPOINT() __asm__ __volatile__( \ + ".globl breakinst\n\t" \ + ".set\tnoreorder\n\t" \ + "nop\n" \ + "breakinst:\tbreak\n\t" \ + "nop\n\t" \ + ".set\treorder") +#define CACHE_FLUSH_IS_SAFE 0 + +extern int kgdb_early_setup; + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_KGDB_H_ */ +#endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/asm-mips/ptrace.h linux-2.6.22-591/include/asm-mips/ptrace.h --- 
linux-2.6.22-570/include/asm-mips/ptrace.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-mips/ptrace.h 2007-12-21 15:36:12.000000000 -0500 @@ -28,7 +28,7 @@ * system call/exception. As usual the registers k0/k1 aren't being saved. */ struct pt_regs { -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) || defined(CONFIG_KGDB) /* Pad bytes for argument save space on the stack. */ unsigned long pad0[6]; #endif diff -Nurb linux-2.6.22-570/include/asm-powerpc/cputable.h linux-2.6.22-591/include/asm-powerpc/cputable.h --- linux-2.6.22-570/include/asm-powerpc/cputable.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/cputable.h 2007-12-21 15:36:12.000000000 -0500 @@ -111,7 +111,7 @@ /* CPU kernel features */ /* Retain the 32b definitions all use bottom half of word */ -#define CPU_FTR_SPLIT_ID_CACHE ASM_CONST(0x0000000000000001) +#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000000000000001) #define CPU_FTR_L2CR ASM_CONST(0x0000000000000002) #define CPU_FTR_SPEC7450 ASM_CONST(0x0000000000000004) #define CPU_FTR_ALTIVEC ASM_CONST(0x0000000000000008) @@ -135,6 +135,7 @@ #define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000) #define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000) #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x0000000000800000) +#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x0000000001000000) /* * Add the 64-bit processor unique features in the top half of the word; @@ -154,7 +155,6 @@ #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000) #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000) #define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000) -#define CPU_FTR_COHERENT_ICACHE LONG_ASM_CONST(0x0000020000000000) #define CPU_FTR_LOCKLESS_TLBIE LONG_ASM_CONST(0x0000040000000000) #define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000) #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000) @@ -206,164 +206,163 @@ !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \ !defined(CONFIG_BOOKE)) -#define 
CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE) -#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE | \ + CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) +#define CPU_FTRS_603 (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) -#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_604 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE | \ CPU_FTR_PPC_LE) -#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) -#define CPU_FTRS_740 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_740 (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_PPC_LE) -#define CPU_FTRS_750 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_750 (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_PPC_LE) -#define CPU_FTRS_750CL (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_750CL (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) -#define CPU_FTRS_750FX1 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_750FX1 (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_DUAL_PLL_750FX | CPU_FTR_NO_DPM | CPU_FTR_PPC_LE) -#define CPU_FTRS_750FX2 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_750FX2 (CPU_FTR_COMMON | \ 
CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NO_DPM | CPU_FTR_PPC_LE) -#define CPU_FTRS_750FX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_750FX (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) -#define CPU_FTRS_750GX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_750GX (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) -#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) -#define CPU_FTRS_7400 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7400 (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) -#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define 
CPU_FTRS_7450_23 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \ CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ CPU_FTR_NEED_COHERENT | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) -#define CPU_FTRS_7455 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7455 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE) -#define CPU_FTRS_7447 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7447 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7447A 
(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7447A (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) -#define CPU_FTRS_7448 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_7448 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_PPC_LE) -#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_82XX (CPU_FTR_COMMON | \ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB) -#define CPU_FTRS_G2_LE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \ +#define CPU_FTRS_G2_LE (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS) -#define CPU_FTRS_E300 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \ +#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_COMMON) -#define CPU_FTRS_E300C2 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \ +#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) -#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ +#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | \ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE) -#define CPU_FTRS_8XX (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB) -#define CPU_FTRS_40X (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_NODSISRALIGN) -#define CPU_FTRS_44X (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_NODSISRALIGN) -#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) -#define CPU_FTRS_E500 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_NODSISRALIGN) -#define CPU_FTRS_E500_2 
(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_8XX (CPU_FTR_USE_TB) +#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) +#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) +#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) +#define CPU_FTRS_E500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) +#define CPU_FTRS_E500_2 (CPU_FTR_USE_TB | \ CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ -#define CPU_FTRS_POWER3 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | CPU_FTR_PPC_LE) -#define CPU_FTRS_RS64 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \ CPU_FTR_MMCRA | CPU_FTR_CTRL) -#define CPU_FTRS_POWER4 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA) -#define CPU_FTRS_PPC970 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA) -#define CPU_FTRS_POWER5 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR) -#define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR) -#define CPU_FTRS_CELL (CPU_FTR_SPLIT_ID_CACHE | 
CPU_FTR_USE_TB | \ +#define CPU_FTRS_CELL (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_CELL_TB_BUG) -#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \ CPU_FTR_PURR | CPU_FTR_REAL_LE) -#define CPU_FTRS_COMPATIBLE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2) #ifdef __powerpc64__ diff -Nurb linux-2.6.22-570/include/asm-powerpc/floppy.h linux-2.6.22-591/include/asm-powerpc/floppy.h --- linux-2.6.22-570/include/asm-powerpc/floppy.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/floppy.h 2007-12-21 15:36:12.000000000 -0500 @@ -29,7 +29,7 @@ #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL); #include -#include /* for ppc64_isabridge_dev */ +#include /* for isa_bridge_pcidev */ #define fd_dma_setup(addr,size,mode,io) fd_ops->_dma_setup(addr,size,mode,io) @@ -139,12 +139,12 @@ if (bus_addr && (addr != prev_addr || size != prev_size || dir != prev_dir)) { /* different from last time -- unmap prev */ - pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir); + pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir); bus_addr = 0; } if (!bus_addr) /* need to map it */ - bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir); + bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir); /* remember this one as prev */ prev_addr = addr; diff -Nurb linux-2.6.22-570/include/asm-powerpc/io.h linux-2.6.22-591/include/asm-powerpc/io.h --- linux-2.6.22-570/include/asm-powerpc/io.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/io.h 2007-12-21 15:36:12.000000000 -0500 @@ -607,9 +607,9 @@ * * * iounmap 
undoes such a mapping and can be hooked * - * * __ioremap_explicit (and the pending __iounmap_explicit) are low level - * functions to create hand-made mappings for use only by the PCI code - * and cannot currently be hooked. + * * __ioremap_at (and the pending __iounmap_at) are low level functions to + * create hand-made mappings for use only by the PCI code and cannot + * currently be hooked. Must be page aligned. * * * __ioremap is the low level implementation used by ioremap and * ioremap_flags and cannot be hooked (but can be used by a hook on one @@ -629,19 +629,9 @@ unsigned long flags); extern void __iounmap(volatile void __iomem *addr); -extern int __ioremap_explicit(phys_addr_t p_addr, unsigned long v_addr, +extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, unsigned long flags); -extern int __iounmap_explicit(volatile void __iomem *start, - unsigned long size); - -extern void __iomem * reserve_phb_iospace(unsigned long size); - -/* Those are more 32 bits only functions */ -extern unsigned long iopa(unsigned long addr); -extern unsigned long mm_ptov(unsigned long addr) __attribute_const__; -extern void io_block_mapping(unsigned long virt, phys_addr_t phys, - unsigned int size, int flags); - +extern void __iounmap_at(void *ea, unsigned long size); /* * When CONFIG_PPC_INDIRECT_IO is set, we use the generic iomap implementation @@ -651,8 +641,8 @@ */ #define HAVE_ARCH_PIO_SIZE 1 #define PIO_OFFSET 0x00000000UL -#define PIO_MASK 0x3fffffffUL -#define PIO_RESERVED 0x40000000UL +#define PIO_MASK (FULL_IO_SIZE - 1) +#define PIO_RESERVED (FULL_IO_SIZE) #define mmio_read16be(addr) readw_be(addr) #define mmio_read32be(addr) readl_be(addr) diff -Nurb linux-2.6.22-570/include/asm-powerpc/kgdb.h linux-2.6.22-591/include/asm-powerpc/kgdb.h --- linux-2.6.22-570/include/asm-powerpc/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-powerpc/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,75 @@ +/* + * 
include/asm-powerpc/kgdb.h + * + * The PowerPC (32/64) specific defines / externs for KGDB. Based on + * the previous 32bit and 64bit specific files, which had the following + * copyrights: + * + * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) + * PPC Mods (C) 2004 Tom Rini (trini@mvista.com) + * PPC Mods (C) 2003 John Whitney (john.whitney@timesys.com) + * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu) + * + * + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Author: Tom Rini + * + * 2006 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifdef __KERNEL__ +#ifndef __POWERPC_KGDB_H__ +#define __POWERPC_KGDB_H__ + +#include + +#ifndef __ASSEMBLY__ + +#define BREAK_INSTR_SIZE 4 +#define BUFMAX ((NUMREGBYTES * 2) + 512) +#define OUTBUFMAX ((NUMREGBYTES * 2) + 512) +#define BREAKPOINT() asm(".long 0x7d821008"); /* twge r2, r2 */ +#define CACHE_FLUSH_IS_SAFE 1 + +/* The number bytes of registers we have to save depends on a few + * things. For 64bit we default to not including vector registers and + * vector state registers. */ +#ifdef CONFIG_PPC64 +/* + * 64 bit (8 byte) registers: + * 32 gpr, 32 fpr, nip, msr, link, ctr + * 32 bit (4 byte) registers: + * ccr, xer, fpscr + */ +#define NUMREGBYTES ((68 * 8) + (3 * 4)) +#if 0 +/* The following adds in vector registers and vector state registers. */ +/* 128 bit (16 byte) registers: + * 32 vr + * 64 bit (8 byte) registers: + * 32 gpr, 32 fpr, nip, msr, link, ctr + * 32 bit (4 byte) registers: + * ccr, xer, fpscr, vscr, vrsave + */ +#define NUMREGBYTES ((128 * 16) + (68 * 8) + (5 * 4)) +#endif +#define NUMCRITREGBYTES 184 +#else /* CONFIG_PPC32 */ +/* On non-E500 family PPC32 we determine the size by picking the last + * register we need, but on E500 we skip sections so we list what we + * need to store, and add it up. 
*/ +#ifndef CONFIG_E500 +#define MAXREG (PT_FPSCR+1) +#else +/* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr*/ +#define MAXREG ((32*2)+6+2+1) +#endif +#define NUMREGBYTES (MAXREG * sizeof(int)) +/* CR/LR, R1, R2, R13-R31 inclusive. */ +#define NUMCRITREGBYTES (23 * sizeof(int)) +#endif /* 32/64 */ +#endif /* !(__ASSEMBLY__) */ +#endif /* !__POWERPC_KGDB_H__ */ +#endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/lppaca.h linux-2.6.22-591/include/asm-powerpc/lppaca.h --- linux-2.6.22-570/include/asm-powerpc/lppaca.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/lppaca.h 2007-12-21 15:36:12.000000000 -0500 @@ -98,7 +98,7 @@ u64 saved_gpr5; // Saved GPR5 x30-x37 u8 reserved4; // Reserved x38-x38 - u8 cpuctls_task_attrs; // Task attributes for cpuctls x39-x39 + u8 donate_dedicated_cpu; // Donate dedicated CPU cycles x39-x39 u8 fpregs_in_use; // FP regs in use x3A-x3A u8 pmcregs_in_use; // PMC regs in use x3B-x3B volatile u32 saved_decr; // Saved Decr Value x3C-x3F diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h linux-2.6.22-591/include/asm-powerpc/mmu-hash32.h --- linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-powerpc/mmu-hash32.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,91 @@ +#ifndef _ASM_POWERPC_MMU_HASH32_H_ +#define _ASM_POWERPC_MMU_HASH32_H_ +/* + * 32-bit hash table MMU support + */ + +/* + * BATs + */ + +/* Block size masks */ +#define BL_128K 0x000 +#define BL_256K 0x001 +#define BL_512K 0x003 +#define BL_1M 0x007 +#define BL_2M 0x00F +#define BL_4M 0x01F +#define BL_8M 0x03F +#define BL_16M 0x07F +#define BL_32M 0x0FF +#define BL_64M 0x1FF +#define BL_128M 0x3FF +#define BL_256M 0x7FF + +/* BAT Access Protection */ +#define BPP_XX 0x00 /* No access */ +#define BPP_RX 0x01 /* Read only */ +#define BPP_RW 0x02 /* Read/write */ + +#ifndef __ASSEMBLY__ +struct ppc_bat { + struct { + unsigned long 
bepi:15; /* Effective page index (virtual address) */ + unsigned long :4; /* Unused */ + unsigned long bl:11; /* Block size mask */ + unsigned long vs:1; /* Supervisor valid */ + unsigned long vp:1; /* User valid */ + } batu; /* Upper register */ + struct { + unsigned long brpn:15; /* Real page index (physical address) */ + unsigned long :10; /* Unused */ + unsigned long w:1; /* Write-thru cache */ + unsigned long i:1; /* Cache inhibit */ + unsigned long m:1; /* Memory coherence */ + unsigned long g:1; /* Guarded (MBZ in IBAT) */ + unsigned long :1; /* Unused */ + unsigned long pp:2; /* Page access protections */ + } batl; /* Lower register */ +}; +#endif /* !__ASSEMBLY__ */ + +/* + * Hash table + */ + +/* Values for PP (assumes Ks=0, Kp=1) */ +#define PP_RWXX 0 /* Supervisor read/write, User none */ +#define PP_RWRX 1 /* Supervisor read/write, User read */ +#define PP_RWRW 2 /* Supervisor read/write, User read/write */ +#define PP_RXRX 3 /* Supervisor read, User read */ + +#ifndef __ASSEMBLY__ + +/* Hardware Page Table Entry */ +struct hash_pte { + unsigned long v:1; /* Entry is valid */ + unsigned long vsid:24; /* Virtual segment identifier */ + unsigned long h:1; /* Hash algorithm indicator */ + unsigned long api:6; /* Abbreviated page index */ + unsigned long rpn:20; /* Real (physical) page number */ + unsigned long :3; /* Unused */ + unsigned long r:1; /* Referenced */ + unsigned long c:1; /* Changed */ + unsigned long w:1; /* Write-thru cache mode */ + unsigned long i:1; /* Cache inhibited */ + unsigned long m:1; /* Memory coherence */ + unsigned long g:1; /* Guarded */ + unsigned long :1; /* Unused */ + unsigned long pp:2; /* Page protection */ +}; + +typedef struct { + unsigned long id; + unsigned long vdso_base; +} mm_context_t; + +typedef unsigned long phys_addr_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_MMU_HASH32_H_ */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h linux-2.6.22-591/include/asm-powerpc/mmu-hash64.h --- 
linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/mmu-hash64.h 2007-12-21 15:36:12.000000000 -0500 @@ -103,12 +103,12 @@ #ifndef __ASSEMBLY__ -typedef struct { +struct hash_pte { unsigned long v; unsigned long r; -} hpte_t; +}; -extern hpte_t *htab_address; +extern struct hash_pte *htab_address; extern unsigned long htab_size_bytes; extern unsigned long htab_hash_mask; diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu.h linux-2.6.22-591/include/asm-powerpc/mmu.h --- linux-2.6.22-570/include/asm-powerpc/mmu.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/mmu.h 2007-12-21 15:36:12.000000000 -0500 @@ -5,6 +5,9 @@ #ifdef CONFIG_PPC64 /* 64-bit classic hash table MMU */ # include +#elif defined(CONFIG_PPC_STD_MMU) +/* 32-bit classic hash table MMU */ +# include #elif defined(CONFIG_44x) /* 44x-style software loaded TLB */ # include diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci-bridge.h linux-2.6.22-591/include/asm-powerpc/pci-bridge.h --- linux-2.6.22-570/include/asm-powerpc/pci-bridge.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/pci-bridge.h 2007-12-21 15:36:12.000000000 -0500 @@ -31,6 +31,7 @@ int last_busno; void __iomem *io_base_virt; + void *io_base_alloc; resource_size_t io_base_phys; /* Some machines have a non 1:1 mapping of @@ -70,19 +71,22 @@ int devfn; /* pci device and function number */ int class_code; /* pci device class */ -#ifdef CONFIG_PPC_PSERIES + struct pci_controller *phb; /* for pci devices */ + struct iommu_table *iommu_table; /* for phb's or bridges */ + struct pci_dev *pcidev; /* back-pointer to the pci device */ + struct device_node *node; /* back-pointer to the device_node */ + + int pci_ext_config_space; /* for pci devices */ + +#ifdef CONFIG_EEH int eeh_mode; /* See eeh.h for possible EEH_MODEs */ int eeh_config_addr; int eeh_pe_config_addr; /* new-style partition endpoint address */ int 
eeh_check_count; /* # times driver ignored error */ int eeh_freeze_count; /* # times this device froze up. */ -#endif - int pci_ext_config_space; /* for pci devices */ - struct pci_controller *phb; /* for pci devices */ - struct iommu_table *iommu_table; /* for phb's or bridges */ - struct pci_dev *pcidev; /* back-pointer to the pci device */ - struct device_node *node; /* back-pointer to the device_node */ + int eeh_false_positives; /* # times this device reported #ff's */ u32 config_space[16]; /* saved PCI config space */ +#endif }; /* Get the pointer to a device_node's pci_dn */ @@ -164,6 +168,11 @@ } #endif +extern void isa_bridge_find_early(struct pci_controller *hose); + +extern int pcibios_unmap_io_space(struct pci_bus *bus); +extern int pcibios_map_io_space(struct pci_bus *bus); + /* Return values for ppc_md.pci_probe_mode function */ #define PCI_PROBE_NONE -1 /* Don't look at this bus at all */ #define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci.h linux-2.6.22-591/include/asm-powerpc/pci.h --- linux-2.6.22-570/include/asm-powerpc/pci.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/pci.h 2007-12-21 15:36:12.000000000 -0500 @@ -220,10 +220,6 @@ return root; } -extern int unmap_bus_range(struct pci_bus *bus); - -extern int remap_bus_range(struct pci_bus *bus); - extern void pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus); diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h linux-2.6.22-591/include/asm-powerpc/pgtable-ppc32.h --- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/pgtable-ppc32.h 2007-12-21 15:36:12.000000000 -0500 @@ -6,11 +6,7 @@ #ifndef __ASSEMBLY__ #include #include -#include /* For TASK_SIZE */ -#include -#include #include /* For sub-arch specific PPC_PIN_SIZE */ -struct mm_struct; extern unsigned long va_to_phys(unsigned long address); 
extern pte_t *va_to_pte(unsigned long address); @@ -488,14 +484,6 @@ #define pfn_pte(pfn, prot) __pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) |\ pgprot_val(prot)) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[1024]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) - #endif /* __ASSEMBLY__ */ #define pte_none(pte) ((pte_val(pte) & ~_PTE_NONE_MASK) == 0) @@ -734,10 +722,6 @@ #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; - -extern void paging_init(void); - /* * Encode and decode a swap entry. * Note that the bits we use in a PTE for representing a swap entry @@ -755,40 +739,6 @@ #define pte_to_pgoff(pte) (pte_val(pte) >> 3) #define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE }) -/* CONFIG_APUS */ -/* For virtual address to physical address conversion */ -extern void cache_clear(__u32 addr, int length); -extern void cache_push(__u32 addr, int length); -extern int mm_end_of_chunk (unsigned long addr, int len); -extern unsigned long iopa(unsigned long addr); -extern unsigned long mm_ptov(unsigned long addr) __attribute_const__; - -/* Values for nocacheflag and cmode */ -/* These are not used by the APUS kernel_map, but prevents - compilation errors. */ -#define KERNELMAP_FULL_CACHING 0 -#define KERNELMAP_NOCACHE_SER 1 -#define KERNELMAP_NOCACHE_NONSER 2 -#define KERNELMAP_NO_COPYBACK 3 - -/* - * Map some physical address range into the kernel address space. - */ -extern unsigned long kernel_map(unsigned long paddr, unsigned long size, - int nocacheflag, unsigned long *memavailp ); - -/* - * Set cache mode of (kernel space) address range. 
- */ -extern void kernel_set_cachemode (unsigned long address, unsigned long size, - unsigned int cmode); - -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -#define kern_addr_valid(addr) (1) - -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - /* * No page table caches to initialise */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h linux-2.6.22-591/include/asm-powerpc/pgtable-ppc64.h --- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/pgtable-ppc64.h 2007-12-21 15:36:12.000000000 -0500 @@ -7,11 +7,7 @@ #ifndef __ASSEMBLY__ #include -#include /* For TASK_SIZE */ -#include -#include #include -struct mm_struct; #endif /* __ASSEMBLY__ */ #ifdef CONFIG_PPC_64K_PAGES @@ -27,7 +23,7 @@ */ #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) -#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) +#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) #if TASK_SIZE_USER64 > PGTABLE_RANGE #error TASK_SIZE_USER64 exceeds pagetable range @@ -37,19 +33,28 @@ #error TASK_SIZE_USER64 exceeds user VSID range #endif + /* * Define the address range of the vmalloc VM area. */ #define VMALLOC_START ASM_CONST(0xD000000000000000) -#define VMALLOC_SIZE ASM_CONST(0x80000000000) +#define VMALLOC_SIZE (PGTABLE_RANGE >> 1) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* - * Define the address range of the imalloc VM area. 
- */ -#define PHBS_IO_BASE VMALLOC_END -#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ -#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) + * Define the address ranges for MMIO and IO space : + * + * ISA_IO_BASE = VMALLOC_END, 64K reserved area + * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces + * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE + */ +#define FULL_IO_SIZE 0x80000000ul +#define ISA_IO_BASE (VMALLOC_END) +#define ISA_IO_END (VMALLOC_END + 0x10000ul) +#define PHB_IO_BASE (ISA_IO_END) +#define PHB_IO_END (VMALLOC_END + FULL_IO_SIZE) +#define IOREMAP_BASE (PHB_IO_END) +#define IOREMAP_END (VMALLOC_START + PGTABLE_RANGE) /* * Region IDs @@ -134,16 +139,6 @@ #define __S110 PAGE_SHARED_X #define __S111 PAGE_SHARED_X -#ifndef __ASSEMBLY__ - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -#endif /* __ASSEMBLY__ */ - #ifdef CONFIG_HUGETLB_PAGE #define HAVE_ARCH_UNMAPPED_AREA @@ -442,10 +437,6 @@ #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -extern pgd_t swapper_pg_dir[]; - -extern void paging_init(void); - /* Encode and de-code a swap entry */ #define __swp_type(entry) (((entry).val >> 1) & 0x3f) #define __swp_offset(entry) ((entry).val >> 8) @@ -456,17 +447,6 @@ #define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE}) #define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT) -/* - * kern_addr_valid is intended to indicate whether an address is a valid - * kernel address. Most 32-bit archs define it as always true (like this) - * but most 64-bit archs actually perform a test. What should we do here? 
- * The only use is in fs/ncpfs/dir.c - */ -#define kern_addr_valid(addr) (1) - -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - void pgtable_cache_init(void); /* diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable.h linux-2.6.22-591/include/asm-powerpc/pgtable.h --- linux-2.6.22-570/include/asm-powerpc/pgtable.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/pgtable.h 2007-12-21 15:36:12.000000000 -0500 @@ -2,6 +2,13 @@ #define _ASM_POWERPC_PGTABLE_H #ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#include /* For TASK_SIZE */ +#include +#include +struct mm_struct; +#endif /* !__ASSEMBLY__ */ + #if defined(CONFIG_PPC64) # include #else @@ -9,6 +16,27 @@ #endif #ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern pgd_t swapper_pg_dir[]; + +extern void paging_init(void); + +/* + * kern_addr_valid is intended to indicate whether an address is a valid + * kernel address. Most 32-bit archs define it as always true (like this) + * but most 64-bit archs actually perform a test. What should we do here? 
+ */ +#define kern_addr_valid(addr) (1) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + #include #endif /* __ASSEMBLY__ */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/ppc-pci.h linux-2.6.22-591/include/asm-powerpc/ppc-pci.h --- linux-2.6.22-570/include/asm-powerpc/ppc-pci.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/ppc-pci.h 2007-12-21 15:36:12.000000000 -0500 @@ -26,7 +26,7 @@ extern void find_and_init_phbs(void); -extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */ +extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ #define BUID_HI(buid) ((buid) >> 32) @@ -47,8 +47,8 @@ extern unsigned long get_phb_buid (struct device_node *); extern int rtas_setup_phb(struct pci_controller *phb); -/* From pSeries_pci.h */ -extern void pSeries_final_fixup(void); +/* From iSeries PCI */ +extern void iSeries_pcibios_init(void); extern unsigned long pci_probe_only; @@ -139,6 +139,9 @@ */ struct device_node * find_device_pe(struct device_node *dn); +void eeh_sysfs_add_device(struct pci_dev *pdev); +void eeh_sysfs_remove_device(struct pci_dev *pdev); + #endif /* CONFIG_EEH */ #else /* CONFIG_PCI */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/ptrace.h linux-2.6.22-591/include/asm-powerpc/ptrace.h --- linux-2.6.22-570/include/asm-powerpc/ptrace.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/ptrace.h 2007-12-21 15:36:12.000000000 -0500 @@ -92,6 +92,11 @@ set_thread_flag(TIF_NOERROR); \ } while(0) +struct task_struct; +extern unsigned long ptrace_get_reg(struct task_struct *task, int regno); +extern int ptrace_put_reg(struct task_struct *task, int regno, + unsigned long data); + /* * We use the least-significant bit of the trap field to indicate * whether we have saved the full set of registers, or only a @@ -158,9 +163,7 @@ #define PT_NIP 
32 #define PT_MSR 33 -#ifdef __KERNEL__ #define PT_ORIG_R3 34 -#endif #define PT_CTR 35 #define PT_LNK 36 #define PT_XER 37 @@ -169,11 +172,12 @@ #define PT_MQ 39 #else #define PT_SOFTE 39 +#endif #define PT_TRAP 40 #define PT_DAR 41 #define PT_DSISR 42 #define PT_RESULT 43 -#endif +#define PT_REGS_COUNT 44 #define PT_FPR0 48 /* each FP reg occupies 2 slots in this space */ @@ -229,7 +233,17 @@ #define PTRACE_GET_DEBUGREG 25 #define PTRACE_SET_DEBUGREG 26 -/* Additional PTRACE requests implemented on PowerPC. */ +/* (new) PTRACE requests using the same numbers as x86 and the same + * argument ordering. Additionally, they support more registers too + */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETREGS64 22 +#define PTRACE_SETREGS64 23 + +/* (old) PTRACE requests with inverted arguments */ #define PPC_PTRACE_GETREGS 0x99 /* Get GPRs 0 - 31 */ #define PPC_PTRACE_SETREGS 0x98 /* Set GPRs 0 - 31 */ #define PPC_PTRACE_GETFPREGS 0x97 /* Get FPRs 0 - 31 */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/syscalls.h linux-2.6.22-591/include/asm-powerpc/syscalls.h --- linux-2.6.22-570/include/asm-powerpc/syscalls.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/syscalls.h 2007-12-21 15:36:12.000000000 -0500 @@ -43,16 +43,9 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); - -#ifndef __powerpc64__ -asmlinkage long sys_sigaltstack(const stack_t __user *uss, - stack_t __user *uoss, int r5, int r6, int r7, int r8, - struct pt_regs *regs); -#else /* __powerpc64__ */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, unsigned long r6, unsigned long r7, unsigned long r8, struct pt_regs *regs); -#endif /* __powerpc64__ */ #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_SYSCALLS_H */ diff -Nurb linux-2.6.22-570/include/asm-powerpc/systbl.h linux-2.6.22-591/include/asm-powerpc/systbl.h --- 
linux-2.6.22-570/include/asm-powerpc/systbl.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/asm-powerpc/systbl.h 2007-12-21 15:36:12.000000000 -0500 @@ -312,3 +312,4 @@ COMPAT_SYS_SPU(timerfd) SYSCALL_SPU(eventfd) COMPAT_SYS_SPU(sync_file_range2) +COMPAT_SYS(fallocate) diff -Nurb linux-2.6.22-570/include/asm-powerpc/thread_info.h linux-2.6.22-591/include/asm-powerpc/thread_info.h --- linux-2.6.22-570/include/asm-powerpc/thread_info.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-powerpc/thread_info.h 2007-12-21 15:36:12.000000000 -0500 @@ -113,8 +113,8 @@ #define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 5 /* 32 bit binary */ -#define TIF_RUNLATCH 6 /* Is the runlatch enabled? */ -#define TIF_ABI_PENDING 7 /* 32/64 bit switch needed */ +#define TIF_PERFMON_WORK 6 /* work for pfm_handle_work() */ +#define TIF_PERFMON_CTXSW 7 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 8 /* syscall auditing active */ #define TIF_SINGLESTEP 9 /* singlestepping active */ #define TIF_MEMDIE 10 @@ -123,6 +123,8 @@ #define TIF_NOERROR 14 /* Force successful syscall return */ #define TIF_RESTORE_SIGMASK 15 /* Restore signal mask in do_signal */ #define TIF_FREEZE 16 /* Freezing for suspend */ +#define TIF_RUNLATCH 17 /* Is the runlatch enabled? */ +#define TIF_ABI_PENDING 18 /* 32/64 bit switch needed */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< #ifndef __ASSEMBLY__ - -/* Things specific to the gen550 backend. */ -struct uart_port; - -extern void gen550_progress(char *, unsigned short); -extern void gen550_kgdb_map_scc(void); -extern void gen550_init(int, struct uart_port *); - -/* Things specific to the pmac backend. */ -extern void zs_kgdb_hook(int tty_num); - -/* To init the kgdb engine. (called by serial hook)*/ -extern void set_debug_traps(void); - -/* To enter the debugger explicitly. 
*/ -extern void breakpoint(void); - -/* For taking exceptions + /* For taking exceptions * these are defined in traps.c */ -extern int (*debugger)(struct pt_regs *regs); +struct pt_regs; +extern void (*debugger)(struct pt_regs *regs); extern int (*debugger_bpt)(struct pt_regs *regs); extern int (*debugger_sstep)(struct pt_regs *regs); extern int (*debugger_iabr_match)(struct pt_regs *regs); extern int (*debugger_dabr_match)(struct pt_regs *regs); extern void (*debugger_fault_handler)(struct pt_regs *regs); - -/* What we bring to the party */ -int kgdb_bpt(struct pt_regs *regs); -int kgdb_sstep(struct pt_regs *regs); -void kgdb(struct pt_regs *regs); -int kgdb_iabr_match(struct pt_regs *regs); -int kgdb_dabr_match(struct pt_regs *regs); - -/* - * external low-level support routines (ie macserial.c) - */ -extern void kgdb_interruptible(int); /* control interrupts from serial */ -extern void putDebugChar(char); /* write a single character */ -extern char getDebugChar(void); /* read and return a single char */ - -#endif /* !(__ASSEMBLY__) */ -#endif /* !(_PPC_KGDB_H) */ +#endif /* !__ASSEMBLY__ */ +#endif /* __PPC_KGDB_H__ */ #endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/asm-ppc/machdep.h linux-2.6.22-591/include/asm-ppc/machdep.h --- linux-2.6.22-570/include/asm-ppc/machdep.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-ppc/machdep.h 2007-12-21 15:36:12.000000000 -0500 @@ -72,9 +72,7 @@ unsigned long (*find_end_of_memory)(void); void (*setup_io_mappings)(void); - void (*early_serial_map)(void); void (*progress)(char *, unsigned short); - void (*kgdb_map_scc)(void); unsigned char (*nvram_read_val)(int addr); void (*nvram_write_val)(int addr, unsigned char val); diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60.h linux-2.6.22-591/include/asm-ppc/mv64x60.h --- linux-2.6.22-570/include/asm-ppc/mv64x60.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-ppc/mv64x60.h 2007-12-21 15:36:12.000000000 -0500 @@ -348,6 
+348,8 @@ void mv64x60_progress_init(u32 base); void mv64x60_mpsc_progress(char *s, unsigned short hex); +struct platform_device * mv64x60_early_get_pdev_data(const char *name, + int id, int remove); extern struct mv64x60_32bit_window gt64260_32bit_windows[MV64x60_32BIT_WIN_COUNT]; diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h linux-2.6.22-591/include/asm-ppc/mv64x60_defs.h --- linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-ppc/mv64x60_defs.h 2007-12-21 15:36:12.000000000 -0500 @@ -57,7 +57,8 @@ #define MV64x60_IRQ_I2C 37 #define MV64x60_IRQ_BRG 39 #define MV64x60_IRQ_MPSC_0 40 -#define MV64x60_IRQ_MPSC_1 42 +#define MV64360_IRQ_MPSC_1 41 +#define GT64260_IRQ_MPSC_1 42 #define MV64x60_IRQ_COMM 43 #define MV64x60_IRQ_P0_GPP_0_7 56 #define MV64x60_IRQ_P0_GPP_8_15 57 diff -Nurb linux-2.6.22-570/include/asm-s390/page.h linux-2.6.22-591/include/asm-s390/page.h --- linux-2.6.22-570/include/asm-s390/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-s390/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -64,7 +64,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* diff -Nurb linux-2.6.22-570/include/asm-sh/kgdb.h linux-2.6.22-591/include/asm-sh/kgdb.h --- linux-2.6.22-570/include/asm-sh/kgdb.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-sh/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -2,78 +2,41 @@ * May be copied or modified under the terms of the GNU General Public * License. See linux/COPYING for more information. 
* - * Based on original code by Glenn Engel, Jim Kingdon, - * David Grothe , Tigran Aivazian, and - * Amit S. Kale + * Based on a file that was modified or based on files by: Glenn Engel, + * Jim Kingdon, David Grothe , Tigran Aivazian , + * Amit S. Kale , sh-stub.c from Ben Lee and + * Steve Chamberlain, Henry Bell * - * Super-H port based on sh-stub.c (Ben Lee and Steve Chamberlain) by - * Henry Bell - * - * Header file for low-level support for remote debug using GDB. + * Maintainer: Tom Rini * */ #ifndef __KGDB_H #define __KGDB_H -#include -#include +#include -struct console; +/* Based on sh-gdb.c from gdb-6.1, Glenn + Engel at HP Ben Lee and Steve Chamberlain */ +#define NUMREGBYTES 112 /* 92 */ +#define NUMCRITREGBYTES (9 << 2) +#define BUFMAX 400 -/* Same as pt_regs but has vbr in place of syscall_nr */ +#ifndef __ASSEMBLY__ struct kgdb_regs { unsigned long regs[16]; unsigned long pc; unsigned long pr; - unsigned long sr; unsigned long gbr; + unsigned long vbr; unsigned long mach; unsigned long macl; - unsigned long vbr; + unsigned long sr; }; -/* State info */ -extern char kgdb_in_gdb_mode; -extern int kgdb_done_init; -extern int kgdb_enabled; -extern int kgdb_nofault; /* Ignore bus errors (in gdb mem access) */ -extern int kgdb_halt; /* Execute initial breakpoint at startup */ -extern char in_nmi; /* Debounce flag to prevent NMI reentry*/ - -/* SCI */ -extern int kgdb_portnum; -extern int kgdb_baud; -extern char kgdb_parity; -extern char kgdb_bits; - -/* Init and interface stuff */ -extern int kgdb_init(void); -extern int (*kgdb_getchar)(void); -extern void (*kgdb_putchar)(int); - -/* Trap functions */ -typedef void (kgdb_debug_hook_t)(struct pt_regs *regs); -typedef void (kgdb_bus_error_hook_t)(void); -extern kgdb_debug_hook_t *kgdb_debug_hook; -extern kgdb_bus_error_hook_t *kgdb_bus_err_hook; - -/* Console */ -void kgdb_console_write(struct console *co, const char *s, unsigned count); -extern int kgdb_console_setup(struct console *, char *); - -/* 
Prototypes for jmp fns */ -#define _JBLEN 9 -typedef int jmp_buf[_JBLEN]; -extern void longjmp(jmp_buf __jmpb, int __retval); -extern int setjmp(jmp_buf __jmpb); - -/* Forced breakpoint */ -#define breakpoint() \ -do { \ - if (kgdb_enabled) \ - __asm__ __volatile__("trapa #0x3c"); \ -} while (0) +#define BREAKPOINT() asm("trapa #0xff"); +#define BREAK_INSTR_SIZE 2 +#define CACHE_FLUSH_IS_SAFE 1 /* KGDB should be able to flush all kernel text space */ #if defined(CONFIG_CPU_SH4) @@ -100,4 +63,5 @@ { return hexchars[x & 0xf]; } +#endif /* !__ASSEMBLY__ */ #endif diff -Nurb linux-2.6.22-570/include/asm-sh/system.h linux-2.6.22-591/include/asm-sh/system.h --- linux-2.6.22-570/include/asm-sh/system.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-sh/system.h 2007-12-21 15:36:12.000000000 -0500 @@ -264,6 +264,45 @@ #define instruction_size(insn) (2) #endif +static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, + unsigned long new) +{ + __u32 retval; + unsigned long flags; + + local_irq_save(flags); + retval = *m; + if (retval == old) + *m = new; + local_irq_restore(flags); /* implies memory barrier */ + return retval; +} + +/* This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). 
*/ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + /* XXX * disable hlt during certain critical i/o operations */ diff -Nurb linux-2.6.22-570/include/asm-um/thread_info.h linux-2.6.22-591/include/asm-um/thread_info.h --- linux-2.6.22-570/include/asm-um/thread_info.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-um/thread_info.h 2007-12-21 15:36:12.000000000 -0500 @@ -52,10 +52,21 @@ return ti; } +#ifdef CONFIG_DEBUG_STACK_USAGE + +#define alloc_thread_info(tsk) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \ + CONFIG_KERNEL_STACK_ORDER)) +#else + /* thread information allocation */ #define alloc_thread_info(tsk) \ - ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL)) -#define free_thread_info(ti) kfree(ti) + ((struct thread_info *) __get_free_pages(GFP_KERNEL, \ + CONFIG_KERNEL_STACK_ORDER)) +#endif + +#define free_thread_info(ti) \ + free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER) #endif diff -Nurb linux-2.6.22-570/include/asm-x86_64/kdebug.h linux-2.6.22-591/include/asm-x86_64/kdebug.h --- linux-2.6.22-570/include/asm-x86_64/kdebug.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-x86_64/kdebug.h 2007-12-21 15:36:12.000000000 -0500 @@ -23,6 +23,7 @@ DIE_CALL, DIE_NMI_IPI, DIE_PAGE_FAULT, + DIE_PAGE_FAULT_NO_CONTEXT, }; extern void printk_address(unsigned long address); diff -Nurb linux-2.6.22-570/include/asm-x86_64/kgdb.h linux-2.6.22-591/include/asm-x86_64/kgdb.h --- 
linux-2.6.22-570/include/asm-x86_64/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/asm-x86_64/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,52 @@ +#ifdef __KERNEL__ +#ifndef _ASM_KGDB_H_ +#define _ASM_KGDB_H_ + +/* + * Copyright (C) 2001-2004 Amit S. Kale + */ + +#include + +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +#define _RAX 0 +#define _RDX 1 +#define _RCX 2 +#define _RBX 3 +#define _RSI 4 +#define _RDI 5 +#define _RBP 6 +#define _RSP 7 +#define _R8 8 +#define _R9 9 +#define _R10 10 +#define _R11 11 +#define _R12 12 +#define _R13 13 +#define _R14 14 +#define _R15 15 +#define _PC 16 +#define _PS 17 + +/* Number of bytes of registers. */ +#define NUMREGBYTES ((_PS+1)*8) +#define NUMCRITREGBYTES (8 * 8) /* 8 registers. */ + +#ifndef __ASSEMBLY__ +/* BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets, and + * a longer buffer is needed to list all threads. 
*/ +#define BUFMAX 1024 +#define BREAKPOINT() asm(" int $3"); +#define CHECK_EXCEPTION_STACK() ((&__get_cpu_var(init_tss))[0].ist[0]) +#define BREAK_INSTR_SIZE 1 +#define CACHE_FLUSH_IS_SAFE 1 +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_KGDB_H_ */ +#endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/asm-x86_64/page.h linux-2.6.22-591/include/asm-x86_64/page.h --- linux-2.6.22-570/include/asm-x86_64/page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-x86_64/page.h 2007-12-21 15:36:12.000000000 -0500 @@ -48,7 +48,8 @@ #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* * These are used to make use of C type-checking.. 
diff -Nurb linux-2.6.22-570/include/asm-x86_64/proto.h linux-2.6.22-591/include/asm-x86_64/proto.h --- linux-2.6.22-570/include/asm-x86_64/proto.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-x86_64/proto.h 2007-12-21 15:36:12.000000000 -0500 @@ -75,8 +75,6 @@ extern void early_quirks(void); extern void check_efer(void); -extern int unhandled_signal(struct task_struct *tsk, int sig); - extern void select_idle_routine(const struct cpuinfo_x86 *c); extern unsigned long table_start, table_end; diff -Nurb linux-2.6.22-570/include/asm-x86_64/system.h linux-2.6.22-591/include/asm-x86_64/system.h --- linux-2.6.22-570/include/asm-x86_64/system.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-x86_64/system.h 2007-12-21 15:36:12.000000000 -0500 @@ -22,7 +22,9 @@ /* Save restore flags to clear handle leaking NT */ #define switch_to(prev,next,last) \ - asm volatile(SAVE_CONTEXT \ + asm volatile(".globl __switch_to_begin\n\t" \ + "__switch_to_begin:\n\t" \ + SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ @@ -34,6 +36,8 @@ "movq %%rax,%%rdi\n\t" \ "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ + "\n.globl __switch_to_end\n\t" \ + "__switch_to_end:\n\t" \ : "=a" (last) \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ diff -Nurb linux-2.6.22-570/include/asm-x86_64/unistd.h linux-2.6.22-591/include/asm-x86_64/unistd.h --- linux-2.6.22-570/include/asm-x86_64/unistd.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/asm-x86_64/unistd.h 2007-12-21 15:36:12.000000000 -0500 @@ -630,6 +630,8 @@ __SYSCALL(__NR_timerfd, sys_timerfd) #define __NR_eventfd 284 __SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 285 +__SYSCALL(__NR_fallocate, sys_fallocate) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff -Nurb linux-2.6.22-570/include/asm-x86_64/unwind.h
linux-2.6.22-591/include/asm-x86_64/unwind.h --- linux-2.6.22-570/include/asm-x86_64/unwind.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/asm-x86_64/unwind.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,6 +1,100 @@ #ifndef _ASM_X86_64_UNWIND_H #define _ASM_X86_64_UNWIND_H +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + */ + +#ifdef CONFIG_STACK_UNWIND + +#include +#include +#include +#include + +struct unwind_frame_info +{ + struct pt_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define UNW_PC(frame) (frame)->regs.rip +#define UNW_SP(frame) (frame)->regs.rsp +#ifdef CONFIG_FRAME_POINTER +#define UNW_FP(frame) (frame)->regs.rbp +#define FRAME_RETADDR_OFFSET 8 +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1)) +#define STACK_TOP(tsk) ((tsk)->thread.rsp0) +#endif +/* Might need to account for the special exception and interrupt handling + stacks here, since normally + EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER, + but the construct is needed only for getting across the stack switch to + the interrupt stack - thus considering the IRQ stack itself is unnecessary, + and the overhead of comparing against all exception handling stacks seems + not desirable. 
*/ +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(rax), \ + PTREGS_INFO(rdx), \ + PTREGS_INFO(rcx), \ + PTREGS_INFO(rbx), \ + PTREGS_INFO(rsi), \ + PTREGS_INFO(rdi), \ + PTREGS_INFO(rbp), \ + PTREGS_INFO(rsp), \ + PTREGS_INFO(r8), \ + PTREGS_INFO(r9), \ + PTREGS_INFO(r10), \ + PTREGS_INFO(r11), \ + PTREGS_INFO(r12), \ + PTREGS_INFO(r13), \ + PTREGS_INFO(r14), \ + PTREGS_INFO(r15), \ + PTREGS_INFO(rip) + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && \ + !((raItem).value * (dataAlign) + 8)) + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + /*const*/ struct pt_regs *regs) +{ + info->regs = *regs; +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + extern const char thread_return[]; + + memset(&info->regs, 0, sizeof(info->regs)); + info->regs.rip = (unsigned long)thread_return; + info->regs.cs = __KERNEL_CS; + __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp); + info->regs.rsp = info->task->thread.rsp; + info->regs.ss = __KERNEL_DS; +} + +extern int arch_unwind_init_running(struct unwind_frame_info *, + int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +{ + return user_mode(&info->regs) + || (long)info->regs.rip >= 0 + || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) + || (long)info->regs.rsp >= 0; +} + +#else + #define UNW_PC(frame) ((void)(frame), 0UL) #define UNW_SP(frame) ((void)(frame), 0UL) @@ -9,4 +103,6 @@ return 0; } +#endif + #endif /* _ASM_X86_64_UNWIND_H */ diff -Nurb linux-2.6.22-570/include/linux/Kbuild linux-2.6.22-591/include/linux/Kbuild --- linux-2.6.22-570/include/linux/Kbuild 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/Kbuild 2007-12-21 15:36:12.000000000 -0500 @@ -91,7 +91,6 @@ header-y += in_route.h header-y += ioctl.h header-y += 
ipmi_msgdefs.h -header-y += ip_mp_alg.h header-y += ipsec.h header-y += ipx.h header-y += irda.h diff -Nurb linux-2.6.22-570/include/linux/acpi.h linux-2.6.22-591/include/linux/acpi.h --- linux-2.6.22-570/include/linux/acpi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/acpi.h 2007-12-21 15:36:12.000000000 -0500 @@ -206,11 +206,8 @@ { return max_cstate; } -static inline void acpi_set_cstate_limit(unsigned int new_limit) -{ - max_cstate = new_limit; - return; -} +extern void (*acpi_do_set_cstate_limit)(void); +extern void acpi_set_cstate_limit(unsigned int new_limit); #else static inline unsigned int acpi_get_cstate_limit(void) { return 0; } static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; } diff -Nurb linux-2.6.22-570/include/linux/async_tx.h linux-2.6.22-591/include/linux/async_tx.h --- linux-2.6.22-570/include/linux/async_tx.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/async_tx.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,156 @@ +/* + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _ASYNC_TX_H_ +#define _ASYNC_TX_H_ +#include +#include +#include + +/** + * dma_chan_ref - object used to manage dma channels received from the + * dmaengine core. 
+ * @chan - the channel being tracked + * @node - node for the channel to be placed on async_tx_master_list + * @rcu - for list_del_rcu + * @count - number of times this channel is listed in the pool + * (for channels with multiple capabilities) + */ +struct dma_chan_ref { + struct dma_chan *chan; + struct list_head node; + struct rcu_head rcu; + atomic_t count; +}; + +/** + * async_tx_flags - modifiers for the async_* calls + * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the + * destination address is not a source. The asynchronous case handles this + * implicitly, the synchronous case needs to zero the destination block. + * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is + * also one of the source addresses. In the synchronous case the destination + * address is an implied source, whereas in the asynchronous case it must be listed + * as a source. The destination address must be the first address in the source + * array. + * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations + * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a + * dependency chain + * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
+ * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously + * take an atomic mapping (KM_USER0) on the source page(s) + * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously + * take an atomic mapping (KM_USER0) on the dest page(s) + */ +enum async_tx_flags { + ASYNC_TX_XOR_ZERO_DST = (1 << 0), + ASYNC_TX_XOR_DROP_DST = (1 << 1), + ASYNC_TX_ASSUME_COHERENT = (1 << 2), + ASYNC_TX_ACK = (1 << 3), + ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_KMAP_SRC = (1 << 5), + ASYNC_TX_KMAP_DST = (1 << 6), +}; + +#ifdef CONFIG_DMA_ENGINE +void async_tx_issue_pending_all(void); +enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); +void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); +struct dma_chan * +async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, + enum dma_transaction_type tx_type); +#else +static inline void async_tx_issue_pending_all(void) +{ + do { } while (0); +} + +static inline enum dma_status +dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + return DMA_SUCCESS; +} + +static inline void +async_tx_run_dependencies( + struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + +static inline struct dma_chan * +async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, + enum dma_transaction_type tx_type) +{ + return NULL; +} +#endif + +/** + * async_tx_sync_epilog - actions to take if an operation is run synchronously + * @flags: async_tx flags + * @depend_tx: transaction depends on depend_tx + * @cb_fn: function to call when the transaction completes + * @cb_fn_param: parameter to pass to the callback routine + */ +static inline void +async_tx_sync_epilog(unsigned long flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param) +{ + if (cb_fn) + cb_fn(cb_fn_param); + + if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + async_tx_ack(depend_tx); +} + +void +async_tx_submit(struct dma_chan *chan,
struct dma_async_tx_descriptor *tx, + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_xor(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_xor_zero_sum(struct page *dest, struct page **src_list, + unsigned int offset, int src_cnt, size_t len, + u32 *result, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, + unsigned int src_offset, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_memset(struct page *dest, int val, unsigned int offset, + size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_trigger_callback(enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); +#endif /* _ASYNC_TX_H_ */ diff -Nurb linux-2.6.22-570/include/linux/configfs.h linux-2.6.22-591/include/linux/configfs.h --- linux-2.6.22-570/include/linux/configfs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/configfs.h 2007-12-21 15:36:12.000000000 -0500 @@ -75,7 +75,6 @@ extern void config_item_init_type_name(struct config_item *item, const char *name, struct config_item_type *type); -extern void config_item_cleanup(struct config_item *); extern struct config_item * config_item_get(struct config_item *); extern void config_item_put(struct config_item *); @@ 
-157,6 +156,7 @@ struct config_item *(*make_item)(struct config_group *group, const char *name); struct config_group *(*make_group)(struct config_group *group, const char *name); int (*commit_item)(struct config_item *item); + void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); }; @@ -175,6 +175,11 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +/* These functions can sleep and can alloc with GFP_KERNEL */ +/* WARNING: These cannot be called underneath configfs callbacks!! */ +int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); +void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target); + #endif /* __KERNEL__ */ #endif /* _CONFIGFS_H_ */ diff -Nurb linux-2.6.22-570/include/linux/container.h linux-2.6.22-591/include/linux/container.h --- linux-2.6.22-570/include/linux/container.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/container.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,295 @@ +#ifndef _LINUX_CONTAINER_H +#define _LINUX_CONTAINER_H +/* + * container interface + * + * Copyright (C) 2003 BULL SA + * Copyright (C) 2004-2006 Silicon Graphics, Inc. 
+ * + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_CONTAINERS + +struct containerfs_root; +struct inode; + +extern int container_init_early(void); +extern int container_init(void); +extern void container_init_smp(void); +extern void container_lock(void); +extern void container_unlock(void); +extern void container_fork(struct task_struct *p); +extern void container_fork_callbacks(struct task_struct *p); +extern void container_exit(struct task_struct *p, int run_callbacks); + +extern struct file_operations proc_container_operations; + +/* Define the enumeration of all container subsystems */ +#define SUBSYS(_x) _x ## _subsys_id, +enum container_subsys_id { +#include <linux/container_subsys.h> + CONTAINER_SUBSYS_COUNT +}; +#undef SUBSYS + +/* Per-subsystem/per-container state maintained by the system. */ +struct container_subsys_state { + /* The container that this subsystem is attached to. Useful + * for subsystems that want to know about the container + * hierarchy structure */ + struct container *container; + + /* State maintained by the container system to allow + * subsystems to be "busy". Should be accessed via css_get() + * and css_put() */ + + atomic_t refcnt; +}; + +/* + * Call css_get() to hold a reference on the container; + * + */ + +static inline void css_get(struct container_subsys_state *css) +{ + atomic_inc(&css->refcnt); +} +/* + * css_put() should be called to release a reference taken by + * css_get() + */ +void css_put(struct container_subsys_state *css); + +struct container { + unsigned long flags; /* "unsigned long" so bitops work */ + + /* count users of this container. >0 means busy, but doesn't + * necessarily indicate the number of tasks in the + * container */ + atomic_t count; + + /* + * We link our 'sibling' struct into our parent's 'children'. + * Our children link their 'sibling' into our 'children'.
+ */ + struct list_head sibling; /* my parent's children */ + struct list_head children; /* my children */ + + struct container *parent; /* my parent */ + struct dentry *dentry; /* container fs entry */ + + /* Private pointers for each registered subsystem */ + struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT]; + + struct containerfs_root *root; + struct container *top_container; + + /* + * List of cg_container_links pointing at css_groups with + * tasks in this container. Protected by css_group_lock + */ + struct list_head css_groups; + + /* + * Linked list running through all containers that can + * potentially be reaped by the release agent. Protected by + * container_mutex + */ + struct list_head release_list; +}; + +/* A css_group is a structure holding pointers to a set of + * container_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire + * container set for a task. + */ + +struct css_group { + + /* Reference count */ + struct kref ref; + + /* + * List running through all container groups. Protected by + * css_group_lock + */ + struct list_head list; + + /* + * List running through all tasks using this container + * group. Protected by css_group_lock + */ + struct list_head tasks; + + /* + * List of cg_container_link objects on link chains from + * containers referenced from this css_group. Protected by + * css_group_lock + */ + struct list_head cg_links; + + /* Set of subsystem states, one for each subsystem. NULL for + * subsystems that aren't part of this hierarchy. These + * pointers reduce the number of dereferences required to get + * from a task to its state for a given container, but result + * in increased space usage if tasks are in wildly different + * groupings across different hierarchies. 
This array is + * immutable after creation */ + struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT]; + +}; + +/* struct cftype: + * + * The files in the container filesystem mostly have a very simple read/write + * handling, some common function will take care of it. Nevertheless some cases + * (read tasks) are special and therefore I define this structure for every + * kind of file. + * + * + * When reading/writing to a file: + * - the container to use in file->f_dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_dentry->d_fsdata + */ + +#define MAX_CFTYPE_NAME 64 +struct cftype { + /* By convention, the name should begin with the name of the + * subsystem, followed by a period */ + char name[MAX_CFTYPE_NAME]; + int private; + int (*open) (struct inode *inode, struct file *file); + ssize_t (*read) (struct container *cont, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos); + /* + * read_uint() is a shortcut for the common case of returning a + * single integer. Use it in place of read() + */ + u64 (*read_uint) (struct container *cont, struct cftype *cft); + ssize_t (*write) (struct container *cont, struct cftype *cft, + struct file *file, + const char __user *buf, size_t nbytes, loff_t *ppos); + int (*release) (struct inode *inode, struct file *file); +}; + +/* Add a new file to the given container directory. Should only be + * called by subsystems from within a populate() method */ +int container_add_file(struct container *cont, const struct cftype *cft); + +/* Add a set of new files to the given container directory. 
Should + * only be called by subsystems from within a populate() method */ +int container_add_files(struct container *cont, const struct cftype cft[], + int count); + +int container_is_removed(const struct container *cont); + +int container_path(const struct container *cont, char *buf, int buflen); + +int container_task_count(const struct container *cont); + +/* Return true if the container is a descendant of the current container */ +int container_is_descendant(const struct container *cont); + +/* Container subsystem type. See Documentation/containers.txt for details */ + +struct container_subsys { + int (*create)(struct container_subsys *ss, + struct container *cont); + void (*destroy)(struct container_subsys *ss, struct container *cont); + int (*can_attach)(struct container_subsys *ss, + struct container *cont, struct task_struct *tsk); + void (*attach)(struct container_subsys *ss, struct container *cont, + struct container *old_cont, struct task_struct *tsk); + void (*fork)(struct container_subsys *ss, struct task_struct *task); + void (*exit)(struct container_subsys *ss, struct task_struct *task); + int (*populate)(struct container_subsys *ss, + struct container *cont); + void (*post_clone)(struct container_subsys *ss, struct container *cont); + void (*bind)(struct container_subsys *ss, struct container *root); + int subsys_id; + int active; + int early_init; +#define MAX_CONTAINER_TYPE_NAMELEN 32 + const char *name; + + /* Protected by RCU */ + struct containerfs_root *root; + + struct list_head sibling; + + void *private; +}; + +#define SUBSYS(_x) extern struct container_subsys _x ## _subsys; +#include <linux/container_subsys.h> +#undef SUBSYS + +static inline struct container_subsys_state *container_subsys_state( + struct container *cont, int subsys_id) +{ + return cont->subsys[subsys_id]; +} + +static inline struct container_subsys_state *task_subsys_state( + struct task_struct *task, int subsys_id) +{ + return rcu_dereference(task->containers->subsys[subsys_id]); +} + +static
inline struct container* task_container(struct task_struct *task, + int subsys_id) +{ + return task_subsys_state(task, subsys_id)->container; +} + +int container_path(const struct container *cont, char *buf, int buflen); + +int container_clone(struct task_struct *tsk, struct container_subsys *ss); + +/* A container_iter should be treated as an opaque object */ +struct container_iter { + struct list_head *cg_link; + struct list_head *task; +}; + +/* To iterate across the tasks in a container: + * + * 1) call container_iter_start to initialize an iterator + * + * 2) call container_iter_next() to retrieve member tasks until it + * returns NULL or until you want to end the iteration + * + * 3) call container_iter_end() to destroy the iterator. + */ +void container_iter_start(struct container *cont, struct container_iter *it); +struct task_struct *container_iter_next(struct container *cont, + struct container_iter *it); +void container_iter_end(struct container *cont, struct container_iter *it); + +void container_set_release_agent_path(struct container_subsys *ss, + const char *path); + +#else /* !CONFIG_CONTAINERS */ + +static inline int container_init_early(void) { return 0; } +static inline int container_init(void) { return 0; } +static inline void container_init_smp(void) {} +static inline void container_fork(struct task_struct *p) {} +static inline void container_fork_callbacks(struct task_struct *p) {} +static inline void container_exit(struct task_struct *p, int callbacks) {} + +static inline void container_lock(void) {} +static inline void container_unlock(void) {} + +#endif /* !CONFIG_CONTAINERS */ + +#endif /* _LINUX_CONTAINER_H */ diff -Nurb linux-2.6.22-570/include/linux/container_subsys.h linux-2.6.22-591/include/linux/container_subsys.h --- linux-2.6.22-570/include/linux/container_subsys.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/container_subsys.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,32 @@ +/* Add subsystem
definitions of the form SUBSYS() in this + * file. Surround each one by a line of comment markers so that + * patches don't collide + */ + +/* */ + +/* */ + +#ifdef CONFIG_CONTAINER_CPUACCT +SUBSYS(cpuacct) +#endif + +/* */ + +#ifdef CONFIG_CPUSETS +SUBSYS(cpuset) +#endif + +/* */ + +#ifdef CONFIG_CONTAINER_DEBUG +SUBSYS(debug) +#endif + +/* */ + +#ifdef CONFIG_CONTAINER_NS +SUBSYS(ns) +#endif + +/* */ diff -Nurb linux-2.6.22-570/include/linux/cpu_acct.h linux-2.6.22-591/include/linux/cpu_acct.h --- linux-2.6.22-570/include/linux/cpu_acct.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/cpu_acct.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,14 @@ + +#ifndef _LINUX_CPU_ACCT_H +#define _LINUX_CPU_ACCT_H + +#include +#include + +#ifdef CONFIG_CONTAINER_CPUACCT +extern void cpuacct_charge(struct task_struct *, cputime_t cputime); +#else +static void inline cpuacct_charge(struct task_struct *p, cputime_t cputime) {} +#endif + +#endif diff -Nurb linux-2.6.22-570/include/linux/cpuidle.h linux-2.6.22-591/include/linux/cpuidle.h --- linux-2.6.22-570/include/linux/cpuidle.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/cpuidle.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,189 @@ +/* + * cpuidle.h - a generic framework for CPU idle power management + * + * (C) 2007 Venkatesh Pallipadi + * Shaohua Li + * Adam Belay + * + * This code is licenced under the GPL. 
+ */ + +#ifndef _LINUX_CPUIDLE_H +#define _LINUX_CPUIDLE_H + +#include +#include +#include +#include +#include + +#define CPUIDLE_STATE_MAX 8 +#define CPUIDLE_NAME_LEN 16 + +struct cpuidle_device; + + +/**************************** + * CPUIDLE DEVICE INTERFACE * + ****************************/ + +struct cpuidle_state { + char name[CPUIDLE_NAME_LEN]; + void *driver_data; + + unsigned int flags; + unsigned int exit_latency; /* in US */ + unsigned int power_usage; /* in mW */ + unsigned int target_residency; /* in US */ + + unsigned int usage; + unsigned int time; /* in US */ + + int (*enter) (struct cpuidle_device *dev, + struct cpuidle_state *state); +}; + +/* Idle State Flags */ +#define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ +#define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ +#define CPUIDLE_FLAG_SHALLOW (0x10) /* low latency, minimal savings */ +#define CPUIDLE_FLAG_BALANCED (0x20) /* medium latency, moderate savings */ +#define CPUIDLE_FLAG_DEEP (0x40) /* high latency, large savings */ + +#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) + +/** + * cpuidle_get_statedata - retrieves private driver state data + * @state: the state + */ +static inline void * cpuidle_get_statedata(struct cpuidle_state *state) +{ + return state->driver_data; +} + +/** + * cpuidle_set_statedata - stores private driver state data + * @state: the state + * @data: the private data + */ +static inline void +cpuidle_set_statedata(struct cpuidle_state *state, void *data) +{ + state->driver_data = data; +} + +struct cpuidle_state_kobj { + struct cpuidle_state *state; + struct completion kobj_unregister; + struct kobject kobj; +}; + +struct cpuidle_device { + unsigned int status; + int cpu; + + int last_residency; + int state_count; + struct cpuidle_state states[CPUIDLE_STATE_MAX]; + struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; + struct cpuidle_state *last_state; + + struct list_head device_list; + struct kobject kobj; + struct completion 
kobj_unregister; + void *governor_data; +}; + +DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); + +/* Device Status Flags */ +#define CPUIDLE_STATUS_DETECTED (0x1) +#define CPUIDLE_STATUS_DRIVER_ATTACHED (0x2) +#define CPUIDLE_STATUS_GOVERNOR_ATTACHED (0x4) +#define CPUIDLE_STATUS_DOIDLE (CPUIDLE_STATUS_DETECTED | \ + CPUIDLE_STATUS_DRIVER_ATTACHED | \ + CPUIDLE_STATUS_GOVERNOR_ATTACHED) + +/** + * cpuidle_get_last_residency - retrieves the last state's residency time + * @dev: the target CPU + * + * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set + */ +static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) +{ + return dev->last_residency; +} + + +/**************************** + * CPUIDLE DRIVER INTERFACE * + ****************************/ + +struct cpuidle_driver { + char name[CPUIDLE_NAME_LEN]; + struct list_head driver_list; + + int (*init) (struct cpuidle_device *dev); + void (*exit) (struct cpuidle_device *dev); + int (*redetect) (struct cpuidle_device *dev); + + int (*bm_check) (void); + + struct module *owner; +}; + +#ifdef CONFIG_CPU_IDLE + +extern int cpuidle_register_driver(struct cpuidle_driver *drv); +extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); +extern int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv); +extern int cpuidle_force_redetect_devices(struct cpuidle_driver *drv); + +#else + +static inline int cpuidle_register_driver(struct cpuidle_driver *drv) +{return 0;} +static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } +static inline int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv) +{return 0;} +static inline int cpuidle_force_redetect_devices(struct cpuidle_driver *drv) +{return 0;} + +#endif + +/****************************** + * CPUIDLE GOVERNOR INTERFACE * + ******************************/ + +struct cpuidle_governor { + char name[CPUIDLE_NAME_LEN]; + struct list_head governor_list; + + int 
(*init) (struct cpuidle_device *dev); + void (*exit) (struct cpuidle_device *dev); + void (*scan) (struct cpuidle_device *dev); + + int (*select) (struct cpuidle_device *dev); + void (*reflect) (struct cpuidle_device *dev); + + struct module *owner; +}; + +#ifdef CONFIG_CPU_IDLE + +extern int cpuidle_register_governor(struct cpuidle_governor *gov); +extern void cpuidle_unregister_governor(struct cpuidle_governor *gov); +extern int cpuidle_get_bm_activity(void); + +#else + +static inline int cpuidle_register_governor(struct cpuidle_governor *gov) +{return 0;} +static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { } +static inline int cpuidle_get_bm_activity(void) +{return 0;} + +#endif + +#endif /* _LINUX_CPUIDLE_H */ diff -Nurb linux-2.6.22-570/include/linux/cpuset.h linux-2.6.22-591/include/linux/cpuset.h --- linux-2.6.22-570/include/linux/cpuset.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/cpuset.h 2007-12-21 15:36:12.000000000 -0500 @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_CPUSETS @@ -19,8 +20,6 @@ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_fork(struct task_struct *p); -extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) @@ -75,13 +74,13 @@ extern void cpuset_track_online_nodes(void); +extern int current_cpuset_is_being_rebound(void); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_fork(struct task_struct *p) {} -static inline void cpuset_exit(struct task_struct *p) {} static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) { @@ -146,6 +145,11 @@ static inline 
void cpuset_track_online_nodes(void) {} +static inline int current_cpuset_is_being_rebound(void) +{ + return 0; +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff -Nurb linux-2.6.22-570/include/linux/device.h linux-2.6.22-591/include/linux/device.h --- linux-2.6.22-570/include/linux/device.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/device.h 2007-12-21 15:36:14.000000000 -0500 @@ -200,6 +200,8 @@ int (*suspend)(struct device *, pm_message_t state); int (*resume)(struct device *); + + const struct shadow_dir_operations *shadow_ops; }; extern int __must_check class_register(struct class *); @@ -238,7 +240,6 @@ * @devt: for internal use by the driver core only. * @node: for internal use by the driver core only. * @kobj: for internal use by the driver core only. - * @devt_attr: for internal use by the driver core only. * @groups: optional additional groups to be created * @dev: if set, a symlink to the struct device is created in the sysfs * directory for this struct class device. @@ -263,8 +264,6 @@ struct kobject kobj; struct class * class; /* required */ dev_t devt; /* dev_t, creates the sysfs "dev" */ - struct class_device_attribute *devt_attr; - struct class_device_attribute uevent_attr; struct device * dev; /* not necessary, but nice to have */ void * class_data; /* class-specific data */ struct class_device *parent; /* parent of this child device, if there is one */ @@ -419,8 +418,6 @@ struct device_type *type; unsigned is_registered:1; unsigned uevent_suppress:1; - struct device_attribute uevent_attr; - struct device_attribute *devt_attr; struct semaphore sem; /* semaphore to synchronize calls to * its driver. 
diff -Nurb linux-2.6.22-570/include/linux/dmaengine.h linux-2.6.22-591/include/linux/dmaengine.h --- linux-2.6.22-570/include/linux/dmaengine.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/dmaengine.h 2007-12-21 15:36:12.000000000 -0500 @@ -21,29 +21,40 @@ #ifndef DMAENGINE_H #define DMAENGINE_H -#ifdef CONFIG_DMA_ENGINE - #include #include #include #include #include +#include /** - * enum dma_event - resource PNP/power managment events + * enum dma_state - resource PNP/power management state * @DMA_RESOURCE_SUSPEND: DMA device going into low power state * @DMA_RESOURCE_RESUME: DMA device returning to full power - * @DMA_RESOURCE_ADDED: DMA device added to the system + * @DMA_RESOURCE_AVAILABLE: DMA device available to the system * @DMA_RESOURCE_REMOVED: DMA device removed from the system */ -enum dma_event { +enum dma_state { DMA_RESOURCE_SUSPEND, DMA_RESOURCE_RESUME, - DMA_RESOURCE_ADDED, + DMA_RESOURCE_AVAILABLE, DMA_RESOURCE_REMOVED, }; /** + * enum dma_state_client - state of the channel in the client + * @DMA_ACK: client would like to use, or was using this channel + * @DMA_DUP: client has already seen this channel, or is not using this channel + * @DMA_NAK: client does not want to see any more channels + */ +enum dma_state_client { + DMA_ACK, + DMA_DUP, + DMA_NAK, +}; + +/** * typedef dma_cookie_t - an opaque DMA cookie * * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code @@ -65,6 +76,31 @@ }; /** + * enum dma_transaction_type - DMA transaction types/indexes + */ +enum dma_transaction_type { + DMA_MEMCPY, + DMA_XOR, + DMA_PQ_XOR, + DMA_DUAL_XOR, + DMA_PQ_UPDATE, + DMA_ZERO_SUM, + DMA_PQ_ZERO_SUM, + DMA_MEMSET, + DMA_MEMCPY_CRC32C, + DMA_INTERRUPT, +}; + +/* last transaction type for creation of the capabilities mask */ +#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1) + +/** + * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
+ * See linux/cpumask.h + */ +typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; + +/** * struct dma_chan_percpu - the per-CPU part of struct dma_chan * @refcount: local_t used for open-coded "bigref" counting * @memcpy_count: transaction counter @@ -80,7 +116,6 @@ /** * struct dma_chan - devices supply DMA channels, clients use them - * @client: ptr to the client user of this chan, will be %NULL when unused * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs @@ -88,12 +123,10 @@ * @refcount: kref, used in "bigref" slow-mode * @slow_ref: indicates that the DMA channel is free * @rcu: the DMA channel's RCU head - * @client_node: used to add this to the client chan list * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu */ struct dma_chan { - struct dma_client *client; struct dma_device *device; dma_cookie_t cookie; @@ -105,11 +138,11 @@ int slow_ref; struct rcu_head rcu; - struct list_head client_node; struct list_head device_node; struct dma_chan_percpu *local; }; + void dma_chan_cleanup(struct kref *kref); static inline void dma_chan_get(struct dma_chan *chan) @@ -134,27 +167,68 @@ /* * typedef dma_event_callback - function pointer to a DMA event callback - */ -typedef void (*dma_event_callback) (struct dma_client *client, - struct dma_chan *chan, enum dma_event event); + * For each channel added to the system this routine is called for each client. + * If the client would like to use the channel it returns '1' to signal (ack) + * the dmaengine core to take out a reference on the channel and its + * corresponding device. A client must not 'ack' an available channel more + * than once. When a channel is removed all clients are notified. If a client + * is using the channel it must 'ack' the removal. A client must not 'ack' a + * removed channel more than once. 
+ * @client - 'this' pointer for the client context + * @chan - channel to be acted upon + * @state - available or removed + */ +struct dma_client; +typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, + struct dma_chan *chan, enum dma_state state); /** * struct dma_client - info on the entity making use of DMA services * @event_callback: func ptr to call when something happens - * @chan_count: number of chans allocated - * @chans_desired: number of chans requested. Can be +/- chan_count - * @lock: protects access to the channels list - * @channels: the list of DMA channels allocated + * @cap_mask: only return channels that satisfy the requested capabilities + * a value of zero corresponds to any capability * @global_node: list_head for global dma_client_list */ struct dma_client { dma_event_callback event_callback; - unsigned int chan_count; - unsigned int chans_desired; + dma_cap_mask_t cap_mask; + struct list_head global_node; +}; +typedef void (*dma_async_tx_callback)(void *dma_async_param); +/** + * struct dma_async_tx_descriptor - async transaction descriptor + * @cookie: tracking cookie for this transaction, set to -EBUSY if + * this tx is sitting on a dependency list + * @ack: the descriptor can not be reused until the client acknowledges + * receipt, i.e. 
has had a chance to establish any dependency chains + * @callback: routine to call after this operation is complete + * @callback_param: general parameter to pass to the callback routine + * @chan: target channel for this operation + * @tx_submit: execute an operation + * @tx_set_dest: set a destination address in a hardware descriptor + * @tx_set_src: set a source address in a hardware descriptor + * @depend_list: at completion this list of transactions are submitted + * @depend_node: allow this transaction to be executed after another + * transaction has completed + * @parent: pointer to the next level up in the dependency chain + * @lock: protect the dependency list + */ +struct dma_async_tx_descriptor { + dma_cookie_t cookie; + int ack; + dma_async_tx_callback callback; + void *callback_param; + struct dma_chan *chan; + dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); + void (*tx_set_dest)(dma_addr_t addr, + struct dma_async_tx_descriptor *tx, int index); + void (*tx_set_src)(dma_addr_t addr, + struct dma_async_tx_descriptor *tx, int index); + struct list_head depend_list; + struct list_head depend_node; + struct dma_async_tx_descriptor *parent; spinlock_t lock; - struct list_head channels; - struct list_head global_node; }; /** @@ -162,141 +236,130 @@ * @chancnt: how many DMA channels are supported * @channels: the list of struct dma_chan * @global_node: list_head for global dma_device_list + * @cap_mask: one or more dma_capability flags + * @max_xor: maximum number of xor sources, 0 if no capability * @refcount: reference count * @done: IO completion struct * @dev_id: unique device ID + * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the * number of allocated descriptors * @device_free_chan_resources: release DMA channel's resources - * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer - * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page - *
@device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset - * @device_memcpy_complete: poll the status of an IOAT DMA transaction - * @device_memcpy_issue_pending: push appended descriptors to hardware + * @device_prep_dma_memcpy: prepares a memcpy operation + * @device_prep_dma_xor: prepares a xor operation + * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_memset: prepares a memset operation + * @device_prep_dma_interrupt: prepares an end of chain interrupt operation + * @device_dependency_added: async_tx notifies the channel about new deps + * @device_issue_pending: push pending transactions to hardware */ struct dma_device { unsigned int chancnt; struct list_head channels; struct list_head global_node; + dma_cap_mask_t cap_mask; + int max_xor; struct kref refcount; struct completion done; int dev_id; + struct device *dev; int (*device_alloc_chan_resources)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); - dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan, - void *dest, void *src, size_t len); - dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan, - struct page *page, unsigned int offset, void *kdata, - size_t len); - dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan, - struct page *dest_pg, unsigned int dest_off, - struct page *src_pg, unsigned int src_off, size_t len); - enum dma_status (*device_memcpy_complete)(struct dma_chan *chan, + + struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( + struct dma_chan *chan, size_t len, int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_xor)( + struct dma_chan *chan, unsigned int src_cnt, size_t len, + int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_chan *chan, unsigned int src_cnt, size_t len, + u32 *result, int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_memset)( + struct dma_chan *chan, int value, size_t len, int int_en); + 
struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( + struct dma_chan *chan); + + void (*device_dependency_added)(struct dma_chan *chan); + enum dma_status (*device_is_tx_complete)(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used); - void (*device_memcpy_issue_pending)(struct dma_chan *chan); + void (*device_issue_pending)(struct dma_chan *chan); }; /* --- public DMA engine API --- */ -struct dma_client *dma_async_client_register(dma_event_callback event_callback); +void dma_async_client_register(struct dma_client *client); void dma_async_client_unregister(struct dma_client *client); -void dma_async_client_chan_request(struct dma_client *client, - unsigned int number); +void dma_async_client_chan_request(struct dma_client *client); +dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, + void *dest, void *src, size_t len); +dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, + struct page *page, unsigned int offset, void *kdata, size_t len); +dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, struct page *src_pg, + unsigned int src_off, size_t len); +void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, + struct dma_chan *chan); -/** - * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses - * @chan: DMA channel to offload copy to - * @dest: destination address (virtual) - * @src: source address (virtual) - * @len: length - * - * Both @dest and @src must be mappable to a bus address according to the - * DMA mapping API rules for streaming mappings. - * Both @dest and @src must stay memory resident (kernel memory or locked - * user space pages). 
- */ -static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, - void *dest, void *src, size_t len) +static inline void +async_tx_ack(struct dma_async_tx_descriptor *tx) { - int cpu = get_cpu(); - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; - per_cpu_ptr(chan->local, cpu)->memcpy_count++; - put_cpu(); - - return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len); + tx->ack = 1; } -/** - * dma_async_memcpy_buf_to_pg - offloaded copy from address to page - * @chan: DMA channel to offload copy to - * @page: destination page - * @offset: offset in page to copy to - * @kdata: source address (virtual) - * @len: length - * - * Both @page/@offset and @kdata must be mappable to a bus address according - * to the DMA mapping API rules for streaming mappings. - * Both @page/@offset and @kdata must stay memory resident (kernel memory or - * locked user space pages) - */ -static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, - struct page *page, unsigned int offset, void *kdata, size_t len) +#define first_dma_cap(mask) __first_dma_cap(&(mask)) +static inline int __first_dma_cap(const dma_cap_mask_t *srcp) { - int cpu = get_cpu(); - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; - per_cpu_ptr(chan->local, cpu)->memcpy_count++; - put_cpu(); + return min_t(int, DMA_TX_TYPE_END, + find_first_bit(srcp->bits, DMA_TX_TYPE_END)); +} - return chan->device->device_memcpy_buf_to_pg(chan, page, offset, - kdata, len); +#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask)) +static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp) +{ + return min_t(int, DMA_TX_TYPE_END, + find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1)); } -/** - * dma_async_memcpy_pg_to_pg - offloaded copy from page to page - * @chan: DMA channel to offload copy to - * @dest_pg: destination page - * @dest_off: offset in page to copy to - * @src_pg: source page - * @src_off: offset in page to copy from - * @len: length - * - * Both 
@dest_page/@dest_off and @src_page/@src_off must be mappable to a bus - * address according to the DMA mapping API rules for streaming mappings. - * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident - * (kernel memory or locked user space pages). - */ -static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, - struct page *dest_pg, unsigned int dest_off, struct page *src_pg, - unsigned int src_off, size_t len) +#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask)) +static inline void +__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) { - int cpu = get_cpu(); - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; - per_cpu_ptr(chan->local, cpu)->memcpy_count++; - put_cpu(); + set_bit(tx_type, dstp->bits); +} - return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, - src_pg, src_off, len); +#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) +static inline int +__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) +{ + return test_bit(tx_type, srcp->bits); } +#define for_each_dma_cap_mask(cap, mask) \ + for ((cap) = first_dma_cap(mask); \ + (cap) < DMA_TX_TYPE_END; \ + (cap) = next_dma_cap((cap), (mask))) + /** - * dma_async_memcpy_issue_pending - flush pending copies to HW + * dma_async_issue_pending - flush pending transactions to HW * @chan: target DMA channel * * This allows drivers to push copies to HW in batches, * reducing MMIO writes where possible. 
*/ -static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) +static inline void dma_async_issue_pending(struct dma_chan *chan) { - return chan->device->device_memcpy_issue_pending(chan); + return chan->device->device_issue_pending(chan); } +#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan) + /** - * dma_async_memcpy_complete - poll for transaction completion + * dma_async_is_tx_complete - poll for transaction completion * @chan: DMA channel * @cookie: transaction identifier to check status of * @last: returns last completed cookie, can be NULL @@ -306,12 +369,15 @@ * internal state and can be used with dma_async_is_complete() to check * the status of multiple cookies without re-checking hardware state. */ -static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, +static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) { - return chan->device->device_memcpy_complete(chan, cookie, last, used); + return chan->device->device_is_tx_complete(chan, cookie, last, used); } +#define dma_async_memcpy_complete(chan, cookie, last, used)\ + dma_async_is_tx_complete(chan, cookie, last, used) + /** * dma_async_is_complete - test a cookie against chan state * @cookie: transaction identifier to test status of @@ -334,6 +400,7 @@ return DMA_IN_PROGRESS; } +enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); /* --- DMA device --- */ @@ -362,5 +429,4 @@ struct dma_pinned_list *pinned_list, struct page *page, unsigned int offset, size_t len); -#endif /* CONFIG_DMA_ENGINE */ #endif /* DMAENGINE_H */ diff -Nurb linux-2.6.22-570/include/linux/etherdevice.h linux-2.6.22-591/include/linux/etherdevice.h --- linux-2.6.22-570/include/linux/etherdevice.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/etherdevice.h 2007-12-21 15:36:12.000000000 -0500 @@ -40,12 +40,6 @@ struct hh_cache *hh); extern struct 
net_device *alloc_etherdev(int sizeof_priv); -static inline void eth_copy_and_sum (struct sk_buff *dest, - const unsigned char *src, - int len, int base) -{ - memcpy (dest->data, src, len); -} /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. diff -Nurb linux-2.6.22-570/include/linux/freezer.h linux-2.6.22-591/include/linux/freezer.h --- linux-2.6.22-570/include/linux/freezer.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/freezer.h 2007-12-21 15:36:12.000000000 -0500 @@ -115,6 +115,14 @@ return !!(p->flags & PF_FREEZER_SKIP); } +/* + * Tell the freezer that the current task should be frozen by it + */ +static inline void set_freezable(void) +{ + current->flags &= ~PF_NOFREEZE; +} + #else static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } @@ -130,4 +138,5 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } +static inline void set_freezable(void) {} #endif diff -Nurb linux-2.6.22-570/include/linux/fs.h linux-2.6.22-591/include/linux/fs.h --- linux-2.6.22-570/include/linux/fs.h 2007-12-21 15:36:06.000000000 -0500 +++ linux-2.6.22-591/include/linux/fs.h 2007-12-21 15:36:12.000000000 -0500 @@ -283,6 +283,17 @@ #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 +/* + * sys_fallocate modes + * Currently sys_fallocate supports two modes: + * FA_ALLOCATE : This is the preallocate mode, using which an application/user + * may request (pre)allocation of blocks. + * FA_DEALLOCATE: This is the deallocate mode, which can be used to free + * the preallocated blocks. 
+ */ +#define FA_ALLOCATE 0x1 +#define FA_DEALLOCATE 0x2 + #ifdef __KERNEL__ #include @@ -300,6 +311,7 @@ #include #include #include +#include #include #include @@ -1139,6 +1151,7 @@ int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*revoke)(struct file *, struct address_space *); }; struct inode_operations { @@ -1164,6 +1177,8 @@ ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + long (*fallocate)(struct inode *inode, int mode, loff_t offset, + loff_t len); int (*sync_flags) (struct inode *); }; @@ -1809,6 +1824,13 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, size_t len, unsigned int flags); +/* fs/revoke.c */ +#ifdef CONFIG_MMU +extern int generic_file_revoke(struct file *, struct address_space *); +#else +#define generic_file_revoke NULL +#endif + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); @@ -2077,5 +2099,9 @@ { } #endif /* CONFIG_SECURITY */ +int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff -Nurb linux-2.6.22-570/include/linux/fs_stack.h linux-2.6.22-591/include/linux/fs_stack.h --- linux-2.6.22-570/include/linux/fs_stack.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/fs_stack.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,7 +1,19 @@ +/* + * Copyright (c) 2006-2007 Erez Zadok + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek + * Copyright (c) 2006-2007 Stony Brook University + * Copyright (c) 2006-2007 The Research Foundation of SUNY + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + #ifndef _LINUX_FS_STACK_H #define _LINUX_FS_STACK_H -/* This file defines generic functions used primarily by stackable +/* + * This file defines generic functions used primarily by stackable * filesystems; none of these functions require i_mutex to be held. */ @@ -11,7 +23,8 @@ extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, int (*get_nlinks)(struct inode *)); -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src); +extern void fsstack_copy_inode_size(struct inode *dst, + const struct inode *src); /* inlines */ static inline void fsstack_copy_attr_atime(struct inode *dest, diff -Nurb linux-2.6.22-570/include/linux/gfp.h linux-2.6.22-591/include/linux/gfp.h --- linux-2.6.22-570/include/linux/gfp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/gfp.h 2007-12-21 15:36:12.000000000 -0500 @@ -30,6 +30,9 @@ * cannot handle allocation failures. * * __GFP_NORETRY: The VM implementation must not retry indefinitely. + * + * __GFP_MOVABLE: Flag that this page will be movable by the page migration + * mechanism or reclaimed */ #define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */ #define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? 
*/ @@ -45,15 +48,21 @@ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ +#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ +#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ -#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* if you forget to add the bitmask here kernel will crash, period */ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \ + __GFP_RECLAIMABLE|__GFP_MOVABLE) + +/* This mask makes up all the page movable related flags */ +#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) @@ -62,9 +71,20 @@ #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) +#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_RECLAIMABLE) #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_HIGHMEM | \ + __GFP_MOVABLE) +#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE) +#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_MOVABLE) +#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_HIGHMEM | \ + 
__GFP_MOVABLE) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) @@ -99,6 +119,12 @@ return ZONE_NORMAL; } +static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags) +{ + BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags; +} + /* * There is only one page-allocator function, and two main namespaces to * it. The alloc_page*() variants return 'struct page *' and as such diff -Nurb linux-2.6.22-570/include/linux/highmem.h linux-2.6.22-591/include/linux/highmem.h --- linux-2.6.22-570/include/linux/highmem.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/highmem.h 2007-12-21 15:36:12.000000000 -0500 @@ -73,10 +73,27 @@ } #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +/** + * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags + * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE + * @vma: The VMA the page is to be allocated for + * @vaddr: The virtual address the page will be inserted into + * + * This function will allocate a page for a VMA but the caller is expected + * to specify via movableflags whether the page will be movable in the + * future or not + * + * An architecture may override this function by defining + * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own + * implementation. 
+ */ static inline struct page * -alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) +__alloc_zeroed_user_highpage(gfp_t movableflags, + struct vm_area_struct *vma, + unsigned long vaddr) { - struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr); + struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, + vma, vaddr); if (page) clear_user_highpage(page, vaddr); @@ -85,6 +102,36 @@ } #endif +/** + * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA + * @vma: The VMA the page is to be allocated for + * @vaddr: The virtual address the page will be inserted into + * + * This function will allocate a page for a VMA that the caller knows will + * not be able to move in the future using move_pages() or reclaim. If it + * is known that the page can move, use alloc_zeroed_user_highpage_movable + */ +static inline struct page * +alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) +{ + return __alloc_zeroed_user_highpage(0, vma, vaddr); +} + +/** + * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move + * @vma: The VMA the page is to be allocated for + * @vaddr: The virtual address the page will be inserted into + * + * This function will allocate a page for a VMA that the caller knows will + * be able to migrate in the future using move_pages() or reclaimed + */ +static inline struct page * +alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, + unsigned long vaddr) +{ + return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); +} + static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page, KM_USER0); diff -Nurb linux-2.6.22-570/include/linux/hugetlb.h linux-2.6.22-591/include/linux/hugetlb.h --- linux-2.6.22-570/include/linux/hugetlb.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/hugetlb.h 2007-12-21 15:36:12.000000000 -0500 @@ -15,6 +15,7 @@ } int 
hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); @@ -29,6 +30,8 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); extern unsigned long max_huge_pages; +extern unsigned long hugepages_treat_as_movable; +extern gfp_t htlb_alloc_mask; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff -Nurb linux-2.6.22-570/include/linux/idr.h linux-2.6.22-591/include/linux/idr.h --- linux-2.6.22-570/include/linux/idr.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/idr.h 2007-12-21 15:36:14.000000000 -0500 @@ -83,4 +83,33 @@ void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); + +/* + * IDA - IDR based id allocator, use when translation from id to + * pointer isn't necessary. 
+ */ +#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ +#define IDA_BITMAP_LONGS (128 / sizeof(long) - 1) +#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) + +struct ida_bitmap { + long nr_busy; + unsigned long bitmap[IDA_BITMAP_LONGS]; +}; + +struct ida { + struct idr idr; + struct ida_bitmap *free_bitmap; +}; + +#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } +#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) + +int ida_pre_get(struct ida *ida, gfp_t gfp_mask); +int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); +int ida_get_new(struct ida *ida, int *p_id); +void ida_remove(struct ida *ida, int id); +void ida_destroy(struct ida *ida); +void ida_init(struct ida *ida); + #endif /* __IDR_H__ */ diff -Nurb linux-2.6.22-570/include/linux/if_bridge.h linux-2.6.22-591/include/linux/if_bridge.h --- linux-2.6.22-570/include/linux/if_bridge.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/if_bridge.h 2007-12-21 15:36:14.000000000 -0500 @@ -104,7 +104,7 @@ #include -extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *)); +extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb); extern int (*br_should_route_hook)(struct sk_buff **pskb); diff -Nurb linux-2.6.22-570/include/linux/if_link.h linux-2.6.22-591/include/linux/if_link.h --- linux-2.6.22-570/include/linux/if_link.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/if_link.h 2007-12-21 15:36:12.000000000 -0500 @@ -76,6 +76,8 @@ #define IFLA_WEIGHT IFLA_WEIGHT IFLA_OPERSTATE, IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO __IFLA_MAX }; @@ -140,4 +142,49 @@ __u32 retrans_time; }; +enum +{ + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + +/* VLAN section 
*/ + +enum +{ + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum +{ + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping +{ + __u32 from; + __u32 to; +}; + #endif /* _LINUX_IF_LINK_H */ diff -Nurb linux-2.6.22-570/include/linux/if_pppox.h linux-2.6.22-591/include/linux/if_pppox.h --- linux-2.6.22-570/include/linux/if_pppox.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/if_pppox.h 2007-12-21 15:36:14.000000000 -0500 @@ -160,7 +160,7 @@ struct module; struct pppox_proto { - int (*create)(struct socket *sock); + int (*create)(struct net *net, struct socket *sock); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff -Nurb linux-2.6.22-570/include/linux/if_tun.h linux-2.6.22-591/include/linux/if_tun.h --- linux-2.6.22-570/include/linux/if_tun.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/if_tun.h 2007-12-21 15:36:12.000000000 -0500 @@ -36,6 +36,7 @@ unsigned long flags; int attached; uid_t owner; + gid_t group; wait_queue_head_t read_wait; struct sk_buff_head readq; @@ -78,6 +79,7 @@ #define TUNSETPERSIST _IOW('T', 203, int) #define TUNSETOWNER _IOW('T', 204, int) #define TUNSETLINK _IOW('T', 205, int) +#define TUNSETGROUP _IOW('T', 206, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 diff -Nurb linux-2.6.22-570/include/linux/if_vlan.h linux-2.6.22-591/include/linux/if_vlan.h --- linux-2.6.22-570/include/linux/if_vlan.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/if_vlan.h 2007-12-21 15:36:14.000000000 -0500 @@ -62,7 +62,7 @@ #define VLAN_VID_MASK 0xfff /* found in socket.c */ -extern void vlan_ioctl_set(int (*hook)(void __user *)); +extern 
void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); #define VLAN_NAME "vlan" @@ -99,7 +99,7 @@ } struct vlan_priority_tci_mapping { - unsigned long priority; + u32 priority; unsigned short vlan_qos; /* This should be shifted when first set, so we only do it * at provisioning time. * ((skb->priority << 13) & 0xE000) @@ -112,7 +112,10 @@ /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ - unsigned long ingress_priority_map[8]; + unsigned int nr_ingress_mappings; + u32 ingress_priority_map[8]; + + unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ @@ -395,6 +398,10 @@ GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */ }; +enum vlan_flags { + VLAN_FLAG_REORDER_HDR = 0x1, +}; + enum vlan_name_types { VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */ diff -Nurb linux-2.6.22-570/include/linux/inetdevice.h linux-2.6.22-591/include/linux/inetdevice.h --- linux-2.6.22-570/include/linux/inetdevice.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/inetdevice.h 2007-12-21 15:36:14.000000000 -0500 @@ -17,8 +17,6 @@ DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1); }; -extern struct ipv4_devconf ipv4_devconf; - struct in_device { struct net_device *dev; @@ -44,7 +42,7 @@ }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1]) -#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr) +#define IPV4_DEVCONF_ALL(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf), attr) static inline int ipv4_devconf_get(struct in_device *in_dev, int index) { @@ -71,14 +69,14 @@ ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr)) + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) && 
IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr)) + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) || IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr))) + (max(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr), IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) -#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL(MC_FORWARDING) && \ +#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, MC_FORWARDING) && \ IPV4_DEVCONF((in_dev)->cnf, \ MC_FORWARDING)) #define IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER) @@ -127,15 +125,15 @@ extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); -extern struct net_device *ip_dev_find(__be32 addr); +extern struct net_device *ip_dev_find(struct net *net, __be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -extern int devinet_ioctl(unsigned int cmd, void __user *); +extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern void devinet_init(void); -extern struct in_device *inetdev_by_index(int); +extern struct in_device *inetdev_by_index(struct net *, int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); -extern __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope); +extern __be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope); extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); -extern void inet_forward_change(void); +extern void inet_forward_change(struct net *net); static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { diff -Nurb 
linux-2.6.22-570/include/linux/init_task.h linux-2.6.22-591/include/linux/init_task.h --- linux-2.6.22-570/include/linux/init_task.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/init_task.h 2007-12-21 15:36:14.000000000 -0500 @@ -8,6 +8,8 @@ #include #include #include +#include +#include #define INIT_FDTABLE \ { \ @@ -77,7 +79,9 @@ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .uts_ns = &init_uts_ns, \ .mnt_ns = NULL, \ + .net_ns = &init_net, \ INIT_IPC_NS(ipc_ns) \ + .user_ns = &init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff -Nurb linux-2.6.22-570/include/linux/io.h linux-2.6.22-591/include/linux/io.h --- linux-2.6.22-570/include/linux/io.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/io.h 2007-12-21 15:36:12.000000000 -0500 @@ -63,32 +63,7 @@ void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset, unsigned long size); void devm_iounmap(struct device *dev, void __iomem *addr); - -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. 
- */ - -static inline int check_signature(const volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} +int check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length); #endif /* _LINUX_IO_H */ diff -Nurb linux-2.6.22-570/include/linux/ip_mp_alg.h linux-2.6.22-591/include/linux/ip_mp_alg.h --- linux-2.6.22-570/include/linux/ip_mp_alg.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/ip_mp_alg.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,22 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _LINUX_IP_MP_ALG_H -#define _LINUX_IP_MP_ALG_H - -enum ip_mp_alg { - IP_MP_ALG_NONE, - IP_MP_ALG_RR, - IP_MP_ALG_DRR, - IP_MP_ALG_RANDOM, - IP_MP_ALG_WRANDOM, - __IP_MP_ALG_MAX -}; - -#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1) - -#endif /* _LINUX_IP_MP_ALG_H */ - diff -Nurb linux-2.6.22-570/include/linux/ipc.h linux-2.6.22-591/include/linux/ipc.h --- linux-2.6.22-570/include/linux/ipc.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/ipc.h 2007-12-21 15:36:12.000000000 -0500 @@ -93,6 +93,7 @@ #ifdef CONFIG_SYSVIPC #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, +extern void free_ipc_ns(struct kref *kref); extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns); #else @@ -104,13 +105,9 @@ } #endif -#ifdef CONFIG_IPC_NS -extern void free_ipc_ns(struct kref *kref); -#endif - static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { -#ifdef CONFIG_IPC_NS +#ifdef CONFIG_SYSVIPC if (ns) kref_get(&ns->kref); #endif @@ -119,7 +116,7 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) { -#ifdef CONFIG_IPC_NS +#ifdef CONFIG_SYSVIPC 
kref_put(&ns->kref, free_ipc_ns); #endif } @@ -127,5 +124,3 @@ #endif /* __KERNEL__ */ #endif /* _LINUX_IPC_H */ - - diff -Nurb linux-2.6.22-570/include/linux/ipv6.h linux-2.6.22-591/include/linux/ipv6.h --- linux-2.6.22-570/include/linux/ipv6.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/ipv6.h 2007-12-21 15:36:12.000000000 -0500 @@ -247,7 +247,7 @@ __u16 lastopt; __u32 nhoff; __u16 flags; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif diff -Nurb linux-2.6.22-570/include/linux/kgdb.h linux-2.6.22-591/include/linux/kgdb.h --- linux-2.6.22-570/include/linux/kgdb.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/kgdb.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,291 @@ +/* + * include/linux/kgdb.h + * + * This provides the hooks and functions that KGDB needs to share between + * the core, I/O and arch-specific portions. + * + * Author: Amit Kale and + * Tom Rini + * + * 2001-2004 (c) Amit S. Kale and 2003-2005 (c) MontaVista Software, Inc. + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ +#ifdef __KERNEL__ +#ifndef _KGDB_H_ +#define _KGDB_H_ + +#include + +#ifdef CONFIG_KGDB +#include +#include +#include +#include + +#ifndef CHECK_EXCEPTION_STACK +#define CHECK_EXCEPTION_STACK() 1 +#endif + +struct tasklet_struct; +struct pt_regs; +struct task_struct; +struct uart_port; + +#ifdef CONFIG_KGDB_CONSOLE +extern struct console kgdbcons; +#endif + +/* To enter the debugger explicitly. 
*/ +extern void breakpoint(void); +extern int kgdb_connected; +extern int kgdb_may_fault; +extern struct tasklet_struct kgdb_tasklet_breakpoint; + +extern atomic_t kgdb_setting_breakpoint; +extern atomic_t cpu_doing_single_step; +extern atomic_t kgdb_sync_softlockup[NR_CPUS]; + +extern struct task_struct *kgdb_usethread, *kgdb_contthread; + +enum kgdb_bptype { + bp_breakpoint = '0', + bp_hardware_breakpoint, + bp_write_watchpoint, + bp_read_watchpoint, + bp_access_watchpoint +}; + +enum kgdb_bpstate { + bp_none = 0, + bp_removed, + bp_set, + bp_active +}; + +struct kgdb_bkpt { + unsigned long bpt_addr; + unsigned char saved_instr[BREAK_INSTR_SIZE]; + enum kgdb_bptype type; + enum kgdb_bpstate state; +}; + +/* The maximum number of KGDB I/O modules that can be loaded */ +#define MAX_KGDB_IO_HANDLERS 3 + +#ifndef MAX_BREAKPOINTS +#define MAX_BREAKPOINTS 1000 +#endif + +#define KGDB_HW_BREAKPOINT 1 + +/* Required functions. */ +/** + * kgdb_arch_init - Perform any architecture specific initialization. + * + * This function will handle the initialization of any architecture + * specific hooks. + */ +extern int kgdb_arch_init(void); + +/** + * regs_to_gdb_regs - Convert ptrace regs to GDB regs + * @gdb_regs: A pointer to hold the registers in the order GDB wants. + * @regs: The &struct pt_regs of the current process. + * + * Convert the pt_regs in @regs into the format for registers that + * GDB expects, stored in @gdb_regs. + */ +extern void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs); + +/** + * sleeping_thread_to_gdb_regs - Convert a sleeping thread's saved regs to GDB regs + * @gdb_regs: A pointer to hold the registers in the order GDB wants. + * @p: The &struct task_struct of the desired process. + * + * Convert the register values of the sleeping process in @p to + * the format that GDB expects. 
+ * This function is called when kgdb does not have access to the + * &struct pt_regs and therefore it should fill the gdb registers + * @gdb_regs with what has been saved in &struct thread_struct + * thread field during switch_to. + */ +extern void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, + struct task_struct *p); + +/** + * gdb_regs_to_regs - Convert GDB regs to ptrace regs. + * @gdb_regs: A pointer to hold the registers we've received from GDB. + * @regs: A pointer to a &struct pt_regs to hold these values in. + * + * Convert the GDB regs in @gdb_regs into the pt_regs, and store them + * in @regs. + */ +extern void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs); + +/** + * kgdb_arch_handle_exception - Handle architecture specific GDB packets. + * @vector: The error vector of the exception that happened. + * @signo: The signal number of the exception that happened. + * @err_code: The error code of the exception that happened. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer, of %BUFMAX to write a packet into. + * @regs: The &struct pt_regs of the current process. + * + * This function MUST handle the 'c' and 's' command packets, + * as well as packets to set / remove a hardware breakpoint, if used. + * If there are additional packets which the hardware needs to handle, + * they are handled here. The code should return -1 if it wants to + * process more packets, and a %0 or %1 if it wants to exit from the + * kgdb hook. + */ +extern int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *regs); + +/** + * kgdb_roundup_cpus - Get other CPUs into a holding pattern + * @flags: Current IRQ state + * + * On SMP systems, we need to get the attention of the other CPUs + * and get them to be in a known state. This should do what is needed + * to get the other CPUs to call kgdb_wait(). 
Note that on some arches, + * the NMI approach is not used for rounding up all the CPUs. For example, + * in case of MIPS, smp_call_function() is used to roundup CPUs. In + * this case, we have to make sure that interrupts are enabled before + * calling smp_call_function(). The argument to this function is + * the flags that will be used when restoring the interrupts. There is a + * local_irq_save() call before kgdb_roundup_cpus(). + * + * On non-SMP systems, this is not called. + */ +extern void kgdb_roundup_cpus(unsigned long flags); + +#ifndef JMP_REGS_ALIGNMENT +#define JMP_REGS_ALIGNMENT +#endif + +extern unsigned long kgdb_fault_jmp_regs[]; + +/** + * kgdb_fault_setjmp - Store state in case we fault. + * @curr_context: An array to store state into. + * + * Certain functions may try and access memory, and in doing so may + * cause a fault. When this happens, we trap it, restore state to + * this call, and let ourselves know that something bad has happened. + */ +extern asmlinkage int kgdb_fault_setjmp(unsigned long *curr_context); + +/** + * kgdb_fault_longjmp - Restore state when we have faulted. + * @curr_context: The previously stored state. + * + * When something bad does happen, this function is called to + * restore the known good state, and set the return value to 1, so + * we know something bad happened. + */ +extern asmlinkage void kgdb_fault_longjmp(unsigned long *curr_context); + +/* Optional functions. */ +extern int kgdb_validate_break_address(unsigned long addr); +extern int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr); +extern int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle); + +/** + * struct kgdb_arch - Describe architecture specific values. + * @gdb_bpt_instr: The instruction to trigger a breakpoint. + * @flags: Flags for the breakpoint, currently just %KGDB_HW_BREAKPOINT. + * @shadowth: A value of %1 indicates we shadow information on processes. 
+ * @set_breakpoint: Allow an architecture to specify how to set a software + * breakpoint. + * @remove_breakpoint: Allow an architecture to specify how to remove a + * software breakpoint. + * @set_hw_breakpoint: Allow an architecture to specify how to set a hardware + * breakpoint. + * @remove_hw_breakpoint: Allow an architecture to specify how to remove a + * hardware breakpoint. + * + * The @shadowth flag is an option to shadow information not retrievable by + * gdb otherwise. This is deprecated in favor of a binutils which supports + * CFI macros. + */ +struct kgdb_arch { + unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE]; + unsigned long flags; + unsigned shadowth; + int (*set_breakpoint) (unsigned long, char *); + int (*remove_breakpoint)(unsigned long, char *); + int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); + int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); + void (*remove_all_hw_break)(void); + void (*correct_hw_break)(void); +}; + +/* Thread reference */ +typedef unsigned char threadref[8]; + +/** + * struct kgdb_io - Describe the interface for an I/O driver to talk with KGDB. + * @read_char: Pointer to a function that will return one char. + * @write_char: Pointer to a function that will write one char. + * @flush: Pointer to a function that will flush any pending writes. + * @init: Pointer to a function that will initialize the device. + * @late_init: Pointer to a function that will do any setup that has + * other dependencies. + * @pre_exception: Pointer to a function that will do any prep work for + * the I/O driver. + * @post_exception: Pointer to a function that will do any cleanup work + * for the I/O driver. + * + * The @init and @late_init function pointers allow for an I/O driver + * such as a serial driver to fully initialize the port with @init and + * be called very early, yet safely call request_irq() later in the boot + * sequence. + * + * @init is allowed to return a non-0 return value to indicate failure. 
+ * If this is called early on, then KGDB will try again when it would call + * @late_init. If it has failed later in boot as well, the user will be + * notified. + */ +struct kgdb_io { + int (*read_char) (void); + void (*write_char) (u8); + void (*flush) (void); + int (*init) (void); + void (*late_init) (void); + void (*pre_exception) (void); + void (*post_exception) (void); +}; + +extern struct kgdb_io kgdb_io_ops; +extern struct kgdb_arch arch_kgdb_ops; +extern int kgdb_initialized; + +extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); +extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); + +extern void __init kgdb8250_add_port(int i, struct uart_port *serial_req); +extern void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *serial_req); + +extern int kgdb_hex2long(char **ptr, long *long_val); +extern char *kgdb_mem2hex(char *mem, char *buf, int count); +extern char *kgdb_hex2mem(char *buf, char *mem, int count); +extern int kgdb_get_mem(char *addr, unsigned char *buf, int count); +extern int kgdb_set_mem(char *addr, unsigned char *buf, int count); + +int kgdb_isremovedbreak(unsigned long addr); + +extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, + struct pt_regs *regs); +extern void kgdb_nmihook(int cpu, void *regs); +extern int debugger_step; +extern atomic_t debugger_active; +#else +/* Stubs for when KGDB is not set. */ +static const atomic_t debugger_active = ATOMIC_INIT(0); +#endif /* CONFIG_KGDB */ +#endif /* _KGDB_H_ */ +#endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/linux/kmod.h linux-2.6.22-591/include/linux/kmod.h --- linux-2.6.22-570/include/linux/kmod.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/kmod.h 2007-12-21 15:36:12.000000000 -0500 @@ -36,13 +36,57 @@ #define try_then_request_module(x, mod...) 
((x) ?: (request_module(mod), (x))) struct key; -extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[], - struct key *session_keyring, int wait); +struct file; +struct subprocess_info; + +/* Allocate a subprocess_info structure */ +struct subprocess_info *call_usermodehelper_setup(char *path, + char **argv, char **envp); + +/* Set various pieces of state into the subprocess_info structure */ +void call_usermodehelper_setkeys(struct subprocess_info *info, + struct key *session_keyring); +int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, + struct file **filp); +void call_usermodehelper_setcleanup(struct subprocess_info *info, + void (*cleanup)(char **argv, char **envp)); + +enum umh_wait { + UMH_NO_WAIT = -1, /* don't wait at all */ + UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ + UMH_WAIT_PROC = 1, /* wait for the process to complete */ +}; + +/* Actually execute the sub-process */ +int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); + +/* Free the subprocess_info. 
This is only needed if you're not going + to call call_usermodehelper_exec */ +void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) +call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) { - return call_usermodehelper_keys(path, argv, envp, NULL, wait); + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp); + if (info == NULL) + return -ENOMEM; + return call_usermodehelper_exec(info, wait); +} + +static inline int +call_usermodehelper_keys(char *path, char **argv, char **envp, + struct key *session_keyring, enum umh_wait wait) +{ + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp); + if (info == NULL) + return -ENOMEM; + + call_usermodehelper_setkeys(info, session_keyring); + return call_usermodehelper_exec(info, wait); } extern void usermodehelper_init(void); diff -Nurb linux-2.6.22-570/include/linux/kobject.h linux-2.6.22-591/include/linux/kobject.h --- linux-2.6.22-570/include/linux/kobject.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/kobject.h 2007-12-21 15:36:14.000000000 -0500 @@ -55,7 +55,7 @@ struct kobject * parent; struct kset * kset; struct kobj_type * ktype; - struct dentry * dentry; + struct sysfs_dirent * sd; wait_queue_head_t poll; }; @@ -71,13 +71,9 @@ extern void kobject_cleanup(struct kobject *); extern int __must_check kobject_add(struct kobject *); -extern int __must_check kobject_shadow_add(struct kobject *, struct dentry *); extern void kobject_del(struct kobject *); extern int __must_check kobject_rename(struct kobject *, const char *new_name); -extern int __must_check kobject_shadow_rename(struct kobject *kobj, - struct dentry *new_parent, - const char *new_name); extern int __must_check kobject_move(struct kobject *, struct kobject *); extern int __must_check kobject_register(struct kobject *); diff -Nurb 
linux-2.6.22-570/include/linux/ktime.h linux-2.6.22-591/include/linux/ktime.h --- linux-2.6.22-570/include/linux/ktime.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/ktime.h 2007-12-21 15:36:12.000000000 -0500 @@ -279,6 +279,16 @@ return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; } +static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) +{ + return ktime_to_us(ktime_sub(later, earlier)); +} + +static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) +{ + return ktime_add_ns(kt, usec * 1000); +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an diff -Nurb linux-2.6.22-570/include/linux/magic.h linux-2.6.22-591/include/linux/magic.h --- linux-2.6.22-570/include/linux/magic.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/magic.h 2007-12-21 15:36:12.000000000 -0500 @@ -36,8 +36,12 @@ #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" +#define REVOKEFS_MAGIC 0x5245564B /* REVK */ + +#define UNIONFS_SUPER_MAGIC 0xf15f083d #define SMB_SUPER_MAGIC 0x517B #define USBDEVICE_SUPER_MAGIC 0x9fa2 +#define CONTAINER_SUPER_MAGIC 0x27e0eb #endif /* __LINUX_MAGIC_H__ */ diff -Nurb linux-2.6.22-570/include/linux/mempolicy.h linux-2.6.22-591/include/linux/mempolicy.h --- linux-2.6.22-570/include/linux/mempolicy.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/mempolicy.h 2007-12-21 15:36:12.000000000 -0500 @@ -148,18 +148,10 @@ const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern void mpol_fix_fork_child_flag(struct task_struct *p); -#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) - -#ifdef CONFIG_CPUSETS -#define current_cpuset_is_being_rebound() \ - (cpuset_being_rebound == current->cpuset) -#else -#define 
current_cpuset_is_being_rebound() 0 -#endif extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, gfp_t gfp_flags); extern unsigned slab_node(struct mempolicy *policy); extern enum zone_type policy_zone; @@ -173,8 +165,6 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); -extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */ - #else struct mempolicy {}; @@ -253,12 +243,10 @@ { } -#define set_cpuset_being_rebound(x) do {} while (0) - static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr) + unsigned long addr, gfp_t gfp_flags) { - return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); + return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); } static inline int do_migrate_pages(struct mm_struct *mm, diff -Nurb linux-2.6.22-570/include/linux/mm.h linux-2.6.22-591/include/linux/mm.h --- linux-2.6.22-570/include/linux/mm.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/mm.h 2007-12-21 15:36:14.000000000 -0500 @@ -42,6 +42,8 @@ #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) +#define VM_REVOKED 0x20000000 /* Mapping has been revoked */ + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way @@ -170,6 +172,13 @@ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ +#define VM_CAN_INVALIDATE 0x08000000 /* The mapping may be invalidated, + * eg. truncate or invalidate_inode_*. + * In this case, do_no_page must + * return with the page locked. 
+ */ +#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */ + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif @@ -192,6 +201,25 @@ */ extern pgprot_t protection_map[16]; +#define FAULT_FLAG_WRITE 0x01 +#define FAULT_FLAG_NONLINEAR 0x02 + +/* + * fault_data is filled in by the pagefault handler and passed to the + * vma's ->fault function. That function is responsible for filling in + * 'type', which is the type of fault if a page is returned, or the type + * of error if NULL is returned. + * + * pgoff should be used in favour of address, if possible. If pgoff is + * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get + * nonlinear mapping support. + */ +struct fault_data { + unsigned long address; + pgoff_t pgoff; + unsigned int flags; + int type; +}; /* * These are the virtual MM functions - opening of an area, closing and @@ -201,9 +229,15 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); - unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); - int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); + struct page *(*fault)(struct vm_area_struct *vma, + struct fault_data *fdata); + struct page *(*nopage)(struct vm_area_struct *area, + unsigned long address, int *type); + unsigned long (*nopfn)(struct vm_area_struct *area, + unsigned long address); + int (*populate)(struct vm_area_struct *area, unsigned long address, + unsigned long len, pgprot_t prot, unsigned long pgoff, + int nonblock); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -656,7 +690,6 @@ */ #define NOPAGE_SIGBUS (NULL) #define NOPAGE_OOM ((struct
page *) (-1)) -#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */ /* * Error return values for the *_nopfn functions @@ -744,6 +777,16 @@ struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *); + +struct mm_walk { + int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *); + int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *); + int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *); + int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *); +}; + +int walk_page_range(struct mm_struct *, unsigned long addr, unsigned long end, + struct mm_walk *walk, void *private); void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, @@ -1058,6 +1101,7 @@ extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); +extern void __unlink_file_vma(struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); @@ -1097,9 +1141,11 @@ loff_t lstart, loff_t lend); /* generic vm_area_ops exported for stackable file systems */ -extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); -extern int filemap_populate(struct vm_area_struct *, unsigned long, - unsigned long, pgprot_t, unsigned long, int); +extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *); +extern struct page * __deprecated_for_modules +filemap_nopage(struct vm_area_struct *, unsigned long, int *); +extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *, + unsigned long, unsigned long, pgprot_t, unsigned 
long, int); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); @@ -1199,6 +1245,7 @@ void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); +extern void drop_pagecache_sb(struct super_block *); void drop_pagecache(void); void drop_slab(void); diff -Nurb linux-2.6.22-570/include/linux/mmc/card.h linux-2.6.22-591/include/linux/mmc/card.h --- linux-2.6.22-570/include/linux/mmc/card.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/mmc/card.h 2007-12-21 15:36:12.000000000 -0500 @@ -72,6 +72,7 @@ #define MMC_STATE_READONLY (1<<1) /* card is read-only */ #define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */ #define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */ +#define MMC_STATE_LOCKED (1<<4) /* card is currently locked */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ u32 raw_scr[2]; /* raw card SCR */ @@ -89,11 +90,16 @@ #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) #define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED) #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) +#define mmc_card_locked(c) ((c)->state & MMC_STATE_LOCKED) + +#define mmc_card_lockable(c) (((c)->csd.cmdclass & CCC_LOCK_CARD) && \ + ((c)->host->caps & MMC_CAP_BYTEBLOCK)) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED) #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) +#define mmc_card_set_locked(c) ((c)->state |= MMC_STATE_LOCKED) #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) ((c)->dev.bus_id) diff -Nurb linux-2.6.22-570/include/linux/mmc/mmc.h linux-2.6.22-591/include/linux/mmc/mmc.h --- linux-2.6.22-570/include/linux/mmc/mmc.h 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/include/linux/mmc/mmc.h 2007-12-21 15:36:12.000000000 -0500 @@ -253,5 +253,13 @@ #define MMC_SWITCH_MODE_CLEAR_BITS 0x02 /* Clear bits which are 1 in value */ #define MMC_SWITCH_MODE_WRITE_BYTE 0x03 /* Set target to value */ +/* + * MMC_LOCK_UNLOCK modes + */ +#define MMC_LOCK_MODE_ERASE (1<<3) +#define MMC_LOCK_MODE_UNLOCK (1<<2) +#define MMC_LOCK_MODE_CLR_PWD (1<<1) +#define MMC_LOCK_MODE_SET_PWD (1<<0) + #endif /* MMC_MMC_PROTOCOL_H */ diff -Nurb linux-2.6.22-570/include/linux/mmzone.h linux-2.6.22-591/include/linux/mmzone.h --- linux-2.6.22-570/include/linux/mmzone.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/mmzone.h 2007-12-21 15:36:12.000000000 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -24,8 +25,24 @@ #endif #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) +#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY +#define MIGRATE_UNMOVABLE 0 +#define MIGRATE_RECLAIMABLE 1 +#define MIGRATE_MOVABLE 2 +#define MIGRATE_TYPES 3 +#else +#define MIGRATE_UNMOVABLE 0 +#define MIGRATE_RECLAIMABLE 0 +#define MIGRATE_MOVABLE 0 +#define MIGRATE_TYPES 1 +#endif + +#define for_each_migratetype_order(order, type) \ + for (order = 0; order < MAX_ORDER; order++) \ + for (type = 0; type < MIGRATE_TYPES; type++) + struct free_area { - struct list_head free_list; + struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; }; @@ -213,6 +230,14 @@ #endif struct free_area free_area[MAX_ORDER]; +#ifndef CONFIG_SPARSEMEM + /* + * Flags for a MAX_ORDER_NR_PAGES block. See pageblock-flags.h.
+ * In SPARSEMEM, this map is stored in struct mem_section + */ + unsigned long *pageblock_flags; +#endif /* CONFIG_SPARSEMEM */ + ZONE_PADDING(_pad1_) @@ -468,6 +493,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free); void build_all_zonelists(void); +void raise_kswapd_order(unsigned int order); void wakeup_kswapd(struct zone *zone, int order); int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags); @@ -662,6 +688,9 @@ #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) +#define SECTION_BLOCKFLAGS_BITS \ + ((SECTION_SIZE_BITS - (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS) + #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif @@ -681,6 +710,7 @@ * before using it wrong. */ unsigned long section_mem_map; + DECLARE_BITMAP(pageblock_flags, SECTION_BLOCKFLAGS_BITS); }; #ifdef CONFIG_SPARSEMEM_EXTREME diff -Nurb linux-2.6.22-570/include/linux/mnt_namespace.h linux-2.6.22-591/include/linux/mnt_namespace.h --- linux-2.6.22-570/include/linux/mnt_namespace.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/mnt_namespace.h 2007-12-21 15:36:12.000000000 -0500 @@ -14,7 +14,7 @@ int event; }; -extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *, +extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff -Nurb linux-2.6.22-570/include/linux/module.h linux-2.6.22-591/include/linux/module.h --- linux-2.6.22-570/include/linux/module.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/module.h 2007-12-21 15:36:12.000000000 -0500 @@ -227,8 +227,17 @@ MODULE_STATE_LIVE, MODULE_STATE_COMING, MODULE_STATE_GOING, + MODULE_STATE_GONE, }; +#ifdef CONFIG_KGDB +#define MAX_SECTNAME 31 +struct mod_section { + void *address; + char 
name[MAX_SECTNAME + 1]; +}; +#endif + /* Similar stuff for section attributes. */ struct module_sect_attr { @@ -256,6 +265,13 @@ /* Unique handle for this module */ char name[MODULE_NAME_LEN]; +#ifdef CONFIG_KGDB + /* keep kgdb info at the beginning so that gdb doesn't have a chance to + * miss out any fields */ + unsigned long num_sections; + struct mod_section *mod_sections; +#endif + /* Sysfs stuff. */ struct module_kobject mkobj; struct module_param_attrs *param_attrs; diff -Nurb linux-2.6.22-570/include/linux/namei.h linux-2.6.22-591/include/linux/namei.h --- linux-2.6.22-570/include/linux/namei.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/namei.h 2007-12-21 15:36:12.000000000 -0500 @@ -3,6 +3,7 @@ #include #include +#include struct vfsmount; @@ -81,9 +82,16 @@ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); extern void release_open_intent(struct nameidata *); -extern struct dentry * lookup_one_len(const char *, struct dentry *, int); +extern struct dentry * lookup_one_len_nd(const char *, struct dentry *, + int, struct nameidata *); extern struct dentry *lookup_one_len_kern(const char *, struct dentry *, int); +static inline struct dentry *lookup_one_len(const char *name, + struct dentry *dir, int len) +{ + return lookup_one_len_nd(name, dir, len, NULL); +} + extern int follow_down(struct vfsmount **, struct dentry **); extern int follow_up(struct vfsmount **, struct dentry **); @@ -100,4 +108,16 @@ return nd->saved_names[nd->depth]; } +static inline void pathget(struct path *path) +{ + mntget(path->mnt); + dget(path->dentry); +} + +static inline void pathput(struct path *path) +{ + dput(path->dentry); + mntput(path->mnt); +} + #endif /* _LINUX_NAMEI_H */ diff -Nurb linux-2.6.22-570/include/linux/net.h linux-2.6.22-591/include/linux/net.h --- linux-2.6.22-570/include/linux/net.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/net.h 2007-12-21 15:36:14.000000000 -0500 @@ -23,6 +23,7 @@
struct poll_table_struct; struct inode; +struct net; #define NPROTO 34 /* should be enough for now.. */ @@ -170,7 +171,7 @@ struct net_proto_family { int family; - int (*create)(struct socket *sock, int protocol); + int (*create)(struct net *net, struct socket *sock, int protocol); struct module *owner; }; diff -Nurb linux-2.6.22-570/include/linux/netdevice.h linux-2.6.22-591/include/linux/netdevice.h --- linux-2.6.22-570/include/linux/netdevice.h 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/include/linux/netdevice.h 2007-12-21 15:36:14.000000000 -0500 @@ -39,6 +39,7 @@ #include #include +struct net; struct vlan_group; struct ethtool_ops; struct netpoll_info; @@ -314,9 +315,10 @@ /* Net device features */ unsigned long features; #define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ @@ -325,6 +327,7 @@ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -338,8 +341,11 @@ /* List of features with software fallbacks. 
*/ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) + #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) +#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) +#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) struct net_device *next_sched; @@ -533,6 +539,9 @@ void (*poll_controller)(struct net_device *dev); #endif + /* Network namespace this network device is inside */ + struct net *nd_net; + /* bridge stuff */ struct net_bridge_port *br_port; @@ -540,13 +549,16 @@ struct device dev; /* space for optional statistics and wireless sysfs groups */ struct attribute_group *sysfs_groups[3]; + + /* rtnetlink link ops */ + const struct rtnl_link_ops *rtnl_link_ops; }; #define to_net_dev(d) container_of(d, struct net_device, dev) #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) -static inline void *netdev_priv(struct net_device *dev) +static inline void *netdev_priv(const struct net_device *dev) { return (char *)dev + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) @@ -576,45 +588,48 @@ #include #include -extern struct net_device loopback_dev; /* The loopback */ -extern struct list_head dev_base_head; /* All devices */ extern rwlock_t dev_base_lock; /* Device list lock */ -#define for_each_netdev(d) \ - list_for_each_entry(d, &dev_base_head, dev_list) -#define for_each_netdev_safe(d, n) \ - list_for_each_entry_safe(d, n, &dev_base_head, dev_list) -#define for_each_netdev_continue(d) \ - list_for_each_entry_continue(d, &dev_base_head, dev_list) -#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) - -static inline struct net_device *next_net_device(struct net_device *dev) -{ - struct list_head *lh; - lh = dev->dev_list.next; - return lh == &dev_base_head ? 
NULL : net_device_entry(lh); -} +#define for_each_netdev(net, d) \ + list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_safe(net, d, n) \ + list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue(net, d) \ + list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) -static inline struct net_device *first_net_device(void) -{ - return list_empty(&dev_base_head) ? NULL : - net_device_entry(dev_base_head.next); -} +#define next_net_device(d) \ +({ \ + struct net_device *__nd_dev = (d); /* not 'dev': d may be named dev */ \ + struct list_head *lh; \ + struct net *net; \ + \ + net = __nd_dev->nd_net; \ + lh = __nd_dev->dev_list.next; \ + lh == &net->dev_base_head ? NULL : net_device_entry(lh); \ +}) + +#define first_net_device(N) \ +({ \ + struct net *NET = (N); \ + list_empty(&NET->dev_base_head) ? NULL : \ + net_device_entry(NET->dev_base_head.next); \ +}) extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); -extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); -extern struct net_device *dev_getfirstbyhwtype(unsigned short type); -extern struct net_device *__dev_getfirstbyhwtype(unsigned short type); +extern struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr); +extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); +extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); extern void dev_add_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt); extern void __dev_remove_pack(struct packet_type *pt); -extern struct net_device *dev_get_by_flags(unsigned short flags, +extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); -extern struct net_device *dev_get_by_name(const char *name); -extern struct
net_device *__dev_get_by_name(const char *name); +extern struct net_device *dev_get_by_name(struct net *net, const char *name); +extern struct net_device *__dev_get_by_name(struct net *net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); extern int dev_close(struct net_device *dev); @@ -625,9 +640,9 @@ extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); -extern int call_netdevice_notifiers(unsigned long val, void *v); -extern struct net_device *dev_get_by_index(int ifindex); -extern struct net_device *__dev_get_by_index(int ifindex); +extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); +extern struct net_device *dev_get_by_index(struct net *net, int ifindex); +extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); @@ -732,11 +747,13 @@ #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern int dev_valid_name(const char *name); -extern int dev_ioctl(unsigned int cmd, void __user *); -extern int dev_ethtool(struct ifreq *); +extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); +extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_change_net_namespace(struct net_device *, + struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); @@ -1006,7 +1023,7 @@ extern void netdev_state_change(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); /* Load a 
device via the kmod */ -extern void dev_load(const char *name); +extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; diff -Nurb linux-2.6.22-570/include/linux/netfilter/x_tables.h linux-2.6.22-591/include/linux/netfilter/x_tables.h --- linux-2.6.22-570/include/linux/netfilter/x_tables.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/netfilter/x_tables.h 2007-12-21 15:36:14.000000000 -0500 @@ -289,7 +289,7 @@ unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto); -extern int xt_register_table(struct xt_table *table, +extern int xt_register_table(struct net *net, struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); @@ -306,7 +306,7 @@ extern int xt_find_revision(int af, const char *name, u8 revision, int target, int *err); -extern struct xt_table *xt_find_table_lock(int af, const char *name); +extern struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name); extern void xt_table_unlock(struct xt_table *t); extern int xt_proto_init(int af); diff -Nurb linux-2.6.22-570/include/linux/netfilter.h linux-2.6.22-591/include/linux/netfilter.h --- linux-2.6.22-570/include/linux/netfilter.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/netfilter.h 2007-12-21 15:36:14.000000000 -0500 @@ -362,11 +362,6 @@ #endif } -#ifdef CONFIG_PROC_FS -#include -extern struct proc_dir_entry *proc_net_netfilter; -#endif - #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) diff -Nurb linux-2.6.22-570/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.22-591/include/linux/netfilter_ipv4/ip_tables.h --- linux-2.6.22-570/include/linux/netfilter_ipv4/ip_tables.h 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/include/linux/netfilter_ipv4/ip_tables.h 2007-12-21 15:36:14.000000000 -0500 @@ -292,7 +292,7 @@ #include extern void ipt_init(void) __init; -extern int ipt_register_table(struct xt_table *table, +extern int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl); extern void ipt_unregister_table(struct xt_table *table); diff -Nurb linux-2.6.22-570/include/linux/netfilter_ipv4.h linux-2.6.22-591/include/linux/netfilter_ipv4.h --- linux-2.6.22-570/include/linux/netfilter_ipv4.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/netfilter_ipv4.h 2007-12-21 15:36:14.000000000 -0500 @@ -75,7 +75,7 @@ #define SO_ORIGINAL_DST 80 #ifdef __KERNEL__ -extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type); +extern int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type); extern int ip_xfrm_me_harder(struct sk_buff **pskb); extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff -Nurb linux-2.6.22-570/include/linux/netlink.h linux-2.6.22-591/include/linux/netlink.h --- linux-2.6.22-570/include/linux/netlink.h 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/include/linux/netlink.h 2007-12-21 15:36:14.000000000 -0500 @@ -21,12 +21,14 @@ #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ #define NETLINK_GENERIC 16 -/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_DM 17 /* Device Mapper */ #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ #define NETLINK_ECRYPTFS 19 #define MAX_LINKS 32 +struct net; + struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ @@ -157,7 +159,8 @@ #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, unsigned int groups, +extern struct sock *netlink_kernel_create(struct net *net, + int unit,unsigned int groups, 
void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module); @@ -204,6 +207,7 @@ struct netlink_notify { + struct net *net; int pid; int protocol; }; diff -Nurb linux-2.6.22-570/include/linux/netpoll.h linux-2.6.22-591/include/linux/netpoll.h --- linux-2.6.22-570/include/linux/netpoll.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/netpoll.h 2007-12-21 15:36:12.000000000 -0500 @@ -16,7 +16,7 @@ struct net_device *dev; char dev_name[IFNAMSIZ]; const char *name; - void (*rx_hook)(struct netpoll *, int, char *, int); + void (*rx_hook)(struct netpoll *, int, char *, int, struct sk_buff *); u32 local_ip, remote_ip; u16 local_port, remote_port; diff -Nurb linux-2.6.22-570/include/linux/nfs4.h linux-2.6.22-591/include/linux/nfs4.h --- linux-2.6.22-570/include/linux/nfs4.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/nfs4.h 2007-12-21 15:36:12.000000000 -0500 @@ -15,6 +15,7 @@ #include +#define NFS4_BITMAP_SIZE 2 #define NFS4_VERIFIER_SIZE 8 #define NFS4_STATEID_SIZE 16 #define NFS4_FHSIZE 128 diff -Nurb linux-2.6.22-570/include/linux/nfs4_mount.h linux-2.6.22-591/include/linux/nfs4_mount.h --- linux-2.6.22-570/include/linux/nfs4_mount.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/nfs4_mount.h 2007-12-21 15:36:12.000000000 -0500 @@ -65,6 +65,7 @@ #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ +#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */ #define NFS4_MOUNT_FLAGMASK 0xFFFF #endif diff -Nurb linux-2.6.22-570/include/linux/nfs_fs.h linux-2.6.22-591/include/linux/nfs_fs.h --- linux-2.6.22-570/include/linux/nfs_fs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/nfs_fs.h 2007-12-21 15:36:12.000000000 -0500 @@ -30,7 +30,9 @@ #ifdef __KERNEL__ #include +#include #include +#include #include #include #include @@ -69,9 +71,8 @@ struct nfs4_state; struct nfs_open_context { - 
atomic_t count; - struct vfsmount *vfsmnt; - struct dentry *dentry; + struct kref kref; + struct path path; struct rpc_cred *cred; struct nfs4_state *state; fl_owner_t lockowner; @@ -156,12 +157,9 @@ * This is the list of dirty unwritten pages. */ spinlock_t req_lock; - struct list_head dirty; - struct list_head commit; struct radix_tree_root nfs_page_tree; - unsigned int ndirty, - ncommit, + unsigned long ncommit, npages; /* Open contexts for shared mmap writes */ diff -Nurb linux-2.6.22-570/include/linux/nfs_fs_sb.h linux-2.6.22-591/include/linux/nfs_fs_sb.h --- linux-2.6.22-570/include/linux/nfs_fs_sb.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/nfs_fs_sb.h 2007-12-21 15:36:12.000000000 -0500 @@ -16,7 +16,6 @@ #define NFS_CS_INITING 1 /* busy initialising */ int cl_nfsversion; /* NFS protocol version */ unsigned long cl_res_state; /* NFS resources state */ -#define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ diff -Nurb linux-2.6.22-570/include/linux/nfs_mount.h linux-2.6.22-591/include/linux/nfs_mount.h --- linux-2.6.22-570/include/linux/nfs_mount.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/nfs_mount.h 2007-12-21 15:36:12.000000000 -0500 @@ -62,6 +62,7 @@ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ +#define NFS_MOUNT_UNSHARED 0x8000 /* 5; NOTE(review): same bit as NFS_MOUNT_TAGGED */ #define NFS_MOUNT_TAGGED 0x8000 /* context tagging */ #define NFS_MOUNT_FLAGMASK 0xFFFF diff -Nurb linux-2.6.22-570/include/linux/nfs_page.h linux-2.6.22-591/include/linux/nfs_page.h --- linux-2.6.22-570/include/linux/nfs_page.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/nfs_page.h 2007-12-21 15:36:12.000000000 -0500 @@ -16,12 +16,13 @@ #include #include -#include +#include /* * Valid flags for the radix
tree */ -#define NFS_PAGE_TAG_WRITEBACK 0 +#define NFS_PAGE_TAG_LOCKED 0 +#define NFS_PAGE_TAG_COMMIT 1 /* * Valid flags for a dirty buffer @@ -33,8 +34,7 @@ struct nfs_inode; struct nfs_page { - struct list_head wb_list, /* Defines state of page: */ - *wb_list_head; /* read/write/commit */ + struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ @@ -42,7 +42,7 @@ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ - atomic_t wb_count; /* reference count */ + struct kref wb_kref; /* reference count */ unsigned long wb_flags; struct nfs_writeverf wb_verf; /* Commit cookie */ }; @@ -71,8 +71,8 @@ extern void nfs_release_request(struct nfs_page *req); -extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, - pgoff_t idx_start, unsigned int npages); +extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, + pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), @@ -84,12 +84,11 @@ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern int nfs_set_page_writeback_locked(struct nfs_page *req); -extern void nfs_clear_page_writeback(struct nfs_page *req); +extern void nfs_clear_page_tag_locked(struct nfs_page *req); /* - * Lock the page of an asynchronous request without incrementing the wb_count + * Lock the page of an asynchronous request without getting a new reference */ static inline int nfs_lock_request_dontget(struct nfs_page *req) @@ -98,14 +97,14 @@ } /* - * Lock the page of an 
asynchronous request + * Lock the page of an asynchronous request and take a reference */ static inline int nfs_lock_request(struct nfs_page *req) { if (test_and_set_bit(PG_BUSY, &req->wb_flags)) return 0; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 1; } @@ -118,7 +117,6 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) { list_add_tail(&req->wb_list, head); - req->wb_list_head = head; } @@ -132,7 +130,6 @@ if (list_empty(&req->wb_list)) return; list_del_init(&req->wb_list); - req->wb_list_head = NULL; } static inline struct nfs_page * diff -Nurb linux-2.6.22-570/include/linux/nfs_xdr.h linux-2.6.22-591/include/linux/nfs_xdr.h --- linux-2.6.22-570/include/linux/nfs_xdr.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/nfs_xdr.h 2007-12-21 15:36:12.000000000 -0500 @@ -144,6 +144,7 @@ nfs4_stateid delegation; __u32 do_recall; __u64 maxsize; + __u32 attrset[NFS4_BITMAP_SIZE]; }; /* diff -Nurb linux-2.6.22-570/include/linux/nsproxy.h linux-2.6.22-591/include/linux/nsproxy.h --- linux-2.6.22-570/include/linux/nsproxy.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/nsproxy.h 2007-12-21 15:36:14.000000000 -0500 @@ -10,6 +10,12 @@ struct ipc_namespace; struct pid_namespace; +#ifdef CONFIG_CONTAINER_NS +int ns_container_clone(struct task_struct *tsk); +#else +static inline int ns_container_clone(struct task_struct *tsk) { return 0; } +#endif + /* * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. 
@@ -29,10 +35,12 @@ struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; + struct user_namespace *user_ns; + struct net *net_ns; }; extern struct nsproxy init_nsproxy; -int copy_namespaces(int flags, struct task_struct *tsk); +int copy_namespaces(unsigned long flags, struct task_struct *tsk); struct nsproxy *copy_nsproxy(struct nsproxy *orig); void get_task_namespaces(struct task_struct *tsk); void free_nsproxy(struct nsproxy *ns); diff -Nurb linux-2.6.22-570/include/linux/pageblock-flags.h linux-2.6.22-591/include/linux/pageblock-flags.h --- linux-2.6.22-570/include/linux/pageblock-flags.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/pageblock-flags.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,52 @@ +/* + * Macros for manipulating and testing flags related to a + * MAX_ORDER_NR_PAGES block of pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2006 + * + * Original author, Mel Gorman + * Major cleanups and reduction of bit operations, Andy Whitcroft + */ +#ifndef PAGEBLOCK_FLAGS_H +#define PAGEBLOCK_FLAGS_H + +#include + +/* Macro to aid the definition of ranges of bits */ +#define PB_range(name, required_bits) \ + name, name ## _end = (name + required_bits) - 1 + +/* Bit indices that affect a whole block of pages */ +enum pageblock_bits { + PB_range(PB_migrate, 2), /* 2 bits required for migrate types */ + NR_PAGEBLOCK_BITS +}; + +/* Forward declaration */ +struct page; + +/* Declarations for getting and setting flags. See mm/page_alloc.c */ +unsigned long get_pageblock_flags_group(struct page *page, + int start_bitidx, int end_bitidx); +void set_pageblock_flags_group(struct page *page, unsigned long flags, + int start_bitidx, int end_bitidx); + +#define get_pageblock_flags(page) \ + get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1) +#define set_pageblock_flags(page, flags) \ + set_pageblock_flags_group(page, flags, 0, NR_PAGEBLOCK_BITS-1) + +#endif /* PAGEBLOCK_FLAGS_H */ diff -Nurb linux-2.6.22-570/include/linux/pci_ids.h linux-2.6.22-591/include/linux/pci_ids.h --- linux-2.6.22-570/include/linux/pci_ids.h 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/include/linux/pci_ids.h 2007-12-21 15:36:12.000000000 -0500 @@ -2003,6 +2003,7 @@ #define PCI_VENDOR_ID_ENE 0x1524 #define PCI_DEVICE_ID_ENE_CB712_SD 0x0550 +#define PCI_DEVICE_ID_ENE_CB712_SD_2 0x0551 #define PCI_DEVICE_ID_ENE_1211 0x1211 #define PCI_DEVICE_ID_ENE_1225 0x1225 #define PCI_DEVICE_ID_ENE_1410 0x1410 diff -Nurb linux-2.6.22-570/include/linux/pid_namespace.h linux-2.6.22-591/include/linux/pid_namespace.h --- linux-2.6.22-570/include/linux/pid_namespace.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/pid_namespace.h 2007-12-21 15:36:12.000000000 -0500 @@ -29,7 +29,7 @@ kref_get(&ns->kref); } -extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); extern void free_pid_ns(struct kref *kref); static inline void put_pid_ns(struct pid_namespace *ns) diff -Nurb linux-2.6.22-570/include/linux/pnp.h linux-2.6.22-591/include/linux/pnp.h --- linux-2.6.22-570/include/linux/pnp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/pnp.h 2007-12-21 15:36:12.000000000 -0500 @@ -335,6 +335,10 @@ int (*set)(struct pnp_dev *dev, struct pnp_resource_table *res); int (*disable)(struct pnp_dev *dev); + /* protocol specific suspend/resume */ + int (*suspend)(struct pnp_dev *dev, pm_message_t state); + int (*resume)(struct pnp_dev *dev); + /* used by pnp layer only (look but don't touch) */ unsigned char number; /* protocol number*/ struct device dev; /* link to driver model */ diff -Nurb linux-2.6.22-570/include/linux/prctl.h linux-2.6.22-591/include/linux/prctl.h --- linux-2.6.22-570/include/linux/prctl.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/prctl.h 2007-12-21 15:36:12.000000000 -0500 @@ -59,4 +59,8 @@ # define PR_ENDIAN_LITTLE 1 /* True little endian mode */ # define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ +/* Get/set process seccomp mode */ +#define PR_GET_SECCOMP 21 +#define PR_SET_SECCOMP 22 + #endif /* _LINUX_PRCTL_H */ diff -Nurb linux-2.6.22-570/include/linux/proc_fs.h linux-2.6.22-591/include/linux/proc_fs.h --- linux-2.6.22-570/include/linux/proc_fs.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/proc_fs.h 2007-12-21 15:36:14.000000000 -0500 @@ -86,8 +86,6 @@ extern struct proc_dir_entry proc_root; extern struct proc_dir_entry *proc_root_fs; -extern struct proc_dir_entry *proc_net; -extern struct proc_dir_entry *proc_net_stat; extern struct proc_dir_entry *proc_bus; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; @@ -105,7 +103,6 @@ unsigned long task_vsize(struct mm_struct *); int 
task_statm(struct mm_struct *, int *, int *, int *, int *); char *task_mem(struct mm_struct *, char *); -void clear_refs_smap(struct mm_struct *mm); struct proc_dir_entry *de_get(struct proc_dir_entry *de); void de_put(struct proc_dir_entry *de); @@ -113,6 +110,10 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); +static inline void remove_proc_pde(struct proc_dir_entry *pde) +{ + return remove_proc_entry(pde->name, pde->parent); +} extern struct vfsmount *proc_mnt; extern int proc_fill_super(struct super_block *,void *,int); @@ -182,42 +183,18 @@ return res; } -static inline struct proc_dir_entry *proc_net_create(const char *name, - mode_t mode, get_info_t *get_info) -{ - return create_proc_info_entry(name,mode,proc_net,get_info); -} - -static inline struct proc_dir_entry *proc_net_fops_create(const char *name, - mode_t mode, const struct file_operations *fops) -{ - struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net); - if (res) - res->proc_fops = fops; - return res; -} - -static inline void proc_net_remove(const char *name) -{ - remove_proc_entry(name,proc_net); -} - #else #define proc_root_driver NULL -#define proc_net NULL #define proc_bus NULL -#define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) -#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) -static inline void proc_net_remove(const char *name) {} - static inline void proc_flush_task(struct task_struct *task) { } static inline struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { return NULL; } #define remove_proc_entry(name, parent) do {} while (0) +#define remove_proc_pde(PDE) do {} while (0) static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} diff -Nurb 
linux-2.6.22-570/include/linux/raid/raid5.h linux-2.6.22-591/include/linux/raid/raid5.h --- linux-2.6.22-570/include/linux/raid/raid5.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/raid/raid5.h 2007-12-21 15:36:12.000000000 -0500 @@ -116,13 +116,46 @@ * attach a request to an active stripe (add_stripe_bh()) * lockdev attach-buffer unlockdev * handle a stripe (handle_stripe()) - * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io + * lockstripe clrSTRIPE_HANDLE ... + * (lockdev check-buffers unlockdev) .. + * change-state .. + * record io/ops needed unlockstripe schedule io/ops * release an active stripe (release_stripe()) * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev * * The refcount counts each thread that have activated the stripe, * plus raid5d if it is handling it, plus one for each active request - * on a cached buffer. + * on a cached buffer, and plus one if the stripe is undergoing stripe + * operations. + * + * Stripe operations are performed outside the stripe lock, + * the stripe operations are: + * -copying data between the stripe cache and user application buffers + * -computing blocks to save a disk access, or to recover a missing block + * -updating the parity on a write operation (reconstruct write and + * read-modify-write) + * -checking parity correctness + * -running i/o to disk + * These operations are carried out by raid5_run_ops which uses the async_tx + * api to (optionally) offload operations to dedicated hardware engines. + * When requesting an operation handle_stripe sets the pending bit for the + * operation and increments the count. raid5_run_ops is then run whenever + * the count is non-zero. + * There are some critical dependencies between the operations that prevent some + * from being requested while another is in flight. 
+ * 1/ Parity check operations destroy the in cache version of the parity block, + * so we prevent parity dependent operations like writes and compute_blocks + * from starting while a check is in progress. Some dma engines can perform + * the check without damaging the parity block, in these cases the parity + * block is re-marked up to date (assuming the check was successful) and is + * not re-read from disk. + * 2/ When a write operation is requested we immediately lock the affected + * blocks, and mark them as not up to date. This causes new read requests + * to be held off, as well as parity checks and compute block operations. + * 3/ Once a compute block operation has been requested handle_stripe treats + * that block as if it is up to date. raid5_run_ops guarantees that any + * operation that is dependent on the compute block result is initiated after + * the compute block completes. */ struct stripe_head { @@ -136,15 +169,46 @@ spinlock_t lock; int bm_seq; /* sequence number for bitmap flushes */ int disks; /* disks in stripe */ + /* stripe_operations + * @pending - pending ops flags (set for request->issue->complete) + * @ack - submitted ops flags (set for issue->complete) + * @complete - completed ops flags (set for complete) + * @target - STRIPE_OP_COMPUTE_BLK target + * @count - raid5_run_ops is set to run when this is non-zero + */ + struct stripe_operations { + unsigned long pending; + unsigned long ack; + unsigned long complete; + int target; + int count; + u32 zero_sum_result; + } ops; struct r5dev { struct bio req; struct bio_vec vec; struct page *page; - struct bio *toread, *towrite, *written; + struct bio *toread, *read, *towrite, *written; sector_t sector; /* sector of this page */ unsigned long flags; } dev[1]; /* allocated with extra space depending of RAID geometry */ }; + +/* stripe_head_state - collects and tracks the dynamic state of a stripe_head + * for handle_stripe.
It is only valid under spin_lock(sh->lock); + */ +struct stripe_head_state { + int syncing, expanding, expanded; + int locked, uptodate, to_read, to_write, failed, written; + int to_fill, compute, req_compute, non_overwrite, dirty; + int failed_num; +}; + +/* r6_state - extra state data only relevant to r6 */ +struct r6_state { + int p_failed, q_failed, qd_idx, failed_num[2]; +}; + /* Flags */ #define R5_UPTODATE 0 /* page contains current data */ #define R5_LOCKED 1 /* IO has been submitted on "req" */ @@ -158,6 +222,15 @@ #define R5_ReWrite 9 /* have tried to over-write the readerror */ #define R5_Expanded 10 /* This block now has post-expand data */ +#define R5_Wantcompute 11 /* compute_block in progress treat as + * uptodate + */ +#define R5_Wantfill 12 /* dev->toread contains a bio that needs + * filling + */ +#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from + * other "towrites" + */ /* * Write method */ @@ -180,6 +253,24 @@ #define STRIPE_EXPAND_SOURCE 10 #define STRIPE_EXPAND_READY 11 /* + * Operations flags (in issue order) + */ +#define STRIPE_OP_BIOFILL 0 +#define STRIPE_OP_COMPUTE_BLK 1 +#define STRIPE_OP_PREXOR 2 +#define STRIPE_OP_BIODRAIN 3 +#define STRIPE_OP_POSTXOR 4 +#define STRIPE_OP_CHECK 5 +#define STRIPE_OP_IO 6 + +/* modifiers to the base operations + * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back + * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check + */ +#define STRIPE_OP_MOD_REPAIR_PD 7 +#define STRIPE_OP_MOD_DMA_CHECK 8 + +/* * Plugging: * * To improve write throughput, we need to delay the handling of some diff -Nurb linux-2.6.22-570/include/linux/raid/xor.h linux-2.6.22-591/include/linux/raid/xor.h --- linux-2.6.22-570/include/linux/raid/xor.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/raid/xor.h 2007-12-21 15:36:12.000000000 -0500 @@ -3,9 +3,10 @@ #include -#define MAX_XOR_BLOCKS 5 +#define MAX_XOR_BLOCKS 4 -extern void xor_block(unsigned int count, 
unsigned int bytes, void **ptr); +extern void xor_blocks(unsigned int count, unsigned int bytes, + void *dest, void **srcs); struct xor_block_template { struct xor_block_template *next; diff -Nurb linux-2.6.22-570/include/linux/reboot.h linux-2.6.22-591/include/linux/reboot.h --- linux-2.6.22-570/include/linux/reboot.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/reboot.h 2007-12-21 15:36:12.000000000 -0500 @@ -67,6 +67,11 @@ void ctrl_alt_del(void); +#define POWEROFF_CMD_PATH_LEN 256 +extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; + +extern int orderly_poweroff(bool force); + /* * Emergency restart, callable from an interrupt handler. */ diff -Nurb linux-2.6.22-570/include/linux/revoked_fs_i.h linux-2.6.22-591/include/linux/revoked_fs_i.h --- linux-2.6.22-570/include/linux/revoked_fs_i.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/revoked_fs_i.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,18 @@ +#ifndef _LINUX_REVOKED_FS_I_H +#define _LINUX_REVOKED_FS_I_H + +struct revokefs_inode_info { + struct task_struct *owner; + struct file *file; + unsigned int fd; + struct inode vfs_inode; +}; + +static inline struct revokefs_inode_info *revokefs_i(struct inode *inode) +{ + return container_of(inode, struct revokefs_inode_info, vfs_inode); +} + +void make_revoked_inode(struct inode *, int); + +#endif diff -Nurb linux-2.6.22-570/include/linux/rtnetlink.h linux-2.6.22-591/include/linux/rtnetlink.h --- linux-2.6.22-570/include/linux/rtnetlink.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/rtnetlink.h 2007-12-21 15:36:14.000000000 -0500 @@ -261,7 +261,7 @@ RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, - RTA_MP_ALGO, + RTA_MP_ALGO, /* no longer used */ RTA_TABLE, __RTA_MAX }; @@ -570,15 +570,21 @@ } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); +extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len); #define 
rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) -extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); -extern int rtnl_unicast(struct sk_buff *skb, u32 pid); -extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +#define rtattr_parse_nested_compat(tb, max, rta, data, len) \ +({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \ + __rtattr_parse_nested_compat(tb, max, rta, len); }) + +extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); +extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags); -extern void rtnl_set_sk_err(u32 group, int error); +extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, u32 ts, u32 tsage, long expires, @@ -638,6 +644,18 @@ ({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ (skb)->len; }) +#define RTA_NEST_COMPAT(skb, type, attrlen, data) \ +({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \ + RTA_PUT(skb, type, attrlen, data); \ + RTA_NEST(skb, type); \ + __start; }) + +#define RTA_NEST_COMPAT_END(skb, start) \ +({ struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \ + (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ + RTA_NEST_END(skb, __nest); \ + (skb)->len; }) + #define RTA_NEST_CANCEL(skb, start) \ ({ if (start) \ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ diff -Nurb linux-2.6.22-570/include/linux/sched.h linux-2.6.22-591/include/linux/sched.h --- linux-2.6.22-570/include/linux/sched.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/sched.h 2007-12-21 15:36:14.000000000 -0500 @@ -26,7 
+26,9 @@ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ +#define CLONE_NEWUSER 0x20000000 /* New user namespace */ #define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */ +#define CLONE_NEWNET 0x40000000 /* New network namespace */ /* * Scheduling policies @@ -266,6 +268,7 @@ asmlinkage void schedule(void); struct nsproxy; +struct user_namespace; /* Maximum number of active map areas.. This is a random (large) number */ #define DEFAULT_MAX_MAP_COUNT 65536 @@ -325,6 +328,27 @@ (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +extern void set_dumpable(struct mm_struct *mm, int value); +extern int get_dumpable(struct mm_struct *mm); + +/* mm flags */ +/* dumpable bits */ +#define MMF_DUMPABLE 0 /* core dump is permitted */ +#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ +#define MMF_DUMPABLE_BITS 2 + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 4 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -383,7 +407,7 @@ unsigned int token_priority; unsigned int last_interval; - unsigned char dumpable:2; + unsigned long flags; /* Must use atomic bitops to access the bits */ /* coredumping support */ int core_waiters; @@ -757,9 +781,6 @@ #endif }; -extern int partition_sched_domains(cpumask_t *partition1, - cpumask_t *partition2); - /* * Maximum cache size the migration-costs auto-tuning code will * search from: @@ -770,8 +791,6 @@ struct io_context; /* See blkdev.h */ -struct cpuset; - #define NGROUPS_SMALL 
32 #define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) struct group_info { @@ -912,7 +931,7 @@ unsigned int rt_priority; cputime_t utime, stime; unsigned long nvcsw, nivcsw; /* context switch counts */ - struct timespec start_time; + struct timespec start_time, real_start_time; /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; @@ -1067,11 +1086,16 @@ short il_next; #endif #ifdef CONFIG_CPUSETS - struct cpuset *cpuset; nodemask_t mems_allowed; int cpuset_mems_generation; int cpuset_mem_spread_rotor; #endif +#ifdef CONFIG_CONTAINERS + /* Container info protected by css_group_lock */ + struct css_group *containers; + /* cg_list protected by css_group_lock and tsk->alloc_lock */ + struct list_head cg_list; +#endif struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; @@ -1514,7 +1538,8 @@ /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset. + * pins the final release of task.io_context. Also protects ->cpuset and + * ->container.subsys[]. * * Nests both inside and outside of read_lock(&tasklist_lock). 
* It must not be nested with write_lock_irq(&tasklist_lock), diff -Nurb linux-2.6.22-570/include/linux/seccomp.h linux-2.6.22-591/include/linux/seccomp.h --- linux-2.6.22-570/include/linux/seccomp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/seccomp.h 2007-12-21 15:36:12.000000000 -0500 @@ -4,8 +4,6 @@ #ifdef CONFIG_SECCOMP -#define NR_SECCOMP_MODES 1 - #include #include @@ -23,6 +21,9 @@ return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); } +extern long prctl_get_seccomp(void); +extern long prctl_set_seccomp(unsigned long); + #else /* CONFIG_SECCOMP */ typedef struct { } seccomp_t; @@ -34,6 +35,16 @@ return 0; } +static inline long prctl_get_seccomp(void) +{ + return -EINVAL; +} + +static inline long prctl_set_seccomp(unsigned long arg2) +{ + return -EINVAL; +} + #endif /* CONFIG_SECCOMP */ #endif /* _LINUX_SECCOMP_H */ diff -Nurb linux-2.6.22-570/include/linux/security.h linux-2.6.22-591/include/linux/security.h --- linux-2.6.22-570/include/linux/security.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/security.h 2007-12-21 15:36:12.000000000 -0500 @@ -71,6 +71,7 @@ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); +extern unsigned long mmap_min_addr; /* * Values used in the task_security_ops calls */ @@ -1241,8 +1242,9 @@ int (*file_ioctl) (struct file * file, unsigned int cmd, unsigned long arg); int (*file_mmap) (struct file * file, - unsigned long reqprot, - unsigned long prot, unsigned long flags); + unsigned long reqprot, unsigned long prot, + unsigned long flags, unsigned long addr, + unsigned long addr_only); int (*file_mprotect) (struct vm_area_struct * vma, unsigned long reqprot, unsigned long prot); @@ -1814,9 +1816,12 @@ static inline int security_file_mmap (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags) + unsigned long flags, + unsigned long addr, + unsigned long addr_only) { - 
return security_ops->file_mmap (file, reqprot, prot, flags); + return security_ops->file_mmap (file, reqprot, prot, flags, addr, + addr_only); } static inline int security_file_mprotect (struct vm_area_struct *vma, @@ -2489,7 +2494,9 @@ static inline int security_file_mmap (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags) + unsigned long flags, + unsigned long addr, + unsigned long addr_only) { return 0; } diff -Nurb linux-2.6.22-570/include/linux/serial_8250.h linux-2.6.22-591/include/linux/serial_8250.h --- linux-2.6.22-570/include/linux/serial_8250.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/serial_8250.h 2007-12-21 15:36:12.000000000 -0500 @@ -57,6 +57,7 @@ int serial8250_register_port(struct uart_port *); void serial8250_unregister_port(int line); +void serial8250_unregister_by_port(struct uart_port *port); void serial8250_suspend_port(int line); void serial8250_resume_port(int line); diff -Nurb linux-2.6.22-570/include/linux/signal.h linux-2.6.22-591/include/linux/signal.h --- linux-2.6.22-570/include/linux/signal.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/signal.h 2007-12-21 15:36:12.000000000 -0500 @@ -238,12 +238,15 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern int show_unhandled_signals; struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern struct kmem_cache *sighand_cachep; +int unhandled_signal(struct task_struct *tsk, int sig); + /* * In POSIX a signal is sent either to a specific thread (Linux task) * or to the process as a whole (Linux thread group). 
How the signal diff -Nurb linux-2.6.22-570/include/linux/skbuff.h linux-2.6.22-591/include/linux/skbuff.h --- linux-2.6.22-570/include/linux/skbuff.h 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/include/linux/skbuff.h 2007-12-21 15:36:12.000000000 -0500 @@ -147,8 +147,8 @@ /* We divide dataref into two halves. The higher 16 bits hold references * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. It is up to the users of the skb to agree on - * where the payload starts. + * the entire skb->data. A clone of a headerless skb holds the length of + * the header in skb->hdr_len. * * All users must obey the rule that the skb->data reference count must be * greater than or equal to the payload reference count. @@ -206,6 +206,7 @@ * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header + * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored @@ -260,8 +261,9 @@ char cb[48]; unsigned int len, - data_len, - mac_len; + data_len; + __u16 mac_len, + hdr_len; union { __wsum csum; struct { @@ -1323,6 +1325,20 @@ } /** + * skb_clone_writable - is the header of a clone writable + * @skb: buffer to check + * @len: length up to which to write + * + * Returns true if modifying the header part of the cloned buffer + * does not require the data to be copied.
+ */ +static inline int skb_clone_writable(struct sk_buff *skb, int len) +{ + return !skb_header_cloned(skb) && + skb_headroom(skb) + len <= skb->hdr_len; +} + +/** * skb_cow - copy header of skb when it is required * @skb: buffer to cow * @headroom: needed headroom diff -Nurb linux-2.6.22-570/include/linux/slab.h linux-2.6.22-591/include/linux/slab.h --- linux-2.6.22-570/include/linux/slab.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/slab.h 2007-12-21 15:36:12.000000000 -0500 @@ -26,12 +26,14 @@ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ +/* The following flags affect the page allocator grouping pages by mobility */ +#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ +#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* * struct kmem_cache related prototypes */ diff -Nurb linux-2.6.22-570/include/linux/socket.h linux-2.6.22-591/include/linux/socket.h --- linux-2.6.22-570/include/linux/socket.h 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/include/linux/socket.h 2007-12-21 15:36:14.000000000 -0500 @@ -24,7 +24,6 @@ #include /* pid_t */ #include /* __user */ -extern int sysctl_somaxconn; #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); diff -Nurb linux-2.6.22-570/include/linux/string.h linux-2.6.22-591/include/linux/string.h --- linux-2.6.22-570/include/linux/string.h 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/include/linux/string.h 2007-12-21 15:36:12.000000000 -0500 @@ -105,8 +105,12 @@ #endif extern char *kstrdup(const char *s, gfp_t gfp); +extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern char **argv_split(gfp_t gfp, const char *str, int *argcp); +extern void argv_free(char **argv); + #ifdef __cplusplus } #endif diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth.h linux-2.6.22-591/include/linux/sunrpc/auth.h --- linux-2.6.22-570/include/linux/sunrpc/auth.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/sunrpc/auth.h 2007-12-21 15:36:12.000000000 -0500 @@ -16,6 +16,7 @@ #include #include +#include /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 @@ -31,22 +32,28 @@ /* * Client user credentials */ +struct rpc_auth; +struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ - struct rpc_credops * cr_ops; - unsigned long cr_expire; /* when to gc */ - atomic_t cr_count; /* ref count */ - unsigned short cr_flags; /* various flags */ + struct list_head cr_lru; /* lru garbage collection */ + struct rcu_head cr_rcu; + struct rpc_auth * cr_auth; + const struct rpc_credops *cr_ops; #ifdef RPC_DEBUG unsigned long cr_magic; /* 0x0f4aa4f0 */ #endif + unsigned long cr_expire; /* when to gc */ + unsigned long cr_flags; /* various flags */ + atomic_t cr_count; /* ref count */ uid_t cr_uid; /* per-flavor data */ }; -#define RPCAUTH_CRED_NEW 0x0001 -#define RPCAUTH_CRED_UPTODATE 0x0002 +#define RPCAUTH_CRED_NEW 0 +#define RPCAUTH_CRED_UPTODATE 1 +#define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 @@ -57,10 +64,10 @@ #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; - unsigned long nextgc; /* next garbage collection */ - unsigned long expire; /* cache expiry interval */ + spinlock_t lock; }; +struct rpc_authops; struct rpc_auth { unsigned int 
au_cslack; /* call cred size estimate */ /* guess at number of u32's auth adds before @@ -70,7 +77,7 @@ unsigned int au_verfsize; unsigned int au_flags; /* various flags */ - struct rpc_authops * au_ops; /* operations */ + const struct rpc_authops *au_ops; /* operations */ rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss @@ -116,17 +123,19 @@ void *, __be32 *, void *); }; -extern struct rpc_authops authunix_ops; -extern struct rpc_authops authnull_ops; -#ifdef CONFIG_SUNRPC_SECURE -extern struct rpc_authops authdes_ops; -#endif +extern const struct rpc_authops authunix_ops; +extern const struct rpc_authops authnull_ops; + +void __init rpc_init_authunix(void); +void __init rpcauth_init_module(void); +void __exit rpcauth_remove_module(void); -int rpcauth_register(struct rpc_authops *); -int rpcauth_unregister(struct rpc_authops *); +int rpcauth_register(const struct rpc_authops *); +int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); -void rpcauth_destroy(struct rpc_auth *); +void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); @@ -139,8 +148,9 @@ int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); -int rpcauth_init_credcache(struct rpc_auth *, unsigned long); -void rpcauth_free_credcache(struct rpc_auth *); +int rpcauth_init_credcache(struct rpc_auth *); +void rpcauth_destroy_credcache(struct rpc_auth *); +void rpcauth_clear_credcache(struct rpc_cred_cache *); static inline struct rpc_cred * get_rpccred(struct 
rpc_cred *cred) diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth_gss.h linux-2.6.22-591/include/linux/sunrpc/auth_gss.h --- linux-2.6.22-570/include/linux/sunrpc/auth_gss.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/sunrpc/auth_gss.h 2007-12-21 15:36:12.000000000 -0500 @@ -85,11 +85,6 @@ struct gss_upcall_msg *gc_upcall; }; -#define gc_uid gc_base.cr_uid -#define gc_count gc_base.cr_count -#define gc_flags gc_base.cr_flags -#define gc_expire gc_base.cr_expire - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff -Nurb linux-2.6.22-570/include/linux/sunrpc/clnt.h linux-2.6.22-591/include/linux/sunrpc/clnt.h --- linux-2.6.22-570/include/linux/sunrpc/clnt.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/sunrpc/clnt.h 2007-12-21 15:36:12.000000000 -0500 @@ -24,8 +24,10 @@ * The high-level client handle */ struct rpc_clnt { - atomic_t cl_count; /* Number of clones */ - atomic_t cl_users; /* number of references */ + struct kref cl_kref; /* Number of references */ + struct list_head cl_clients; /* Global list of clients */ + struct list_head cl_tasks; /* List of tasks */ + spinlock_t cl_lock; /* spinlock */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ @@ -41,10 +43,7 @@ unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1,/* use getport() */ - cl_oneshot : 1,/* dispose after use */ - cl_dead : 1,/* abandoned */ - cl_tag : 1;/* context tagging */ + cl_autobind : 1;/* use getport() */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -111,17 +110,15 @@ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_INTR (1UL << 1) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) -#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) -#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) -#define RPC_CLNT_CREATE_NOPING (1UL << 5) -#define 
RPC_CLNT_CREATE_DISCRTRY (1UL << 6) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) +#define RPC_CLNT_CREATE_NOPING (1UL << 4) +#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); -int rpc_shutdown_client(struct rpc_clnt *); -int rpc_destroy_client(struct rpc_clnt *); +void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); @@ -133,13 +130,14 @@ void *calldata); int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags); +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, + int flags); void rpc_restart_call(struct rpc_task *); void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); -int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff -Nurb linux-2.6.22-570/include/linux/sunrpc/gss_api.h linux-2.6.22-591/include/linux/sunrpc/gss_api.h --- linux-2.6.22-570/include/linux/sunrpc/gss_api.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/sunrpc/gss_api.h 2007-12-21 15:36:12.000000000 -0500 @@ -77,7 +77,7 @@ struct module *gm_owner; struct xdr_netobj gm_oid; char *gm_name; - struct gss_api_ops *gm_ops; + const struct gss_api_ops *gm_ops; /* pseudoflavors supported by this mechanism: */ int gm_pf_num; struct pf_desc * gm_pfs; diff -Nurb linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h 
linux-2.6.22-591/include/linux/sunrpc/rpc_pipe_fs.h --- linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/sunrpc/rpc_pipe_fs.h 2007-12-21 15:36:12.000000000 -0500 @@ -23,9 +23,11 @@ void *private; struct list_head pipe; struct list_head in_upcall; + struct list_head in_downcall; int pipelen; int nreaders; int nwriters; + int nkern_readwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; diff -Nurb linux-2.6.22-570/include/linux/sunrpc/sched.h linux-2.6.22-591/include/linux/sunrpc/sched.h --- linux-2.6.22-570/include/linux/sunrpc/sched.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/sunrpc/sched.h 2007-12-21 15:36:12.000000000 -0500 @@ -110,11 +110,6 @@ if (!list_empty(head) && \ ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) -/* .. and walking list of all tasks */ -#define alltask_for_each(task, pos, head) \ - list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_task)),1) - typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { diff -Nurb linux-2.6.22-570/include/linux/syscalls.h linux-2.6.22-591/include/linux/syscalls.h --- linux-2.6.22-570/include/linux/syscalls.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/syscalls.h 2007-12-21 15:36:12.000000000 -0500 @@ -110,6 +110,9 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data); asmlinkage long sys_personality(u_long personality); +asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, + loff_t offset, loff_t nbytes); + asmlinkage long sys_sigpending(old_sigset_t __user *set); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, @@ -612,7 +615,11 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags, const struct itimerspec __user *utmr); asmlinkage long sys_eventfd(unsigned int count); +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, 
loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +asmlinkage long sys_revokeat(int dfd, const char __user *filename); +asmlinkage long sys_frevoke(unsigned int fd); + #endif diff -Nurb linux-2.6.22-570/include/linux/sysctl.h linux-2.6.22-591/include/linux/sysctl.h --- linux-2.6.22-570/include/linux/sysctl.h 2007-12-21 15:36:02.000000000 -0500 +++ linux-2.6.22-591/include/linux/sysctl.h 2007-12-21 15:36:14.000000000 -0500 @@ -31,6 +31,7 @@ struct file; struct completion; +struct net; #define CTL_MAXNAME 10 /* how many path components do we allow in a call to sysctl? In other words, what is @@ -166,6 +167,7 @@ KERN_MAX_LOCK_DEPTH=74, KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ + KERN_POWEROFF_CMD=77, /* string: poweroff command line */ }; @@ -208,6 +210,7 @@ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? 
*/ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ + VM_HUGETLB_TREAT_MOVABLE=36, /* Allocate hugepages from ZONE_MOVABLE */ /* s390 vm cmm sysctls */ VM_CMM_PAGES=1111, @@ -843,6 +846,9 @@ }; /* CTL_DEBUG names: */ +enum { + DEBUG_UNHANDLED_SIGNALS = 1, +}; /* CTL_DEV names: */ enum { @@ -980,6 +986,7 @@ void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); +extern ctl_handler sysctl_data; extern ctl_handler sysctl_string; extern ctl_handler sysctl_intvec; extern ctl_handler sysctl_jiffies; @@ -1056,6 +1063,12 @@ void unregister_sysctl_table(struct ctl_table_header * table); +#ifdef CONFIG_NET +extern struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table); +extern void unregister_net_sysctl_table(struct ctl_table_header *header); +extern ctl_table net_root_table[]; +#endif + #else /* __KERNEL__ */ #endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-570/include/linux/sysdev.h linux-2.6.22-591/include/linux/sysdev.h --- linux-2.6.22-570/include/linux/sysdev.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/sysdev.h 2007-12-21 15:36:12.000000000 -0500 @@ -101,8 +101,7 @@ #define _SYSDEV_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, \ - .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff -Nurb linux-2.6.22-570/include/linux/sysfs.h linux-2.6.22-591/include/linux/sysfs.h --- linux-2.6.22-570/include/linux/sysfs.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/sysfs.h 2007-12-21 15:36:14.000000000 -0500 @@ -19,9 +19,11 @@ struct kobject; struct module; -struct nameidata; -struct dentry; +/* FIXME + * The *owner field is no longer used, but leave around + * until the tree gets cleaned up fully. 
+ */ struct attribute { const char * name; struct module * owner; @@ -41,13 +43,13 @@ */ #define __ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } #define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ .show = _name##_show, \ } @@ -61,8 +63,10 @@ struct attribute attr; size_t size; void *private; - ssize_t (*read)(struct kobject *, char *, loff_t, size_t); - ssize_t (*write)(struct kobject *, char *, loff_t, size_t); + ssize_t (*read)(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); + ssize_t (*write)(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); int (*mmap)(struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; @@ -72,12 +76,23 @@ ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); }; +struct shadow_dir_operations { + const void *(*current_tag)(void); + const void *(*kobject_tag)(struct kobject *kobj); +}; + +#define SYSFS_TYPE_MASK 0x00ff #define SYSFS_ROOT 0x0001 #define SYSFS_DIR 0x0002 #define SYSFS_KOBJ_ATTR 0x0004 #define SYSFS_KOBJ_BIN_ATTR 0x0008 #define SYSFS_KOBJ_LINK 0x0020 -#define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK) +#define SYSFS_SHADOW_DIR 0x0040 +#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) + +#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK +#define SYSFS_FLAG_REMOVED 0x0100 +#define SYSFS_FLAG_SHADOWED 0x0200 #ifdef CONFIG_SYSFS @@ -85,13 +100,13 @@ void (*func)(void *), void *data, struct module *owner); extern int __must_check -sysfs_create_dir(struct kobject *, struct dentry *); +sysfs_create_dir(struct kobject *); extern void sysfs_remove_dir(struct kobject *); extern int __must_check -sysfs_rename_dir(struct kobject *, struct dentry *, const 
char *new_name); +sysfs_rename_dir(struct kobject *kobj, const char *new_name); extern int __must_check sysfs_move_dir(struct kobject *, struct kobject *); @@ -114,6 +129,13 @@ extern void sysfs_remove_link(struct kobject *, const char * name); +extern int +sysfs_rename_link(struct kobject *kobj, struct kobject *target, + const char *old_name, const char *new_name); + +extern void +sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); + int __must_check sysfs_create_bin_file(struct kobject *kobj, struct bin_attribute *attr); void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); @@ -128,11 +150,7 @@ void sysfs_notify(struct kobject * k, char *dir, char *attr); - -extern int sysfs_make_shadowed_dir(struct kobject *kobj, - void * (*follow_link)(struct dentry *, struct nameidata *)); -extern struct dentry *sysfs_create_shadow_dir(struct kobject *kobj); -extern void sysfs_remove_shadow_dir(struct dentry *dir); +int sysfs_enable_shadowing(struct kobject *, const struct shadow_dir_operations *); extern int __must_check sysfs_init(void); @@ -144,7 +162,7 @@ return -ENOSYS; } -static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow) +static inline int sysfs_create_dir(struct kobject * kobj) { return 0; } @@ -154,9 +172,7 @@ ; } -static inline int sysfs_rename_dir(struct kobject * k, - struct dentry *new_parent, - const char *new_name) +static inline int sysfs_rename_dir(struct kobject * kobj, const char *new_name) { return 0; } @@ -195,6 +211,17 @@ ; } +static inline int +sysfs_rename_link(struct kobject * k, struct kobject *t, + const char *old_name, const char * new_name) +{ + return 0; +} + +static inline void +sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name) +{ +} static inline int sysfs_create_bin_file(struct kobject * k, struct bin_attribute * a) { @@ -231,8 +258,8 @@ { } -static inline int sysfs_make_shadowed_dir(struct kobject *kobj, - void * (*follow_link)(struct dentry 
*, struct nameidata *)) +static inline int sysfs_enable_shadowing(struct kobject *kobj, + const struct shadow_dir_operations *shadow_ops) { return 0; } diff -Nurb linux-2.6.22-570/include/linux/taskstats.h linux-2.6.22-591/include/linux/taskstats.h --- linux-2.6.22-570/include/linux/taskstats.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/taskstats.h 2007-12-21 15:36:12.000000000 -0500 @@ -31,7 +31,7 @@ */ -#define TASKSTATS_VERSION 4 +#define TASKSTATS_VERSION 5 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -149,6 +149,9 @@ __u64 read_bytes; /* bytes of read I/O */ __u64 write_bytes; /* bytes of write I/O */ __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ + + __u64 nvcsw; /* voluntary_ctxt_switches */ + __u64 nivcsw; /* nonvoluntary_ctxt_switches */ }; diff -Nurb linux-2.6.22-570/include/linux/tick.h linux-2.6.22-591/include/linux/tick.h --- linux-2.6.22-570/include/linux/tick.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/tick.h 2007-12-21 15:36:12.000000000 -0500 @@ -40,6 +40,7 @@ * @idle_sleeps: Number of idle calls, where the sched tick was stopped * @idle_entrytime: Time when the idle call was entered * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @sleep_length: Duration of the current idle sleep */ struct tick_sched { struct hrtimer sched_timer; @@ -52,6 +53,7 @@ unsigned long idle_sleeps; ktime_t idle_entrytime; ktime_t idle_sleeptime; + ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; ktime_t idle_expires; @@ -100,10 +102,18 @@ extern void tick_nohz_stop_sched_tick(void); extern void tick_nohz_restart_sched_tick(void); extern void tick_nohz_update_jiffies(void); +extern ktime_t tick_nohz_get_sleep_length(void); +extern unsigned long tick_nohz_get_idle_jiffies(void); # else static inline void tick_nohz_stop_sched_tick(void) { } static inline void tick_nohz_restart_sched_tick(void) { } static inline 
void tick_nohz_update_jiffies(void) { } +static inline ktime_t tick_nohz_get_sleep_length(void) +{ + ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; + + return len; +} # endif /* !NO_HZ */ #endif diff -Nurb linux-2.6.22-570/include/linux/time.h linux-2.6.22-591/include/linux/time.h --- linux-2.6.22-570/include/linux/time.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/linux/time.h 2007-12-21 15:36:14.000000000 -0500 @@ -116,6 +116,8 @@ extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday(struct timespec *tv); +extern void getboottime(struct timespec *ts); +extern void monotonic_to_bootbased(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_is_continuous(void); diff -Nurb linux-2.6.22-570/include/linux/union_fs.h linux-2.6.22-591/include/linux/union_fs.h --- linux-2.6.22-570/include/linux/union_fs.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/union_fs.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_UNION_FS_H +#define _LINUX_UNION_FS_H + +#define UNIONFS_VERSION "2.0" +/* + * DEFINITIONS FOR USER AND KERNEL CODE: + */ +# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int) +# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int) + +/* We don't support normal remount, but unionctl uses it. 
*/ +# define UNIONFS_REMOUNT_MAGIC 0x4a5a4380 + +/* should be at least LAST_USED_UNIONFS_PERMISSION<<1 */ +#define MAY_NFSRO 16 + +#endif /* _LINUX_UNION_FS_H */ + diff -Nurb linux-2.6.22-570/include/linux/unwind.h linux-2.6.22-591/include/linux/unwind.h --- linux-2.6.22-570/include/linux/unwind.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/unwind.h 2007-12-21 15:36:12.000000000 -0500 @@ -14,6 +14,63 @@ struct module; +#ifdef CONFIG_STACK_UNWIND + +#include + +#ifndef ARCH_UNWIND_SECTION_NAME +#define ARCH_UNWIND_SECTION_NAME ".eh_frame" +#endif + +/* + * Initialize unwind support. + */ +extern void unwind_init(void); +extern void unwind_setup(void); + +#ifdef CONFIG_MODULES + +extern void *unwind_add_table(struct module *, + const void *table_start, + unsigned long table_size); + +extern void unwind_remove_table(void *handle, int init_only); + +#endif + +extern int unwind_init_frame_info(struct unwind_frame_info *, + struct task_struct *, + /*const*/ struct pt_regs *); + +/* + * Prepare to unwind a blocked task. + */ +extern int unwind_init_blocked(struct unwind_frame_info *, + struct task_struct *); + +/* + * Prepare to unwind the currently running thread. + */ +extern int unwind_init_running(struct unwind_frame_info *, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg); + +/* + * Unwind to the previous frame. Returns 0 if successful, negative + * number in case of an error. + */ +extern int unwind(struct unwind_frame_info *); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. 
+ */ +extern int unwind_to_user(struct unwind_frame_info *); + +#else + struct unwind_frame_info {}; static inline void unwind_init(void) {} @@ -28,12 +85,12 @@ return NULL; } +#endif + static inline void unwind_remove_table(void *handle, int init_only) { } -#endif - static inline int unwind_init_frame_info(struct unwind_frame_info *info, struct task_struct *tsk, const struct pt_regs *regs) @@ -65,4 +122,6 @@ return -ENOSYS; } +#endif + #endif /* _LINUX_UNWIND_H */ diff -Nurb linux-2.6.22-570/include/linux/usb.h linux-2.6.22-591/include/linux/usb.h --- linux-2.6.22-570/include/linux/usb.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/usb.h 2007-12-21 15:36:14.000000000 -0500 @@ -146,6 +146,10 @@ * active alternate setting */ unsigned num_altsetting; /* number of alternate settings */ + /* If there is an interface association descriptor then it will list + * the associated interfaces */ + struct usb_interface_assoc_descriptor *intf_assoc; + int minor; /* minor number this interface is * bound to */ enum usb_interface_condition condition; /* state of binding */ @@ -175,6 +179,7 @@ /* this maximum is arbitrary */ #define USB_MAXINTERFACES 32 +#define USB_MAXIADS USB_MAXINTERFACES/2 /** * struct usb_interface_cache - long-term representation of a device interface @@ -245,6 +250,11 @@ struct usb_config_descriptor desc; char *string; /* iConfiguration string, if present */ + + /* List of any Interface Association Descriptors in this + * configuration. 
*/ + struct usb_interface_assoc_descriptor *intf_assoc[USB_MAXIADS]; + /* the interfaces associated with this configuration, * stored in no particular order */ struct usb_interface *interface[USB_MAXINTERFACES]; diff -Nurb linux-2.6.22-570/include/linux/user_namespace.h linux-2.6.22-591/include/linux/user_namespace.h --- linux-2.6.22-570/include/linux/user_namespace.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/linux/user_namespace.h 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,61 @@ +#ifndef _LINUX_USER_NAMESPACE_H +#define _LINUX_USER_NAMESPACE_H + +#include +#include +#include +#include + +#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) +#define UIDHASH_SZ (1 << UIDHASH_BITS) + +struct user_namespace { + struct kref kref; + struct list_head uidhash_table[UIDHASH_SZ]; + struct user_struct *root_user; +}; + +extern struct user_namespace init_user_ns; + +#ifdef CONFIG_USER_NS + +static inline struct user_namespace *get_user_ns(struct user_namespace *ns) +{ + if (ns) + kref_get(&ns->kref); + return ns; +} + +extern struct user_namespace *copy_user_ns(int flags, + struct user_namespace *old_ns); +extern void free_user_ns(struct kref *kref); + +static inline void put_user_ns(struct user_namespace *ns) +{ + if (ns) + kref_put(&ns->kref, free_user_ns); +} + +#else + +static inline struct user_namespace *get_user_ns(struct user_namespace *ns) +{ + return &init_user_ns; +} + +static inline struct user_namespace *copy_user_ns(int flags, + struct user_namespace *old_ns) +{ + if (flags & CLONE_NEWUSER) + return ERR_PTR(-EINVAL); + + return NULL; +} + +static inline void put_user_ns(struct user_namespace *ns) +{ +} + +#endif + +#endif /* _LINUX_USER_NAMESPACE_H */ diff -Nurb linux-2.6.22-570/include/linux/utsname.h linux-2.6.22-591/include/linux/utsname.h --- linux-2.6.22-570/include/linux/utsname.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/utsname.h 2007-12-21 15:36:12.000000000 -0500 @@ -48,26 +48,14 @@ kref_get(&ns->kref); } 
-#ifdef CONFIG_UTS_NS -extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns); +extern struct uts_namespace *copy_utsname(unsigned long flags, + struct uts_namespace *ns); extern void free_uts_ns(struct kref *kref); static inline void put_uts_ns(struct uts_namespace *ns) { kref_put(&ns->kref, free_uts_ns); } -#else -static inline struct uts_namespace *copy_utsname(int flags, - struct uts_namespace *ns) -{ - return ns; -} - -static inline void put_uts_ns(struct uts_namespace *ns) -{ -} -#endif - static inline struct new_utsname *utsname(void) { return &current->nsproxy->uts_ns->name; diff -Nurb linux-2.6.22-570/include/linux/vmalloc.h linux-2.6.22-591/include/linux/vmalloc.h --- linux-2.6.22-570/include/linux/vmalloc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/linux/vmalloc.h 2007-12-21 15:36:12.000000000 -0500 @@ -65,9 +65,10 @@ unsigned long flags, int node, gfp_t gfp_mask); extern struct vm_struct *remove_vm_area(void *addr); + extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); -extern void unmap_vm_area(struct vm_struct *area); +extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* * Internals. Dont't use.. 
diff -Nurb linux-2.6.22-570/include/net/addrconf.h linux-2.6.22-591/include/net/addrconf.h --- linux-2.6.22-570/include/net/addrconf.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/net/addrconf.h 2007-12-21 15:36:12.000000000 -0500 @@ -61,7 +61,7 @@ extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) extern int ipv6_chk_home_addr(struct in6_addr *addr); #endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, diff -Nurb linux-2.6.22-570/include/net/af_unix.h linux-2.6.22-591/include/net/af_unix.h --- linux-2.6.22-570/include/net/af_unix.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/net/af_unix.h 2007-12-21 15:36:14.000000000 -0500 @@ -91,12 +91,11 @@ #define unix_sk(__sk) ((struct unix_sock *)__sk) #ifdef CONFIG_SYSCTL -extern int sysctl_unix_max_dgram_qlen; -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); +extern void unix_sysctl_register(struct net *net); +extern void unix_sysctl_unregister(struct net *net); #else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} +static inline void unix_sysctl_register(struct net *net) {} +static inline void unix_sysctl_unregister(struct net *net) {} #endif #endif #endif diff -Nurb linux-2.6.22-570/include/net/arp.h linux-2.6.22-591/include/net/arp.h --- linux-2.6.22-570/include/net/arp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/arp.h 2007-12-21 15:36:14.000000000 -0500 @@ -11,7 +11,7 @@ extern void arp_init(void); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); -extern int arp_ioctl(unsigned int cmd, void __user *arg); +extern int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, 
unsigned char *src_hw, unsigned char *th); diff -Nurb linux-2.6.22-570/include/net/dst.h linux-2.6.22-591/include/net/dst.h --- linux-2.6.22-570/include/net/dst.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/dst.h 2007-12-21 15:36:12.000000000 -0500 @@ -47,7 +47,6 @@ #define DST_NOXFRM 2 #define DST_NOPOLICY 4 #define DST_NOHASH 8 -#define DST_BALANCED 0x10 unsigned long expires; unsigned short header_len; /* more space at head required */ diff -Nurb linux-2.6.22-570/include/net/fib_rules.h linux-2.6.22-591/include/net/fib_rules.h --- linux-2.6.22-570/include/net/fib_rules.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/fib_rules.h 2007-12-21 15:36:14.000000000 -0500 @@ -56,12 +56,12 @@ int (*fill)(struct fib_rule *, struct sk_buff *, struct nlmsghdr *, struct fib_rule_hdr *); - u32 (*default_pref)(void); + u32 (*default_pref)(struct fib_rules_ops *ops); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush * the route cache if one exists. 
*/ - void (*flush_cache)(void); + void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; const struct nla_policy *policy; @@ -101,8 +101,8 @@ return frh->table; } -extern int fib_rules_register(struct fib_rules_ops *); -extern int fib_rules_unregister(struct fib_rules_ops *); +extern int fib_rules_register(struct net *net, struct fib_rules_ops *); +extern int fib_rules_unregister(struct net *net, struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, diff -Nurb linux-2.6.22-570/include/net/flow.h linux-2.6.22-591/include/net/flow.h --- linux-2.6.22-570/include/net/flow.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/flow.h 2007-12-21 15:36:14.000000000 -0500 @@ -8,9 +8,11 @@ #define _NET_FLOW_H #include +#include #include struct flowi { + struct net *fl_net; int oif; int iif; __u32 mark; @@ -67,20 +69,16 @@ __be32 spi; -#ifdef CONFIG_IPV6_MIP6 struct { __u8 type; } mht; -#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi -#ifdef CONFIG_IPV6_MIP6 #define fl_mh_type uli_u.mht.type -#endif __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff -Nurb linux-2.6.22-570/include/net/inet6_hashtables.h linux-2.6.22-591/include/net/inet6_hashtables.h --- linux-2.6.22-570/include/net/inet6_hashtables.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/inet6_hashtables.h 2007-12-21 15:36:14.000000000 -0500 @@ -62,31 +62,31 @@ const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif); + const int dif, struct net *net); extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, - const int dif); + const int dif, struct net *net); static inline struct sock *__inet6_lookup(struct inet_hashinfo 
*hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, struct net *net) { struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, net); if (sk) return sk; - return inet6_lookup_listener(hashinfo, daddr, hnum, dif); + return inet6_lookup_listener(hashinfo, daddr, hnum, dif, net); } extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, - const int dif); + const int dif, struct net *net); #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ #endif /* _INET6_HASHTABLES_H */ diff -Nurb linux-2.6.22-570/include/net/inet_hashtables.h linux-2.6.22-591/include/net/inet_hashtables.h --- linux-2.6.22-570/include/net/inet_hashtables.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/inet_hashtables.h 2007-12-21 15:36:14.000000000 -0500 @@ -75,6 +75,7 @@ * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { + struct net *net; unsigned short port; signed short fastreuse; struct hlist_node node; @@ -138,34 +139,35 @@ extern struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, + struct net *net, const unsigned short snum); extern void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); -static inline int inet_bhashfn(const __u16 lport, const int bhash_size) +static inline int inet_bhashfn(struct net *net, const __u16 lport, const int bhash_size) { - return lport & (bhash_size - 1); + return (((unsigned long)net) ^ lport) & (bhash_size - 1); } extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum); /* These can have wildcards, don't try too hard. 
*/ -static inline int inet_lhashfn(const unsigned short num) +static inline int inet_lhashfn(struct net *net, const unsigned short num) { - return num & (INET_LHTABLE_SIZE - 1); + return (((unsigned long)net) ^ num) & (INET_LHTABLE_SIZE - 1); } static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return inet_lhashfn(inet_sk(sk)->num); + return inet_lhashfn(sk->sk_net, inet_sk(sk)->num); } /* Caller must disable local BH processing. */ static inline void __inet_inherit_port(struct inet_hashinfo *table, struct sock *sk, struct sock *child) { - const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + const int bhash = inet_bhashfn(sk->sk_net, inet_sk(child)->num, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -274,12 +276,13 @@ extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, - const int dif); + const int dif, struct net *net); static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - __be32 daddr, __be16 dport, int dif) + __be32 daddr, __be16 dport, + int dif, struct net *net) { - return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); + return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif, net); } /* Socket demux engine toys. 
*/ @@ -313,30 +316,34 @@ (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == __net)) +#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == __net)) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net) \ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == __net)) +#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif, __net) \ (((__sk)->sk_hash == (__hash)) && \ (inet_twsk(__sk)->tw_daddr 
== (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == __net)) #endif /* 64-bit arch */ /* @@ -349,7 +356,7 @@ __inet_lookup_established(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif) + const int dif, struct net *net) { INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); @@ -358,19 +365,19 @@ /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ sk_for_each(sk, node, &head->twchain) { - if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net)) goto hit; } sk = NULL; @@ -386,32 +393,32 @@ inet_lookup_established(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { return __inet_lookup_established(hashinfo, saddr, sport, daddr, - ntohs(dport), dif); + ntohs(dport), dif, net); } static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { u16 hnum = ntohs(dport); struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, - hnum, dif); - return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); + hnum, dif, net); + return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif, net); } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif, net); local_bh_enable(); return sk; diff -Nurb linux-2.6.22-570/include/net/inet_sock.h linux-2.6.22-591/include/net/inet_sock.h --- linux-2.6.22-570/include/net/inet_sock.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/inet_sock.h 2007-12-21 15:36:14.000000000 -0500 @@ -171,10 +171,12 @@ extern u32 inet_ehash_secret; extern void build_ehash_secret(void); -static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport, +static inline unsigned int inet_ehashfn(struct net *net, + const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - return 
jhash_2words((__force __u32) laddr ^ (__force __u32) faddr, + return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr ^ + (__force __u32) ((unsigned long)net), ((__u32) lport) << 16 | (__force __u32)fport, inet_ehash_secret); } @@ -187,7 +189,7 @@ const __be32 faddr = inet->daddr; const __be16 fport = inet->dport; - return inet_ehashfn(laddr, lport, faddr, fport); + return inet_ehashfn(sk->sk_net, laddr, lport, faddr, fport); } #endif /* _INET_SOCK_H */ diff -Nurb linux-2.6.22-570/include/net/inet_timewait_sock.h linux-2.6.22-591/include/net/inet_timewait_sock.h --- linux-2.6.22-570/include/net/inet_timewait_sock.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/net/inet_timewait_sock.h 2007-12-21 15:36:14.000000000 -0500 @@ -115,6 +115,7 @@ #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_net __tw_common.skc_net #define tw_xid __tw_common.skc_xid #define tw_vx_info __tw_common.skc_vx_info #define tw_nid __tw_common.skc_nid diff -Nurb linux-2.6.22-570/include/net/inetpeer.h linux-2.6.22-591/include/net/inetpeer.h --- linux-2.6.22-570/include/net/inetpeer.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/inetpeer.h 2007-12-21 15:36:14.000000000 -0500 @@ -15,6 +15,8 @@ #include #include +struct net; + struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ @@ -22,7 +24,11 @@ __be32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ - struct inet_peer *unused_next, **unused_prevp; + union { + struct inet_peer *unused_next; + struct net *net; + } u; + struct inet_peer **unused_prevp; __u32 dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; @@ -34,7 +40,7 @@ void inet_initpeers(void) __init; /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(__be32 daddr, int create); +struct inet_peer 
*inet_getpeer(struct net *net, __be32 daddr, int create); /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); diff -Nurb linux-2.6.22-570/include/net/ip.h linux-2.6.22-591/include/net/ip.h --- linux-2.6.22-570/include/net/ip.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/ip.h 2007-12-21 15:36:14.000000000 -0500 @@ -149,13 +149,6 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -struct ipv4_config -{ - int log_martians; - int no_pmtu_disc; -}; - -extern struct ipv4_config ipv4_config; DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); #define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) #define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) @@ -171,27 +164,6 @@ extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign); extern void snmp_mib_free(void *ptr[2]); -extern int sysctl_local_port_range[2]; -extern int sysctl_ip_default_ttl; -extern int sysctl_ip_nonlocal_bind; - -/* From ip_fragment.c */ -extern int sysctl_ipfrag_high_thresh; -extern int sysctl_ipfrag_low_thresh; -extern int sysctl_ipfrag_time; -extern int sysctl_ipfrag_secret_interval; -extern int sysctl_ipfrag_max_dist; - -/* From inetpeer.c */ -extern int inet_peer_threshold; -extern int inet_peer_minttl; -extern int inet_peer_maxttl; -extern int inet_peer_gc_mintime; -extern int inet_peer_gc_maxtime; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - extern void ipfrag_init(void); #ifdef CONFIG_INET @@ -332,8 +304,6 @@ }; struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user); -extern int ip_frag_nqueues; -extern atomic_t ip_frag_mem; /* * Functions provided by ip_forward.c @@ -392,5 +362,6 @@ #endif extern struct ctl_table ipv4_table[]; +extern struct ctl_table multi_ipv4_table[]; #endif /* _IP_H */ diff -Nurb linux-2.6.22-570/include/net/ip_fib.h linux-2.6.22-591/include/net/ip_fib.h --- linux-2.6.22-570/include/net/ip_fib.h 
2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/ip_fib.h 2007-12-21 15:36:14.000000000 -0500 @@ -39,7 +39,6 @@ int fc_mx_len; int fc_mp_len; u32 fc_flow; - u32 fc_mp_alg; u32 fc_nlflags; struct nl_info fc_nlinfo; }; @@ -89,6 +88,7 @@ #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED u32 fib_mp_alg; #endif + struct net * fib_net; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -103,10 +103,6 @@ unsigned char nh_sel; unsigned char type; unsigned char scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - __be32 network; - __be32 netmask; -#endif struct fib_info *fi; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rule *r; @@ -145,14 +141,6 @@ #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -#define FIB_RES_NETWORK(res) ((res).network) -#define FIB_RES_NETMASK(res) ((res).netmask) -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -#define FIB_RES_NETWORK(res) (0) -#define FIB_RES_NETMASK(res) (0) -#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ - struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -171,43 +159,43 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -extern struct fib_table *ip_fib_local_table; -extern struct fib_table *ip_fib_main_table; - -static inline struct fib_table *fib_get_table(u32 id) +static inline struct fib_table *fib_get_table(struct net *net, u32 id) { if (id != RT_TABLE_LOCAL) - return ip_fib_main_table; - return ip_fib_local_table; + return net->ip_fib_main_table; + return net->ip_fib_local_table; } -static inline struct fib_table *fib_new_table(u32 id) +static inline struct fib_table *fib_new_table(struct net *net, u32 id) { - return fib_get_table(id); + return fib_get_table(net, id); } static inline int fib_lookup(const struct flowi *flp, struct fib_result *res) { - if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) && - ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res)) + struct net *net = flp->fl_net; + struct 
fib_table *local_table = net->ip_fib_local_table; + struct fib_table *main_table = net->ip_fib_main_table; + if (local_table->tb_lookup(local_table, flp, res) && + main_table->tb_lookup(main_table, flp, res)) return -ENETUNREACH; return 0; } static inline void fib_select_default(const struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; + struct fib_table *main_table = net->ip_fib_main_table; if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res); + main_table->tb_select_default(main_table, flp, res); } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) -#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) extern int fib_lookup(struct flowi *flp, struct fib_result *res); -extern struct fib_table *fib_new_table(u32 id); -extern struct fib_table *fib_get_table(u32 id); +extern struct fib_table *fib_new_table(struct net *net, u32 id); +extern struct fib_table *fib_get_table(struct net *net, u32 id); extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -223,15 +211,17 @@ /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); -extern int fib_sync_down(__be32 local, struct net_device *dev, int force); +extern int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ extern struct fib_table *fib_hash_init(u32 id); +extern void fib_hash_exit(struct fib_table *tb); #ifdef CONFIG_IP_MULTIPLE_TABLES -extern void __init fib4_rules_init(void); +extern void fib4_rules_init(struct net * net); +extern void fib4_rules_exit(struct net * net); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); @@ -274,8 +264,11 @@ } #ifdef 
CONFIG_PROC_FS -extern int fib_proc_init(void); -extern void fib_proc_exit(void); +extern int fib_proc_init(struct net * net); +extern void fib_proc_exit(struct net * net); #endif +extern int fib_info_init(struct net *net); +extern void fib_info_exit(struct net *net); + #endif /* _NET_FIB_H */ diff -Nurb linux-2.6.22-570/include/net/ip_mp_alg.h linux-2.6.22-591/include/net/ip_mp_alg.h --- linux-2.6.22-570/include/net/ip_mp_alg.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/ip_mp_alg.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,96 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _NET_IP_MP_ALG_H -#define _NET_IP_MP_ALG_H - -#include -#include -#include - -struct fib_nh; - -struct ip_mp_alg_ops { - void (*mp_alg_select_route)(const struct flowi *flp, - struct rtable *rth, struct rtable **rp); - void (*mp_alg_flush)(void); - void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh); - void (*mp_alg_remove)(struct rtable *rth); -}; - -extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg); -extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg); - -extern struct ip_mp_alg_ops *ip_mp_alg_table[]; - -static inline int multipath_select_route(const struct flowi *flp, - struct rtable *rth, - struct rtable **rp) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - /* mp_alg_select_route _MUST_ be implemented */ - if (ops && (rth->u.dst.flags & DST_BALANCED)) { - ops->mp_alg_select_route(flp, rth, rp); - return 1; - } -#endif - return 0; -} - -static inline void multipath_flush(void) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - int i; - - for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) { - struct ip_mp_alg_ops *ops = ip_mp_alg_table[i]; - - if (ops && ops->mp_alg_flush) - 
ops->mp_alg_flush(); - } -#endif -} - -static inline void multipath_set_nhinfo(struct rtable *rth, - __be32 network, __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - if (ops && ops->mp_alg_set_nhinfo) - ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh); -#endif -} - -static inline void multipath_remove(struct rtable *rth) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; - - if (ops && ops->mp_alg_remove && - (rth->u.dst.flags & DST_BALANCED)) - ops->mp_alg_remove(rth); -#endif -} - -static inline int multipath_comparekeys(const struct flowi *flp1, - const struct flowi *flp2) -{ - return flp1->fl4_dst == flp2->fl4_dst && - flp1->fl4_src == flp2->fl4_src && - flp1->oif == flp2->oif && - flp1->mark == flp2->mark && - !((flp1->fl4_tos ^ flp2->fl4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK)); -} - -#endif /* _NET_IP_MP_ALG_H */ diff -Nurb linux-2.6.22-570/include/net/llc_conn.h linux-2.6.22-591/include/net/llc_conn.h --- linux-2.6.22-570/include/net/llc_conn.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/llc_conn.h 2007-12-21 15:36:14.000000000 -0500 @@ -93,7 +93,7 @@ return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, gfp_t priority, +extern struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot); extern void llc_sk_free(struct sock *sk); diff -Nurb linux-2.6.22-570/include/net/mip6.h linux-2.6.22-591/include/net/mip6.h --- linux-2.6.22-570/include/net/mip6.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/mip6.h 2007-12-21 15:36:12.000000000 -0500 @@ -54,8 +54,4 @@ #define IP6_MH_TYPE_BERROR 7 /* Binding Error */ #define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR -extern int mip6_init(void); -extern void mip6_fini(void); -extern int mip6_mh_filter(struct sock *sk, 
struct sk_buff *skb); - #endif diff -Nurb linux-2.6.22-570/include/net/neighbour.h linux-2.6.22-591/include/net/neighbour.h --- linux-2.6.22-570/include/net/neighbour.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/neighbour.h 2007-12-21 15:36:14.000000000 -0500 @@ -34,6 +34,7 @@ struct neigh_parms { + struct net *net; struct net_device *dev; struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); @@ -126,6 +127,7 @@ struct pneigh_entry { struct pneigh_entry *next; + struct net *net; struct net_device *dev; u8 flags; u8 key[0]; @@ -187,6 +189,7 @@ const void *pkey, struct net_device *dev); extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, + struct net *net, const void *pkey); extern struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey, @@ -205,21 +208,24 @@ struct net_device *dev); extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); +extern struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, struct net *net); extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); extern void neigh_parms_destroy(struct neigh_parms *parms); extern unsigned long neigh_rand_reach_time(unsigned long base); extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); -extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat); -extern int pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev); +extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); +extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); extern void neigh_app_ns(struct neighbour *n); extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); extern void 
__neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); -struct neigh_seq_state { +struct neigh_seq_state +{ + struct net *net; struct neigh_table *tbl; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); diff -Nurb linux-2.6.22-570/include/net/net_namespace.h linux-2.6.22-591/include/net/net_namespace.h --- linux-2.6.22-570/include/net/net_namespace.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/include/net/net_namespace.h 2007-12-21 15:36:14.000000000 -0500 @@ -0,0 +1,236 @@ +/* + * Operations on the network namespace + */ +#ifndef __NET_NET_NAMESPACE_H +#define __NET_NET_NAMESPACE_H + +#include +#include +#include +#include +#include +#include +#include + +struct sock; +struct xt_af_pernet; +struct ipv4_devconf; +struct neigh_parms; +struct inet_peer; +struct xt_table; +struct net { + atomic_t count; /* To decided when the network namespace + * should go + */ + atomic_t use_count; /* For references we destroy on demand */ + struct list_head list; /* list of network namespace structures */ + struct work_struct work; /* work struct for freeing */ + +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_net; + struct proc_dir_entry *proc_net_stat; + struct proc_dir_entry proc_net_root; +# ifdef CONFIG_NETFILTER + struct proc_dir_entry *proc_net_netfilter; +# endif +#endif +#ifdef CONFIG_SYSCTL + struct ctl_table_header net_table_header; +#endif + struct net_device loopback_dev; /* The loopback */ + struct list_head dev_base_head; /* All devices */ + + struct hlist_head *dev_name_head; + struct hlist_head *dev_index_head; + + struct sock * rtnl; /* rtnetlink socket */ + + + /* core netfilter */ + struct xt_af_pernet * xtn; + + /* core fib_rules */ + struct list_head rules_ops; + spinlock_t rules_mod_lock; + +#ifdef CONFIG_XFRM + u32 sysctl_xfrm_aevent_etime; + u32 sysctl_xfrm_aevent_rseqth; + int 
sysctl_xfrm_larval_drop; + u32 sysctl_xfrm_acq_expires; +#endif /* CONFIG_XFRM */ + + int sysctl_somaxconn; + +#ifdef CONFIG_PACKET + /* List of all packet sockets. */ + rwlock_t packet_sklist_lock; + struct hlist_head packet_sklist; +#endif /* CONFIG_PACKET */ +#ifdef CONFIG_UNIX + int sysctl_unix_max_dgram_qlen; + void * unix_sysctl; +#endif /* CONFIG_UNIX */ +#ifdef CONFIG_IP_MULTIPLE_TABLES + void * fib4_table; +#endif /* CONFIG_IP_MULTIPLE_TABLES */ +#ifdef CONFIG_IP_FIB_HASH + int fn_hash_last_dflt; +#endif +#ifdef CONFIG_IP_FIB_TRIE + int trie_last_dflt; +#endif +#ifndef CONFIG_IP_MULTIPLE_TABLES + struct fib_table *ip_fib_local_table; + struct fib_table *ip_fib_main_table; +#endif + struct hlist_head *ip_fib_table_hash; + struct sock *nlfl; + + /* fib_semantics */ + struct hlist_head *fib_info_hash; + struct hlist_head *fib_info_laddrhash; + unsigned int fib_info_hash_size; + unsigned int fib_info_cnt; + struct hlist_head *fib_info_devhash; + + /* af_inet.c */ + int sysctl_ip_nonlocal_bind; /* __read_mostly */ + int sysctl_ip_default_ttl; /* __read_mostly */ + int sysctl_ipfrag_high_thresh; + int sysctl_ipfrag_low_thresh; + int sysctl_ipfrag_time; + int sysctl_ipfrag_secret_interval; + int sysctl_ipfrag_max_dist; + int sysctl_ipv4_no_pmtu_disc; + int sysctl_local_port_range[2]; + int sysctl_ip_dynaddr; + int sysctl_tcp_timestamps; /* __read_mostly */ + int sysctl_tcp_window_scaling; /* __read_mostly */ + /* inetpeer.c */ + int inet_peer_threshold; + int inet_peer_minttl; + int inet_peer_maxttl; + int inet_peer_gc_mintime; + int inet_peer_gc_maxtime; + + /* devinet */ + struct ipv4_devconf *ipv4_devconf; + struct ipv4_devconf *ipv4_devconf_dflt; + + /* arp.c */ + struct neigh_parms *arp_neigh_parms_default; + + /* icmp.c */ + struct socket **__icmp_socket; + + /* inetpeer.c */ + struct inet_peer *peer_root; + int peer_total; + struct inet_peer *inet_peer_unused_head; + struct inet_peer **inet_peer_unused_tailp; + struct timer_list peer_periodic_timer; + + /* 
ip_fragment.c */ + struct hlist_head *ipq_hash; + u32 ipfrag_hash_rnd; + struct list_head ipq_lru_list; + int ip_frag_nqueues; + atomic_t ip_frag_mem; + struct timer_list ipfrag_secret_timer; + + /* udp.c */ + int udp_port_rover; + + /* iptable_filter.c */ + struct xt_table *ip_packet_filter; +}; + +extern struct net init_net; +extern struct list_head net_namespace_list; + +extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); +extern void __put_net(struct net *net); + +static inline struct net *get_net(struct net *net) +{ + atomic_inc(&net->count); + return net; +} + +static inline void put_net(struct net *net) +{ + if (atomic_dec_and_test(&net->count)) + __put_net(net); +} + +static inline struct net *hold_net(struct net *net) +{ + atomic_inc(&net->use_count); + return net; +} + +static inline void release_net(struct net *net) +{ + atomic_dec(&net->use_count); +} + +extern void net_lock(void); +extern void net_unlock(void); + +#define for_each_net(VAR) \ + list_for_each_entry(VAR, &net_namespace_list, list) + + +struct pernet_operations { + struct list_head list; + int (*init)(struct net *net); + void (*exit)(struct net *net); +}; + +extern int register_pernet_subsys(struct pernet_operations *); +extern void unregister_pernet_subsys(struct pernet_operations *); +extern int register_pernet_device(struct pernet_operations *); +extern void unregister_pernet_device(struct pernet_operations *); + +#ifdef CONFIG_PROC_FS +static inline struct net *PDE_NET(struct proc_dir_entry *pde) +{ + return pde->parent->data; +} + +static inline struct net *PROC_NET(const struct inode *inode) +{ + return PDE_NET(PDE(inode)); +} + +static inline struct proc_dir_entry *proc_net_create(struct net *net, + const char *name, mode_t mode, get_info_t *get_info) +{ + return create_proc_info_entry(name,mode, net->proc_net, get_info); +} + +static inline struct proc_dir_entry *proc_net_fops_create(struct net *net, + const char *name, mode_t mode, const struct file_operations 
*fops) +{ + struct proc_dir_entry *res = + create_proc_entry(name, mode, net->proc_net); + if (res) + res->proc_fops = fops; + return res; +} + +static inline void proc_net_remove(struct net *net, const char *name) +{ + remove_proc_entry(name, net->proc_net); +} + +#else + +#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) +#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) +static inline void proc_net_remove(struct net *net, const char *name) {} + +#endif /* CONFIG_PROC_FS */ + +#endif /* __NET_NET_NAMESPACE_H */ diff -Nurb linux-2.6.22-570/include/net/netlink.h linux-2.6.22-591/include/net/netlink.h --- linux-2.6.22-570/include/net/netlink.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/netlink.h 2007-12-21 15:36:14.000000000 -0500 @@ -118,6 +118,9 @@ * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute + * nla_nest_compat_start(skb, type, start a nested compat attribute + * len, data) + * nla_nest_compat_end(skb, type) finalize a nested compat attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: @@ -152,6 +155,7 @@ * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets + * nla_parse_nested_compat() parse nested compat attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= @@ -170,6 +174,7 @@ NLA_FLAG, NLA_MSECS, NLA_NESTED, + NLA_NESTED_COMPAT, NLA_NUL_STRING, NLA_BINARY, __NLA_TYPE_MAX, @@ -190,6 +195,7 @@ * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload + * NLA_NESTED_COMPAT Exact length of structure payload * All other Exact length of attribute 
payload * * Example: @@ -212,6 +218,7 @@ struct nl_info { struct nlmsghdr *nlh; u32 pid; + struct net *net; }; extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, @@ -733,6 +740,39 @@ { return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } + +/** + * nla_parse_nested_compat - parse nested compat attributes + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @nla: attribute containing the nested attributes + * @data: pointer to point to contained structure + * @len: length of contained structure + * @policy: validation policy + * + * Parse a nested compat attribute. The compat attribute contains a structure + * and optionally a set of nested attributes. On success the data pointer + * points to the nested data and tb contains the parsed attributes + * (see nla_parse). + */ +static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy, + int len) +{ + if (nla_len(nla) < len) + return -1; + if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr)) + return nla_parse_nested(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + +#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \ +({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \ + __nla_parse_nested_compat(tb, maxtype, nla, policy, len); }) /** * nla_put_u8 - Add a u16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -965,6 +1005,51 @@ } /** + * nla_nest_compat_start - Start a new level of nested compat attributes + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * @attrlen: length of structure + * @data: pointer to structure + * + * Start a nested compat attribute that contains both a structure and + * a set of nested attributes. 
+ * + * Returns the container attribute + */ +static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb, + int attrtype, int attrlen, + const void *data) +{ + struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); + + if (nla_put(skb, attrtype, attrlen, data) < 0) + return NULL; + if (nla_nest_start(skb, attrtype) == NULL) { + nlmsg_trim(skb, start); + return NULL; + } + return start; +} + +/** + * nla_nest_compat_end - Finalize nesting of compat attributes + * @skb: socket buffer the attribtues are stored in + * @start: container attribute + * + * Corrects the container attribute header to include the all + * appeneded attributes. + * + * Returns the total data length of the skb. + */ +static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) +{ + struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); + + start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; + return nla_nest_end(skb, nest); +} + +/** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in * @start: container attribute diff -Nurb linux-2.6.22-570/include/net/pkt_cls.h linux-2.6.22-591/include/net/pkt_cls.h --- linux-2.6.22-570/include/net/pkt_cls.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/pkt_cls.h 2007-12-21 15:36:14.000000000 -0500 @@ -2,6 +2,7 @@ #define __NET_PKT_CLS_H #include +#include #include #include @@ -357,7 +358,7 @@ if (indev[0]) { if (!skb->iif) return 0; - dev = __dev_get_by_index(skb->iif); + dev = __dev_get_by_index(&init_net, skb->iif); if (!dev || strcmp(indev, dev->name)) return 0; } diff -Nurb linux-2.6.22-570/include/net/protocol.h linux-2.6.22-591/include/net/protocol.h --- linux-2.6.22-570/include/net/protocol.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/protocol.h 2007-12-21 15:36:14.000000000 -0500 @@ -86,6 +86,7 @@ #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? 
*/ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ +#define INET_PROTOSW_NETNS 0x08 /* Multiple namespaces support? */ extern struct net_protocol *inet_protocol_base; extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; diff -Nurb linux-2.6.22-570/include/net/raw.h linux-2.6.22-591/include/net/raw.h --- linux-2.6.22-570/include/net/raw.h 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/include/net/raw.h 2007-12-21 15:36:14.000000000 -0500 @@ -34,7 +34,7 @@ extern rwlock_t raw_v4_lock; -extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, +extern struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int tag); diff -Nurb linux-2.6.22-570/include/net/rawv6.h linux-2.6.22-591/include/net/rawv6.h --- linux-2.6.22-570/include/net/rawv6.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/rawv6.h 2007-12-21 15:36:12.000000000 -0500 @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include + #define RAWV6_HTABLE_SIZE MAX_INET_PROTOS extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; @@ -23,6 +25,13 @@ int type, int code, int offset, __be32 info); +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +int rawv6_mh_filter_register(int (*filter)(struct sock *sock, + struct sk_buff *skb)); +int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, + struct sk_buff *skb)); +#endif + #endif #endif diff -Nurb linux-2.6.22-570/include/net/route.h linux-2.6.22-591/include/net/route.h --- linux-2.6.22-570/include/net/route.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/net/route.h 2007-12-21 15:36:14.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,6 @@ unsigned rt_flags; __u16 rt_type; - __u16 rt_multipath_alg; __be32 rt_dst; /* Path destination 
*/ __be32 rt_src; /* Path source */ @@ -123,9 +123,9 @@ extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); -extern unsigned inet_addr_type(__be32 addr); +extern unsigned inet_addr_type(struct net *net, __be32 addr); extern void ip_rt_multicast_event(struct in_device *); -extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); +extern int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg); extern void ip_rt_get_source(u8 *src, struct rtable *rt); extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); @@ -154,7 +154,8 @@ __be16 sport, __be16 dport, struct sock *sk, int flags) { - struct flowi fl = { .oif = oif, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = oif, .nl_u = { .ip4_u = { .daddr = dst, .saddr = src, .tos = tos } }, @@ -199,6 +200,7 @@ struct flowi fl; memcpy(&fl, &(*rp)->fl, sizeof(fl)); + fl.fl_net = sk->sk_net; fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; fl.proto = protocol; diff -Nurb linux-2.6.22-570/include/net/rtnetlink.h linux-2.6.22-591/include/net/rtnetlink.h --- linux-2.6.22-570/include/net/rtnetlink.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/rtnetlink.h 2007-12-21 15:36:12.000000000 -0500 @@ -22,4 +22,62 @@ return AF_UNSPEC; } +/** + * struct rtnl_link_ops - rtnetlink link operations + * + * @list: Used internally + * @kind: Identifier + * @maxtype: Highest device specific netlink attribute number + * @policy: Netlink policy for device specific attribute validation + * @validate: Optional validation function for netlink/changelink parameters + * @priv_size: sizeof net_device private space + * @setup: net_device setup function + * @newlink: Function for configuring and registering a new device + * @changelink: Function for changing parameters of an existing device + * @dellink: Function to remove a device + * @get_size: Function to calculate required room for dumping device + * specific 
netlink attributes + * @fill_info: Function to dump device specific netlink attributes + * @get_xstats_size: Function to calculate required room for dumping devic + * specific statistics + * @fill_xstats: Function to dump device specific statistics + */ +struct rtnl_link_ops { + struct list_head list; + + const char *kind; + + size_t priv_size; + void (*setup)(struct net_device *dev); + + int maxtype; + const struct nla_policy *policy; + int (*validate)(struct nlattr *tb[], + struct nlattr *data[]); + + int (*newlink)(struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]); + int (*changelink)(struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]); + void (*dellink)(struct net_device *dev); + + size_t (*get_size)(const struct net_device *dev); + int (*fill_info)(struct sk_buff *skb, + const struct net_device *dev); + + size_t (*get_xstats_size)(const struct net_device *dev); + int (*fill_xstats)(struct sk_buff *skb, + const struct net_device *dev); +}; + +extern int __rtnl_link_register(struct rtnl_link_ops *ops); +extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); + +extern int rtnl_link_register(struct rtnl_link_ops *ops); +extern void rtnl_link_unregister(struct rtnl_link_ops *ops); + +#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) + #endif diff -Nurb linux-2.6.22-570/include/net/sock.h linux-2.6.22-591/include/net/sock.h --- linux-2.6.22-570/include/net/sock.h 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/include/net/sock.h 2007-12-21 15:36:14.000000000 -0500 @@ -55,6 +55,7 @@ #include #include #include +#include /* * This structure really needs to be cleaned up. 
@@ -105,6 +106,7 @@ * @skc_refcnt: reference count * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family + * @skc_net: reference to the network namespace of this socket * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. @@ -119,6 +121,7 @@ atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + struct net *skc_net; xid_t skc_xid; struct vx_info *skc_vx_info; nid_t skc_nid; @@ -199,6 +202,7 @@ #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_net __sk_common.skc_net #define sk_xid __sk_common.skc_xid #define sk_vx_info __sk_common.skc_vx_info #define sk_nid __sk_common.skc_nid @@ -781,7 +785,7 @@ SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern struct sock *sk_alloc(int family, +extern struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); @@ -1010,6 +1014,7 @@ #endif memcpy(nsk, osk, osk->sk_prot->obj_size); + get_net(nsk->sk_net); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); @@ -1373,6 +1378,7 @@ #ifdef CONFIG_SYSCTL extern struct ctl_table core_table[]; +extern struct ctl_table multi_core_table[]; #endif extern int sysctl_optmem_max; diff -Nurb linux-2.6.22-570/include/net/tcp.h linux-2.6.22-591/include/net/tcp.h --- linux-2.6.22-570/include/net/tcp.h 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/include/net/tcp.h 2007-12-21 15:36:14.000000000 -0500 @@ -191,8 +191,6 @@ extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_keepalive_time; @@ -1293,6 +1291,7 @@ }; struct 
tcp_iter_state { + struct net *net; sa_family_t family; enum tcp_seq_states state; struct sock *syn_wait_sk; @@ -1300,8 +1299,8 @@ struct seq_operations seq_ops; }; -extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); -extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); +extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); +extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); extern struct request_sock_ops tcp_request_sock_ops; diff -Nurb linux-2.6.22-570/include/net/tipc/tipc_port.h linux-2.6.22-591/include/net/tipc/tipc_port.h --- linux-2.6.22-570/include/net/tipc/tipc_port.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/tipc/tipc_port.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,8 @@ /* * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports * - * Copyright (c) 1994-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names * @congested: non-zero if cannot send because of link or port congestion + * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages */ @@ -68,6 +69,7 @@ u32 conn_unacked; int published; u32 congested; + u32 max_pkt; u32 ref; struct tipc_msg phdr; }; diff -Nurb linux-2.6.22-570/include/net/udp.h linux-2.6.22-591/include/net/udp.h --- linux-2.6.22-570/include/net/udp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/udp.h 2007-12-21 15:36:14.000000000 -0500 @@ -160,6 +160,7 @@ }; struct udp_iter_state { + struct net *net; sa_family_t family; struct hlist_head *hashtable; int bucket; @@ -167,8 +168,8 @@ }; #ifdef CONFIG_PROC_FS -extern int udp_proc_register(struct udp_seq_afinfo *afinfo); -extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); +extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo); +extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo); extern int udp4_proc_init(void); extern void udp4_proc_exit(void); diff -Nurb linux-2.6.22-570/include/net/wext.h linux-2.6.22-591/include/net/wext.h --- linux-2.6.22-570/include/net/wext.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/net/wext.h 2007-12-21 15:36:14.000000000 -0500 @@ -5,16 +5,23 @@ * wireless extensions interface to the core code */ +struct net; + #ifdef CONFIG_WIRELESS_EXT -extern int wext_proc_init(void); -extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +extern int wext_proc_init(struct net *net); +extern void wext_proc_exit(struct net *net); +extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int 
cmd, void __user *arg); #else -static inline int wext_proc_init(void) +static inline int wext_proc_init(struct net *net) { return 0; } -static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +static inline void wext_proc_exit(struct net *net) +{ + return; +} +static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { return -EINVAL; diff -Nurb linux-2.6.22-570/include/net/xfrm.h linux-2.6.22-591/include/net/xfrm.h --- linux-2.6.22-570/include/net/xfrm.h 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/include/net/xfrm.h 2007-12-21 15:36:14.000000000 -0500 @@ -19,13 +19,21 @@ #include #include +#define XFRM_PROTO_ESP 50 +#define XFRM_PROTO_AH 51 +#define XFRM_PROTO_COMP 108 +#define XFRM_PROTO_IPIP 4 +#define XFRM_PROTO_IPV6 41 +#define XFRM_PROTO_ROUTING IPPROTO_ROUTING +#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS + #define XFRM_ALIGN8(len) (((len) + 7) & ~7) #define MODULE_ALIAS_XFRM_MODE(family, encap) \ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) +#define MODULE_ALIAS_XFRM_TYPE(family, proto) \ + MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) extern struct sock *xfrm_nl; -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; extern struct mutex xfrm_cfg_mutex; @@ -509,11 +517,9 @@ case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_type); break; -#ifdef CONFIG_IPV6_MIP6 case IPPROTO_MH: port = htons(fl->fl_mh_type); break; -#endif default: port = 0; /*XXX*/ } diff -Nurb linux-2.6.22-570/include/scsi/iscsi_if.h linux-2.6.22-591/include/scsi/iscsi_if.h --- linux-2.6.22-570/include/scsi/iscsi_if.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/iscsi_if.h 2007-12-21 15:36:12.000000000 -0500 @@ -48,6 +48,7 @@ ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT = UEVENT_BASE + 14, ISCSI_UEVENT_TGT_DSCVR = UEVENT_BASE + 15, + ISCSI_UEVENT_SET_HOST_PARAM = UEVENT_BASE + 16, /* up events */ ISCSI_KEVENT_RECV_PDU = 
KEVENT_BASE + 1, @@ -71,6 +72,8 @@ /* messages u -> k */ struct msg_create_session { uint32_t initial_cmdsn; + uint16_t cmds_max; + uint16_t queue_depth; } c_session; struct msg_destroy_session { uint32_t sid; @@ -136,6 +139,11 @@ */ uint32_t enable; } tgt_dscvr; + struct msg_set_host_param { + uint32_t host_no; + uint32_t param; /* enum iscsi_host_param */ + uint32_t len; + } set_host_param; } u; union { /* messages k -> u */ @@ -223,6 +231,11 @@ ISCSI_PARAM_CONN_PORT, ISCSI_PARAM_CONN_ADDRESS, + ISCSI_PARAM_USERNAME, + ISCSI_PARAM_USERNAME_IN, + ISCSI_PARAM_PASSWORD, + ISCSI_PARAM_PASSWORD_IN, + /* must always be last */ ISCSI_PARAM_MAX, }; @@ -249,6 +262,24 @@ #define ISCSI_SESS_RECOVERY_TMO (1 << ISCSI_PARAM_SESS_RECOVERY_TMO) #define ISCSI_CONN_PORT (1 << ISCSI_PARAM_CONN_PORT) #define ISCSI_CONN_ADDRESS (1 << ISCSI_PARAM_CONN_ADDRESS) +#define ISCSI_USERNAME (1 << ISCSI_PARAM_USERNAME) +#define ISCSI_USERNAME_IN (1 << ISCSI_PARAM_USERNAME_IN) +#define ISCSI_PASSWORD (1 << ISCSI_PARAM_PASSWORD) +#define ISCSI_PASSWORD_IN (1 << ISCSI_PARAM_PASSWORD_IN) + +/* iSCSI HBA params */ +enum iscsi_host_param { + ISCSI_HOST_PARAM_HWADDRESS, + ISCSI_HOST_PARAM_INITIATOR_NAME, + ISCSI_HOST_PARAM_NETDEV_NAME, + ISCSI_HOST_PARAM_IPADDRESS, + ISCSI_HOST_PARAM_MAX, +}; + +#define ISCSI_HOST_HWADDRESS (1 << ISCSI_HOST_PARAM_HWADDRESS) +#define ISCSI_HOST_INITIATOR_NAME (1 << ISCSI_HOST_PARAM_INITIATOR_NAME) +#define ISCSI_HOST_NETDEV_NAME (1 << ISCSI_HOST_PARAM_NETDEV_NAME) +#define ISCSI_HOST_IPADDRESS (1 << ISCSI_HOST_PARAM_IPADDRESS) #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle) #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr) @@ -272,6 +303,9 @@ #define CAP_MULTI_CONN 0x40 #define CAP_TEXT_NEGO 0x80 #define CAP_MARKERS 0x100 +#define CAP_FW_DB 0x200 +#define CAP_SENDTARGETS_OFFLOAD 0x400 +#define CAP_DATA_PATH_OFFLOAD 0x800 /* * These flags describes reason of stop_conn() call diff -Nurb linux-2.6.22-570/include/scsi/libiscsi.h 
linux-2.6.22-591/include/scsi/libiscsi.h --- linux-2.6.22-570/include/scsi/libiscsi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/libiscsi.h 2007-12-21 15:36:12.000000000 -0500 @@ -48,9 +48,8 @@ #define debug_scsi(fmt...) #endif -#define ISCSI_XMIT_CMDS_MAX 128 /* must be power of 2 */ -#define ISCSI_MGMT_CMDS_MAX 32 /* must be power of 2 */ -#define ISCSI_CONN_MAX 1 +#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* must be power of 2 */ +#define ISCSI_MGMT_CMDS_MAX 16 /* must be power of 2 */ #define ISCSI_MGMT_ITT_OFFSET 0xa00 @@ -73,6 +72,8 @@ #define ISCSI_AGE_SHIFT 28 #define ISCSI_AGE_MASK (0xf << ISCSI_AGE_SHIFT) +#define ISCSI_ADDRESS_BUF_LEN 64 + struct iscsi_mgmt_task { /* * Becuae LLDs allocate their hdr differently, this is a pointer to @@ -80,7 +81,7 @@ */ struct iscsi_hdr *hdr; char *data; /* mgmt payload */ - int data_count; /* counts data to be sent */ + unsigned data_count; /* counts data to be sent */ uint32_t itt; /* this ITT */ void *dd_data; /* driver/transport data */ struct list_head running; @@ -90,6 +91,7 @@ ISCSI_TASK_COMPLETED, ISCSI_TASK_PENDING, ISCSI_TASK_RUNNING, + ISCSI_TASK_ABORTING, }; struct iscsi_cmd_task { @@ -99,16 +101,14 @@ */ struct iscsi_cmd *hdr; int itt; /* this ITT */ - int datasn; /* DataSN */ uint32_t unsol_datasn; - int imm_count; /* imm-data (bytes) */ - int unsol_count; /* unsolicited (bytes)*/ + unsigned imm_count; /* imm-data (bytes) */ + unsigned unsol_count; /* unsolicited (bytes)*/ /* offset in unsolicited stream (bytes); */ - int unsol_offset; - int data_count; /* remaining Data-Out */ + unsigned unsol_offset; + unsigned data_count; /* remaining Data-Out */ struct scsi_cmnd *sc; /* associated SCSI cmd*/ - int total_length; struct iscsi_conn *conn; /* used connection */ struct iscsi_mgmt_task *mtask; /* tmf mtask in progr */ @@ -152,18 +152,11 @@ struct iscsi_cmd_task *ctask; /* xmit ctask in progress */ /* xmit */ - struct kfifo *immqueue; /* immediate xmit queue */ struct kfifo *mgmtqueue; /* 
mgmt (control) xmit queue */ struct list_head mgmt_run_list; /* list of control tasks */ struct list_head xmitqueue; /* data-path cmd queue */ struct list_head run_list; /* list of cmds in progress */ struct work_struct xmitwork; /* per-conn. xmit workqueue */ - /* - * serializes connection xmit, access to kfifos: - * xmitqueue, immqueue, mgmtqueue - */ - struct mutex xmitmutex; - unsigned long suspend_tx; /* suspend Tx */ unsigned long suspend_rx; /* suspend Rx */ @@ -174,8 +167,8 @@ int tmabort_state; /* see TMABORT_INITIAL, etc.*/ /* negotiated params */ - int max_recv_dlength; /* initiator_max_recv_dsl*/ - int max_xmit_dlength; /* target_max_recv_dsl */ + unsigned max_recv_dlength; /* initiator_max_recv_dsl*/ + unsigned max_xmit_dlength; /* target_max_recv_dsl */ int hdrdgst_en; int datadgst_en; int ifmarker_en; @@ -183,6 +176,12 @@ /* values userspace uses to id a conn */ int persistent_port; char *persistent_address; + /* remote portal currently connected to */ + int portal_port; + char portal_address[ISCSI_ADDRESS_BUF_LEN]; + /* local address */ + int local_port; + char local_address[ISCSI_ADDRESS_BUF_LEN]; /* MIB-statistics */ uint64_t txdata_octets; @@ -213,18 +212,25 @@ /* configuration */ int initial_r2t_en; - int max_r2t; + unsigned max_r2t; int imm_data_en; - int first_burst; - int max_burst; + unsigned first_burst; + unsigned max_burst; int time2wait; int time2retain; int pdu_inorder_en; int dataseq_inorder_en; int erl; int tpgt; + char *username; + char *username_in; + char *password; + char *password_in; char *targetname; - + char *initiatorname; + /* hw address or netdev iscsi connection is bound to */ + char *hwaddress; + char *netdev; /* control data */ struct iscsi_transport *tt; struct Scsi_Host *host; @@ -255,12 +261,22 @@ extern int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)); + +/* + * iSCSI host helpers. 
+ */ +extern int iscsi_host_set_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf, + int buflen); +extern int iscsi_host_get_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf); + /* * session management */ extern struct iscsi_cls_session * iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *, - int, int, uint32_t, uint32_t *); + uint16_t, uint16_t, int, int, uint32_t, uint32_t *); extern void iscsi_session_teardown(struct iscsi_cls_session *); extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *); extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *); @@ -289,8 +305,7 @@ /* * pdu and task processing */ -extern int iscsi_check_assign_cmdsn(struct iscsi_session *, - struct iscsi_nopin *); +extern void iscsi_update_cmdsn(struct iscsi_session *, struct iscsi_nopin *); extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *, struct iscsi_data *hdr); extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *, diff -Nurb linux-2.6.22-570/include/scsi/scsi_cmnd.h linux-2.6.22-591/include/scsi/scsi_cmnd.h --- linux-2.6.22-570/include/scsi/scsi_cmnd.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/scsi_cmnd.h 2007-12-21 15:36:12.000000000 -0500 @@ -135,4 +135,24 @@ extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t); extern void scsi_free_sgtable(struct scatterlist *, int); +extern int scsi_dma_map(struct scsi_cmnd *cmd); +extern void scsi_dma_unmap(struct scsi_cmnd *cmd); + +#define scsi_sg_count(cmd) ((cmd)->use_sg) +#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer) +#define scsi_bufflen(cmd) ((cmd)->request_bufflen) + +static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid) +{ + cmd->resid = resid; +} + +static inline int scsi_get_resid(struct scsi_cmnd *cmd) +{ + return cmd->resid; +} + +#define scsi_for_each_sg(cmd, sg, nseg, __i) \ + for (__i = 0, sg = 
scsi_sglist(cmd); __i < (nseg); __i++, (sg)++) + #endif /* _SCSI_SCSI_CMND_H */ diff -Nurb linux-2.6.22-570/include/scsi/scsi_device.h linux-2.6.22-591/include/scsi/scsi_device.h --- linux-2.6.22-570/include/scsi/scsi_device.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/scsi_device.h 2007-12-21 15:36:12.000000000 -0500 @@ -287,6 +287,7 @@ extern void scsi_target_unblock(struct device *); extern void scsi_remove_target(struct device *); extern void int_to_scsilun(unsigned int, struct scsi_lun *); +extern int scsilun_to_int(struct scsi_lun *); extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); diff -Nurb linux-2.6.22-570/include/scsi/scsi_host.h linux-2.6.22-591/include/scsi/scsi_host.h --- linux-2.6.22-570/include/scsi/scsi_host.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/scsi_host.h 2007-12-21 15:36:12.000000000 -0500 @@ -339,12 +339,6 @@ enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); /* - * suspend support - */ - int (*resume)(struct scsi_device *); - int (*suspend)(struct scsi_device *, pm_message_t state); - - /* * Name of proc directory */ char *proc_name; @@ -677,6 +671,10 @@ #define shost_printk(prefix, shost, fmt, a...) 
\ dev_printk(prefix, &(shost)->shost_gendev, fmt, ##a) +static inline void *shost_priv(struct Scsi_Host *shost) +{ + return (void *)shost->hostdata; +} int scsi_is_host_device(const struct device *); diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_fc.h linux-2.6.22-591/include/scsi/scsi_transport_fc.h --- linux-2.6.22-570/include/scsi/scsi_transport_fc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/scsi_transport_fc.h 2007-12-21 15:36:12.000000000 -0500 @@ -19,7 +19,7 @@ * * ======== * - * Copyright (C) 2004-2005 James Smart, Emulex Corporation + * Copyright (C) 2004-2007 James Smart, Emulex Corporation * Rewrite for host, target, device, and remote port attributes, * statistics, and service functions... * @@ -62,8 +62,10 @@ FC_PORTTYPE_NLPORT, /* (Public) Loop w/ FLPort */ FC_PORTTYPE_LPORT, /* (Private) Loop w/o FLPort */ FC_PORTTYPE_PTP, /* Point to Point w/ another NPort */ + FC_PORTTYPE_NPIV, /* VPORT based on NPIV */ }; + /* * fc_port_state: If you alter this, you also need to alter scsi_transport_fc.c * (for the ascii descriptions). @@ -84,6 +86,25 @@ /* + * fc_vport_state: If you alter this, you also need to alter + * scsi_transport_fc.c (for the ascii descriptions). + */ +enum fc_vport_state { + FC_VPORT_UNKNOWN, + FC_VPORT_ACTIVE, + FC_VPORT_DISABLED, + FC_VPORT_LINKDOWN, + FC_VPORT_INITIALIZING, + FC_VPORT_NO_FABRIC_SUPP, + FC_VPORT_NO_FABRIC_RSCS, + FC_VPORT_FABRIC_LOGOUT, + FC_VPORT_FABRIC_REJ_WWN, + FC_VPORT_FAILED, +}; + + + +/* * FC Classes of Service * Note: values are not enumerated, as they can be "or'd" together * for reporting (e.g. report supported_classes). If you alter this list, @@ -124,18 +145,116 @@ }; /* - * FC Remote Port Roles + * FC Port Roles * Note: values are not enumerated, as they can be "or'd" together * for reporting (e.g. report roles). If you alter this list, * you also need to alter scsi_transport_fc.c (for the ascii descriptions). 
*/ -#define FC_RPORT_ROLE_UNKNOWN 0x00 -#define FC_RPORT_ROLE_FCP_TARGET 0x01 -#define FC_RPORT_ROLE_FCP_INITIATOR 0x02 -#define FC_RPORT_ROLE_IP_PORT 0x04 +#define FC_PORT_ROLE_UNKNOWN 0x00 +#define FC_PORT_ROLE_FCP_TARGET 0x01 +#define FC_PORT_ROLE_FCP_INITIATOR 0x02 +#define FC_PORT_ROLE_IP_PORT 0x04 + +/* The following are for compatibility */ +#define FC_RPORT_ROLE_UNKNOWN FC_PORT_ROLE_UNKNOWN +#define FC_RPORT_ROLE_FCP_TARGET FC_PORT_ROLE_FCP_TARGET +#define FC_RPORT_ROLE_FCP_INITIATOR FC_PORT_ROLE_FCP_INITIATOR +#define FC_RPORT_ROLE_IP_PORT FC_PORT_ROLE_IP_PORT + + +/* Macro for use in defining Virtual Port attributes */ +#define FC_VPORT_ATTR(_name,_mode,_show,_store) \ +struct class_device_attribute class_device_attr_vport_##_name = \ + __ATTR(_name,_mode,_show,_store) /* + * FC Virtual Port Attributes + * + * This structure exists for each FC port that is a virtual FC port. Virtual + * ports share the physical link with the Physical port. Each virtual + * port has a unique presence on the SAN, and may be instantiated via + * NPIV, Virtual Fabrics, or via additional ALPAs. As the vport is a + * unique presence, each vport has its own view of the fabric, + * authentication privilege, and priorities. + * + * A virtual port may support 1 or more FC4 roles. Typically it is a + * FCP Initiator. It could be a FCP Target, or exist solely for an IP over FC + * role. FC port attributes for the vport will be reported on any + * fc_host class object allocated for an FCP Initiator. + * + * -- + * + * Fixed attributes are not expected to change. The driver is + * expected to set these values after receiving the fc_vport structure + * via the vport_create() call from the transport. + * The transport fully manages all get functions w/o driver interaction. + * + * Dynamic attributes are expected to change. The driver participates + * in all get/set operations via functions provided by the driver. + * + * Private attributes are transport-managed values. 
They are fully + * managed by the transport w/o driver interaction. + */ + +#define FC_VPORT_SYMBOLIC_NAMELEN 64 +struct fc_vport { + /* Fixed Attributes */ + + /* Dynamic Attributes */ + + /* Private (Transport-managed) Attributes */ + enum fc_vport_state vport_state; + enum fc_vport_state vport_last_state; + u64 node_name; + u64 port_name; + u32 roles; + u32 vport_id; /* Admin Identifier for the vport */ + enum fc_port_type vport_type; + char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN]; + + /* exported data */ + void *dd_data; /* Used for driver-specific storage */ + + /* internal data */ + struct Scsi_Host *shost; /* Physical Port Parent */ + unsigned int channel; + u32 number; + u8 flags; + struct list_head peers; + struct device dev; + struct work_struct vport_delete_work; +} __attribute__((aligned(sizeof(unsigned long)))); + +/* bit field values for struct fc_vport "flags" field: */ +#define FC_VPORT_CREATING 0x01 +#define FC_VPORT_DELETING 0x02 +#define FC_VPORT_DELETED 0x04 +#define FC_VPORT_DEL 0x06 /* Any DELETE state */ + +#define dev_to_vport(d) \ + container_of(d, struct fc_vport, dev) +#define transport_class_to_vport(classdev) \ + dev_to_vport(classdev->dev) +#define vport_to_shost(v) \ + (v->shost) +#define vport_to_shost_channel(v) \ + (v->channel) +#define vport_to_parent(v) \ + (v->dev.parent) + + +/* Error return codes for vport_create() callback */ +#define VPCERR_UNSUPPORTED -ENOSYS /* no driver/adapter + support */ +#define VPCERR_BAD_WWN -ENOTUNIQ /* driver validation + of WWNs failed */ +#define VPCERR_NO_FABRIC_SUPP -EOPNOTSUPP /* Fabric connection + is loop or the + Fabric Port does + not support NPIV */ + +/* * fc_rport_identifiers: This set of data contains all elements * to uniquely identify a remote FC port. The driver uses this data * to report the existence of a remote FC port in the topology. 
Internally, @@ -149,6 +268,7 @@ u32 roles; }; + /* Macro for use in defining Remote Port attributes */ #define FC_RPORT_ATTR(_name,_mode,_show,_store) \ struct class_device_attribute class_device_attr_rport_##_name = \ @@ -343,6 +463,7 @@ u8 supported_fc4s[FC_FC4_LIST_SIZE]; u32 supported_speeds; u32 maxframe_size; + u16 max_npiv_vports; char serial_number[FC_SERIAL_NUMBER_SIZE]; /* Dynamic Attributes */ @@ -361,8 +482,11 @@ /* internal data */ struct list_head rports; struct list_head rport_bindings; + struct list_head vports; u32 next_rport_number; u32 next_target_id; + u32 next_vport_number; + u16 npiv_vports_inuse; /* work queues for rport state manipulation */ char work_q_name[KOBJ_NAME_LEN]; @@ -388,6 +512,8 @@ (((struct fc_host_attrs *)(x)->shost_data)->supported_speeds) #define fc_host_maxframe_size(x) \ (((struct fc_host_attrs *)(x)->shost_data)->maxframe_size) +#define fc_host_max_npiv_vports(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->max_npiv_vports) #define fc_host_serial_number(x) \ (((struct fc_host_attrs *)(x)->shost_data)->serial_number) #define fc_host_port_id(x) \ @@ -412,10 +538,16 @@ (((struct fc_host_attrs *)(x)->shost_data)->rports) #define fc_host_rport_bindings(x) \ (((struct fc_host_attrs *)(x)->shost_data)->rport_bindings) +#define fc_host_vports(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->vports) #define fc_host_next_rport_number(x) \ (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) #define fc_host_next_target_id(x) \ (((struct fc_host_attrs *)(x)->shost_data)->next_target_id) +#define fc_host_next_vport_number(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->next_vport_number) +#define fc_host_npiv_vports_inuse(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse) #define fc_host_work_q_name(x) \ (((struct fc_host_attrs *)(x)->shost_data)->work_q_name) #define fc_host_work_q(x) \ @@ -452,8 +584,14 @@ void (*dev_loss_tmo_callbk)(struct fc_rport *); void (*terminate_rport_io)(struct fc_rport 
*); + void (*set_vport_symbolic_name)(struct fc_vport *); + int (*vport_create)(struct fc_vport *, bool); + int (*vport_disable)(struct fc_vport *, bool); + int (*vport_delete)(struct fc_vport *); + /* allocation lengths for host-specific data */ u32 dd_fcrport_size; + u32 dd_fcvport_size; /* * The driver sets these to tell the transport class it @@ -512,7 +650,7 @@ switch (rport->port_state) { case FC_PORTSTATE_ONLINE: - if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) + if (rport->roles & FC_PORT_ROLE_FCP_TARGET) result = 0; else if (rport->flags & FC_RPORT_DEVLOSS_PENDING) result = DID_IMM_RETRY << 16; @@ -549,6 +687,27 @@ wwn[7] = inm & 0xff; } +/** + * fc_vport_set_state() - called to set a vport's state. Saves the old state, + * excepting the transitory states of initializing and sending the ELS + * traffic to instantiate the vport on the link. + * + * Assumes the driver has surrounded this with the proper locking to ensure + * a coherent state change. + * + * @vport: virtual port whose state is changing + * @new_state: new state + **/ +static inline void +fc_vport_set_state(struct fc_vport *vport, enum fc_vport_state new_state) +{ + if ((new_state != FC_VPORT_UNKNOWN) && + (new_state != FC_VPORT_INITIALIZING)) + vport->vport_last_state = vport->vport_state; + vport->vport_state = new_state; +} + + struct scsi_transport_template *fc_attach_transport( struct fc_function_template *); void fc_release_transport(struct scsi_transport_template *); @@ -567,5 +726,6 @@ * be sure to read the Vendor Type and ID formatting requirements * specified in scsi_netlink.h */ +int fc_vport_terminate(struct fc_vport *vport); #endif /* SCSI_TRANSPORT_FC_H */ diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h linux-2.6.22-591/include/scsi/scsi_transport_iscsi.h --- linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/include/scsi/scsi_transport_iscsi.h 2007-12-21 15:36:12.000000000 -0500 @@ -79,7 +79,8 @@ char 
*name; unsigned int caps; /* LLD sets this to indicate what values it can export to sysfs */ - unsigned int param_mask; + uint64_t param_mask; + uint64_t host_param_mask; struct scsi_host_template *host_template; /* LLD connection data size */ int conndata_size; @@ -89,7 +90,8 @@ unsigned int max_conn; unsigned int max_cmd_len; struct iscsi_cls_session *(*create_session) (struct iscsi_transport *it, - struct scsi_transport_template *t, uint32_t sn, uint32_t *hn); + struct scsi_transport_template *t, uint16_t, uint16_t, + uint32_t sn, uint32_t *hn); void (*destroy_session) (struct iscsi_cls_session *session); struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess, uint32_t cid); @@ -105,14 +107,18 @@ enum iscsi_param param, char *buf); int (*get_session_param) (struct iscsi_cls_session *session, enum iscsi_param param, char *buf); + int (*get_host_param) (struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf); + int (*set_host_param) (struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf, + int buflen); int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); void (*get_stats) (struct iscsi_cls_conn *conn, struct iscsi_stats *stats); void (*init_cmd_task) (struct iscsi_cmd_task *ctask); void (*init_mgmt_task) (struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask, - char *data, uint32_t data_size); + struct iscsi_mgmt_task *mtask); int (*xmit_cmd_task) (struct iscsi_conn *conn, struct iscsi_cmd_task *ctask); void (*cleanup_cmd_task) (struct iscsi_conn *conn, @@ -124,7 +130,7 @@ uint64_t *ep_handle); int (*ep_poll) (uint64_t ep_handle, int timeout_ms); void (*ep_disconnect) (uint64_t ep_handle); - int (*tgt_dscvr) (enum iscsi_tgt_dscvr type, uint32_t host_no, + int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, uint32_t enable, struct sockaddr *dst_addr); }; diff -Nurb linux-2.6.22-570/init/Kconfig linux-2.6.22-591/init/Kconfig --- linux-2.6.22-570/init/Kconfig 
2007-12-21 15:36:05.000000000 -0500 +++ linux-2.6.22-591/init/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -120,15 +120,6 @@ section 6.4 of the Linux Programmer's Guide, available from . -config IPC_NS - bool "IPC Namespaces" - depends on SYSVIPC - default n - help - Support ipc namespaces. This allows containers, i.e. virtual - environments, to use ipc namespaces to provide different ipc - objects for different servers. If unsure, say N. - config SYSVIPC_SYSCTL bool depends on SYSVIPC @@ -218,13 +209,14 @@ Say N if unsure. -config UTS_NS - bool "UTS Namespaces" +config USER_NS + bool "User Namespaces (EXPERIMENTAL)" default n + depends on EXPERIMENTAL help - Support uts namespaces. This allows containers, i.e. - vservers, to use uts namespaces to provide different - uts info for different servers. If unsure, say N. + Support user namespaces. This allows containers, i.e. + vservers, to use user namespaces to provide different + user info for different servers. If unsure, say N. config AUDIT bool "Auditing support" @@ -298,9 +290,23 @@ depends on !OOM_PANIC default y +config CONTAINERS + bool + +config CONTAINER_DEBUG + bool "Example debug container subsystem" + select CONTAINERS + help + This option enables a simple container subsystem that + exports useful debugging information about the containers + framework + + Say N if unsure + config CPUSETS bool "Cpuset support" depends on SMP + select CONTAINERS help This option will let you create and manage CPUSETs which allow dynamically partitioning a system into sets of CPUs and @@ -329,6 +335,27 @@ If you are using a distro that was released in 2006 or later, it should be safe to say N here. 
+config CONTAINER_CPUACCT + bool "Simple CPU accounting container subsystem" + select CONTAINERS + help + Provides a simple Resource Controller for monitoring the + total CPU consumed by the tasks in a container + +config CONTAINER_NS + bool "Namespace container subsystem" + select CONTAINERS + help + Provides a simple namespace container subsystem to + provide hierarchical naming of sets of namespaces, + for instance virtual servers and checkpoint/restart + jobs. + +config PROC_PID_CPUSET + bool "Include legacy /proc//cpuset file" + depends on CPUSETS + default y + config RELAY bool "Kernel->user space relay support (formerly relayfs)" help @@ -605,6 +632,33 @@ endchoice +config PROC_SMAPS + default y + bool "Enable /proc/pid/smaps support" if EMBEDDED && PROC_FS && MMU + help + The /proc/pid/smaps interface reports a process's private and + shared memory per mapping. Disabling this interface will reduce + the size of the kernel for small machines. + +config PROC_CLEAR_REFS + default y + bool "Enable /proc/pid/clear_refs support" if EMBEDDED && PROC_FS && MMU + help + The /proc/pid/clear_refs interface allows clearing the + referenced bits on a process's memory maps to allow monitoring + working set size. Disabling this interface will reduce + the size of the kernel for small machines. + +config PROC_PAGEMAP + default y + bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU + help + The /proc/pid/pagemap interface allows reading the + kernel's virtual memory to page frame mapping to determine which + individual pages a process has mapped and which pages it shares + with other processes. Disabling this interface will reduce the + size of the kernel for small machines. 
+ endmenu # General setup config RT_MUTEXES @@ -620,6 +674,19 @@ default 0 if BASE_FULL default 1 if !BASE_FULL +config PAGE_GROUP_BY_MOBILITY + bool "Group pages based on their mobility in the page allocator" + def_bool y + help + The standard allocator will fragment memory over time which means + that high order allocations will fail even if kswapd is running. If + this option is set, the allocator will try and group page types + based on their ability to migrate or reclaim. This is a best effort + attempt at lowering fragmentation which a few workloads care about. + The loss is a more complex allocator that may perform slower. If + you are interested in working with large pages, say Y and set + /proc/sys/vm/min_free_kbytes to 16374. Otherwise say N + menu "Loadable module support" config MODULES diff -Nurb linux-2.6.22-570/init/do_mounts_initrd.c linux-2.6.22-591/init/do_mounts_initrd.c --- linux-2.6.22-570/init/do_mounts_initrd.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/init/do_mounts_initrd.c 2007-12-21 15:36:12.000000000 -0500 @@ -56,12 +56,9 @@ sys_chroot("."); pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); - if (pid > 0) { - while (pid != sys_wait4(-1, NULL, 0, NULL)) { - try_to_freeze(); + if (pid > 0) + while (pid != sys_wait4(-1, NULL, 0, NULL)) yield(); - } - } /* move initrd to rootfs' /old */ sys_fchdir(old_fd); diff -Nurb linux-2.6.22-570/init/main.c linux-2.6.22-591/init/main.c --- linux-2.6.22-570/init/main.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/init/main.c 2007-12-21 15:36:12.000000000 -0500 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -502,6 +503,7 @@ char * command_line; extern struct kernel_param __start___param[], __stop___param[]; + container_init_early(); smp_setup_processor_id(); /* @@ -627,6 +629,7 @@ #ifdef CONFIG_PROC_FS proc_root_init(); #endif + container_init(); cpuset_init(); taskstats_init_early(); delayacct_init(); diff -Nurb 
linux-2.6.22-570/ipc/msg.c linux-2.6.22-591/ipc/msg.c --- linux-2.6.22-570/ipc/msg.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/ipc/msg.c 2007-12-21 15:36:12.000000000 -0500 @@ -88,7 +88,7 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it); #endif -static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) +static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) { ns->ids[IPC_MSG_IDS] = ids; ns->msg_ctlmax = MSGMAX; @@ -97,7 +97,6 @@ ipc_init_ids(ids, ns->msg_ctlmni); } -#ifdef CONFIG_IPC_NS int msg_init_ns(struct ipc_namespace *ns) { struct ipc_ids *ids; @@ -129,7 +128,6 @@ kfree(ns->ids[IPC_MSG_IDS]); ns->ids[IPC_MSG_IDS] = NULL; } -#endif void __init msg_init(void) { diff -Nurb linux-2.6.22-570/ipc/sem.c linux-2.6.22-591/ipc/sem.c --- linux-2.6.22-570/ipc/sem.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/ipc/sem.c 2007-12-21 15:36:12.000000000 -0500 @@ -123,7 +123,7 @@ #define sc_semopm sem_ctls[2] #define sc_semmni sem_ctls[3] -static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) +static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) { ns->ids[IPC_SEM_IDS] = ids; ns->sc_semmsl = SEMMSL; @@ -134,7 +134,6 @@ ipc_init_ids(ids, ns->sc_semmni); } -#ifdef CONFIG_IPC_NS int sem_init_ns(struct ipc_namespace *ns) { struct ipc_ids *ids; @@ -166,7 +165,6 @@ kfree(ns->ids[IPC_SEM_IDS]); ns->ids[IPC_SEM_IDS] = NULL; } -#endif void __init sem_init (void) { diff -Nurb linux-2.6.22-570/ipc/shm.c linux-2.6.22-591/ipc/shm.c --- linux-2.6.22-570/ipc/shm.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/ipc/shm.c 2007-12-21 15:36:12.000000000 -0500 @@ -79,7 +79,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif -static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) +static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) { ns->ids[IPC_SHM_IDS] = ids; ns->shm_ctlmax = 
SHMMAX; @@ -100,7 +100,6 @@ shm_destroy(ns, shp); } -#ifdef CONFIG_IPC_NS int shm_init_ns(struct ipc_namespace *ns) { struct ipc_ids *ids; @@ -132,7 +131,6 @@ kfree(ns->ids[IPC_SHM_IDS]); ns->ids[IPC_SHM_IDS] = NULL; } -#endif void __init shm_init (void) { @@ -234,13 +232,13 @@ mutex_unlock(&shm_ids(ns).mutex); } -static struct page *shm_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static struct page *shm_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); - return sfd->vm_ops->nopage(vma, address, type); + return sfd->vm_ops->fault(vma, fdata); } #ifdef CONFIG_NUMA @@ -279,6 +277,7 @@ if (ret != 0) return ret; sfd->vm_ops = vma->vm_ops; + BUG_ON(!sfd->vm_ops->fault); vma->vm_ops = &shm_vm_ops; shm_open(vma); @@ -337,7 +336,7 @@ static struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ - .nopage = shm_nopage, + .fault = shm_fault, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, diff -Nurb linux-2.6.22-570/ipc/util.c linux-2.6.22-591/ipc/util.c --- linux-2.6.22-570/ipc/util.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/ipc/util.c 2007-12-21 15:36:12.000000000 -0500 @@ -52,7 +52,6 @@ }, }; -#ifdef CONFIG_IPC_NS static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) { int err; @@ -114,14 +113,6 @@ atomic_dec(&vs_global_ipc_ns); kfree(ns); } -#else -struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) -{ - if (flags & CLONE_NEWIPC) - return ERR_PTR(-EINVAL); - return ns; -} -#endif /** * ipc_init - initialise IPC subsystem @@ -149,7 +140,7 @@ * array itself. 
*/ -void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size) +void ipc_init_ids(struct ipc_ids* ids, int size) { int i; diff -Nurb linux-2.6.22-570/ipc/util.h linux-2.6.22-591/ipc/util.h --- linux-2.6.22-570/ipc/util.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/ipc/util.h 2007-12-21 15:36:12.000000000 -0500 @@ -41,12 +41,8 @@ }; struct seq_file; -#ifdef CONFIG_IPC_NS -#define __ipc_init -#else -#define __ipc_init __init -#endif -void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size); + +void ipc_init_ids(struct ipc_ids *ids, int size); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); diff -Nurb linux-2.6.22-570/kernel/Makefile linux-2.6.22-591/kernel/Makefile --- linux-2.6.22-570/kernel/Makefile 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -4,11 +4,12 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ exit.o itimer.o time.o softirq.o resource.o \ - sysctl.o capability.o ptrace.o timer.o user.o \ + sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o + hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ + utsname.o obj-y += vserver/ @@ -33,16 +34,22 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o +obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_COMPAT) += compat.o +obj-$(CONFIG_CONTAINERS) += container.o +obj-$(CONFIG_CONTAINER_DEBUG) += container_debug.o obj-$(CONFIG_CPUSETS) += cpuset.o +obj-$(CONFIG_CONTAINER_CPUACCT) += cpu_acct.o +obj-$(CONFIG_CONTAINER_NS) += 
ns_container.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ @@ -50,7 +57,6 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o -obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o diff -Nurb linux-2.6.22-570/kernel/audit.c linux-2.6.22-591/kernel/audit.c --- linux-2.6.22-570/kernel/audit.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/audit.c 2007-12-21 15:36:15.000000000 -0500 @@ -391,6 +391,7 @@ { struct sk_buff *skb; + set_freezable(); while (!kthread_should_stop()) { skb = skb_dequeue(&audit_skb_queue); wake_up(&audit_backlog_wait); @@ -794,8 +795,8 @@ printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? 
"enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, - NULL, THIS_MODULE); + audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, + audit_receive, NULL, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff -Nurb linux-2.6.22-570/kernel/auditsc.c linux-2.6.22-591/kernel/auditsc.c --- linux-2.6.22-570/kernel/auditsc.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/kernel/auditsc.c 2007-12-21 15:36:12.000000000 -0500 @@ -1500,6 +1500,7 @@ context->names[idx].ino = (unsigned long)-1; } } +EXPORT_SYMBOL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values diff -Nurb linux-2.6.22-570/kernel/container.c linux-2.6.22-591/kernel/container.c --- linux-2.6.22-570/kernel/container.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/container.c 2007-12-21 15:36:15.000000000 -0500 @@ -0,0 +1,2545 @@ +/* + * kernel/container.c + * + * Generic process-grouping system. + * + * Based originally on the cpuset system, extracted by Paul Menage + * Copyright (C) 2006 Google, Inc + * + * Copyright notices from the original cpuset code: + * -------------------------------------------------- + * Copyright (C) 2003 BULL SA. + * Copyright (C) 2004-2006 Silicon Graphics, Inc. + * + * Portions derived from Patrick Mochel's sysfs code. + * sysfs is Copyright (c) 2001-3 Patrick Mochel + * + * 2003-10-10 Written by Simon Derr. + * 2003-10-22 Updates by Stephen Hemminger. + * 2004 May-July Rework by Paul Jackson. + * --------------------------------------------------- + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static DEFINE_MUTEX(container_mutex); + +/* Generate an array of container subsystem pointers */ +#define SUBSYS(_x) &_x ## _subsys, + +static struct container_subsys *subsys[] = { +#include +}; + +/* A containerfs_root represents the root of a container hierarchy, + * and may be associated with a superblock to form an active + * hierarchy */ +struct containerfs_root { + struct super_block *sb; + + /* The bitmask of subsystems attached to this hierarchy */ + unsigned long subsys_bits; + + /* A list running through the attached subsystems */ + struct list_head subsys_list; + + /* The root container for this hierarchy */ + struct container top_container; + + /* Tracks how many containers are currently defined in hierarchy.*/ + int number_of_containers; + + /* A list running through the mounted hierarchies */ + struct list_head root_list; + + /* The path to use for release notifications. No locking + * between setting and use - so if userspace updates this + * while subcontainers exist, you could miss a + * notification. We ensure that it's always a valid + * NUL-terminated string */ + char release_agent_path[PATH_MAX]; +}; + + +/* The "rootnode" hierarchy is the "dummy hierarchy", reserved for the + * subsystems that are otherwise unattached - it never has more than a + * single container, and all tasks are part of that container. 
*/ + +static struct containerfs_root rootnode; + +/* The list of hierarchy roots */ + +static LIST_HEAD(roots); +static int root_count; + +/* dummytop is a shorthand for the dummy hierarchy's top container */ +#define dummytop (&rootnode.top_container) + +/* This flag indicates whether tasks in the fork and exit paths should + * take callback_mutex and check for fork/exit handlers to call. This + * avoids us having to do extra work in the fork/exit path if none of the + * subsystems need to be called. + */ +static int need_forkexit_callback; + +/* bits in struct container flags field */ +enum { + /* Container is dead */ + CONT_REMOVED, + /* Container has previously had a child container or a task, + * but no longer (only if CONT_NOTIFY_ON_RELEASE is set) */ + CONT_RELEASABLE, + /* Container requires release notifications to userspace */ + CONT_NOTIFY_ON_RELEASE, +}; + +/* convenient tests for these bits */ +inline int container_is_removed(const struct container *cont) +{ + return test_bit(CONT_REMOVED, &cont->flags); +} + +inline int container_is_releasable(const struct container *cont) +{ + const int bits = + (1 << CONT_RELEASABLE) | + (1 << CONT_NOTIFY_ON_RELEASE); + return (cont->flags & bits) == bits; +} + +inline int notify_on_release(const struct container *cont) +{ + return test_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags); +} + +/* for_each_subsys() allows you to iterate on each subsystem attached to + * an active hierarchy */ +#define for_each_subsys(_root, _ss) \ +list_for_each_entry(_ss, &_root->subsys_list, sibling) + +/* for_each_root() allows you to iterate across the active hierarchies */ +#define for_each_root(_root) \ +list_for_each_entry(_root, &roots, root_list) + +/* the list of containers eligible for automatic release */ +static LIST_HEAD(release_list); +static void container_release_agent(struct work_struct *work); +static DECLARE_WORK(release_agent_work, container_release_agent); +static void check_for_release(struct container *cont); + +/* 
Link structure for associating css_group objects with containers */ +struct cg_container_link { + /* + * List running through cg_container_links associated with a + * container, anchored on container->css_groups + */ + struct list_head cont_link_list; + /* + * List running through cg_container_links pointing at a + * single css_group object, anchored on css_group->cg_links + */ + struct list_head cg_link_list; + struct css_group *cg; +}; + +/* The default css_group - used by init and its children prior to any + * hierarchies being mounted. It contains a pointer to the root state + * for each subsystem. Also used to anchor the list of css_groups. Not + * reference-counted, to improve performance when child containers + * haven't been created. + */ + +static struct css_group init_css_group; +static struct cg_container_link init_css_group_link; + +/* css_group_lock protects the list of css_group objects, and the + * chain of tasks off each css_group. Nests inside task->alloc_lock */ +static DEFINE_RWLOCK(css_group_lock); +static int css_group_count; + + +/* When we create or destroy a css_group, the operation simply + * takes/releases a reference count on all the containers referenced + * by subsystems in this css_group. This can end up multiple-counting + * some containers, but that's OK - the ref-count is just a + * busy/not-busy indicator; ensuring that we only count each container + * once would require taking a global lock to ensure that no + * subsystems moved between hierarchies while we were doing so. + * + * Possible TODO: decide at boot time based on the number of + * registered subsystems and the number of CPUs or NUMA nodes whether + * it's better for performance to ref-count every subsystem, or to + * take a global lock and only add one ref count to each hierarchy. 
+ */ + +/* + * unlink a css_group from the list and free it + */ +static void unlink_css_group(struct css_group *cg) +{ + write_lock(&css_group_lock); + list_del(&cg->list); + css_group_count--; + while (!list_empty(&cg->cg_links)) { + struct cg_container_link *link; + link = list_entry(cg->cg_links.next, + struct cg_container_link, cg_link_list); + list_del(&link->cg_link_list); + list_del(&link->cont_link_list); + kfree(link); + } + write_unlock(&css_group_lock); +} + +static void release_css_group(struct kref *k) +{ + int i; + struct css_group *cg = container_of(k, struct css_group, ref); + + BUG_ON(!mutex_is_locked(&container_mutex)); + unlink_css_group(cg); + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container *cont = cg->subsys[i]->container; + if (atomic_dec_and_test(&cont->count) && + container_is_releasable(cont)) { + check_for_release(cont); + } + } + kfree(cg); +} + +/* + * In the task exit path we want to avoid taking container_mutex + * unless absolutely necessary, so the release process is slightly + * different. 
+ */ +static void release_css_group_taskexit(struct kref *k) +{ + int i; + struct css_group *cg = container_of(k, struct css_group, ref); + + unlink_css_group(cg); + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container *cont = cg->subsys[i]->container; + if (notify_on_release(cont)) { + mutex_lock(&container_mutex); + set_bit(CONT_RELEASABLE, &cont->flags); + if (atomic_dec_and_test(&cont->count)) + check_for_release(cont); + mutex_unlock(&container_mutex); + } else { + atomic_dec(&cont->count); + } + } + kfree(cg); +} + +/* + * refcounted get/put for css_group objects + */ +static inline void get_css_group(struct css_group *cg) +{ + kref_get(&cg->ref); +} + +static inline void put_css_group(struct css_group *cg) +{ + kref_put(&cg->ref, release_css_group); +} + +static inline void put_css_group_taskexit(struct css_group *cg) +{ + kref_put(&cg->ref, release_css_group_taskexit); +} + +/* + * find_existing_css_group() is a helper for + * find_css_group(), and checks to see whether an existing + * css_group is suitable. This currently walks a linked-list for + * simplicity; a later patch will use a hash table for better + * performance + * + * oldcg: the container group that we're using before the container + * transition + * + * cont: the container that we're moving into + * + * template: location in which to build the desired set of subsystem + * state objects for the new container group + */ + +static struct css_group *find_existing_css_group( + struct css_group *oldcg, + struct container *cont, + struct container_subsys_state *template[]) +{ + int i; + struct containerfs_root *root = cont->root; + struct list_head *l = &init_css_group.list; + + /* Built the set of subsystem state objects that we want to + * see in the new css_group */ + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + if (root->subsys_bits & (1ull << i)) { + /* Subsystem is in this hierarchy. 
So we want + * the subsystem state from the new + * container */ + template[i] = cont->subsys[i]; + } else { + /* Subsystem is not in this hierarchy, so we + * don't want to change the subsystem state */ + template[i] = oldcg->subsys[i]; + } + } + + /* Look through existing container groups to find one to reuse */ + do { + struct css_group *cg = + list_entry(l, struct css_group, list); + + if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { + /* All subsystems matched */ + return cg; + } + /* Try the next container group */ + l = l->next; + } while (l != &init_css_group.list); + + /* No existing container group matched */ + return NULL; +} + +/* + * allocate_cg_links() allocates "count" cg_container_link structures + * and chains them on tmp through their cont_link_list fields. Returns 0 on + * success or a negative error + */ + +static int allocate_cg_links(int count, struct list_head *tmp) +{ + struct cg_container_link *link; + int i; + INIT_LIST_HEAD(tmp); + for (i = 0; i < count; i++) { + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + while (!list_empty(tmp)) { + link = list_entry(tmp->next, + struct cg_container_link, + cont_link_list); + list_del(&link->cont_link_list); + kfree(link); + } + return -ENOMEM; + } + list_add(&link->cont_link_list, tmp); + } + return 0; +} + +/* + * find_css_group() takes an existing container group and a + * container object, and returns a css_group object that's + * equivalent to the old group, but with the given container + * substituted into the appropriate hierarchy. 
Must be called with + * container_mutex held + */ + +static struct css_group *find_css_group( + struct css_group *oldcg, struct container *cont) +{ + struct css_group *res; + struct container_subsys_state *template[CONTAINER_SUBSYS_COUNT]; + int i; + + struct list_head tmp_cg_links; + struct cg_container_link *link; + + /* First see if we already have a container group that matches + * the desired set */ + write_lock(&css_group_lock); + res = find_existing_css_group(oldcg, cont, template); + if (res) + get_css_group(res); + write_unlock(&css_group_lock); + + if (res) + return res; + + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + /* Allocate all the cg_container_link objects that we'll need */ + if (allocate_cg_links(root_count, &tmp_cg_links) < 0) { + kfree(res); + return NULL; + } + + kref_init(&res->ref); + INIT_LIST_HEAD(&res->cg_links); + INIT_LIST_HEAD(&res->tasks); + + /* Copy the set of subsystem state objects generated in + * find_existing_css_group() */ + memcpy(res->subsys, template, sizeof(res->subsys)); + + write_lock(&css_group_lock); + /* Add reference counts and links from the new css_group. 
*/ + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container *cont = res->subsys[i]->container; + struct container_subsys *ss = subsys[i]; + atomic_inc(&cont->count); + /* + * We want to add a link once per container, so we + * only do it for the first subsystem in each + * hierarchy + */ + if (ss->root->subsys_list.next == &ss->sibling) { + BUG_ON(list_empty(&tmp_cg_links)); + link = list_entry(tmp_cg_links.next, + struct cg_container_link, + cont_link_list); + list_del(&link->cont_link_list); + list_add(&link->cont_link_list, &cont->css_groups); + link->cg = res; + list_add(&link->cg_link_list, &res->cg_links); + } + } + if (list_empty(&rootnode.subsys_list)) { + link = list_entry(tmp_cg_links.next, + struct cg_container_link, + cont_link_list); + list_del(&link->cont_link_list); + list_add(&link->cont_link_list, &dummytop->css_groups); + link->cg = res; + list_add(&link->cg_link_list, &res->cg_links); + } + + BUG_ON(!list_empty(&tmp_cg_links)); + + /* Link this container group into the list */ + list_add(&res->list, &init_css_group.list); + css_group_count++; + INIT_LIST_HEAD(&res->tasks); + write_unlock(&css_group_lock); + + return res; +} + +/* + * There is one global container mutex. We also require taking + * task_lock() when dereferencing a task's container subsys pointers. + * See "The task_lock() exception", at the end of this comment. + * + * A task must hold container_mutex to modify containers. + * + * Any task can increment and decrement the count field without lock. + * So in general, code holding container_mutex can't rely on the count + * field not changing. However, if the count goes to zero, then only + * attach_task() can increment it again. Because a count of zero + * means that no tasks are currently attached, therefore there is no + * way a task attached to that container can fork (the other way to + * increment the count). So code holding container_mutex can safely + * assume that if the count is zero, it will stay zero. 
Similarly, if + * a task holds container_mutex on a container with zero count, it + * knows that the container won't be removed, as container_rmdir() + * needs that mutex. + * + * The container_common_file_write handler for operations that modify + * the container hierarchy holds container_mutex across the entire operation, + * single threading all such container modifications across the system. + * + * The fork and exit callbacks container_fork() and container_exit(), don't + * (usually) take container_mutex. These are the two most performance + * critical pieces of code here. The exception occurs on container_exit(), + * when a task in a notify_on_release container exits. Then container_mutex + * is taken, and if the container count is zero, a usermode call is made + * to /sbin/container_release_agent with the name of the container (path + * relative to the root of container file system) as the argument. + * + * A container can only be deleted if both its 'count' of using tasks + * is zero, and its list of 'children' containers is empty. Since all + * tasks in the system use _some_ container, and since there is always at + * least one task in the system (init, pid == 1), therefore, top_container + * always has either children containers and/or using tasks. So we don't + * need a special hack to ensure that top_container cannot be deleted. + * + * The task_lock() exception + * + * The need for this exception arises from the action of + * attach_task(), which overwrites one task's container pointer with + * another. It does so using container_mutex, however there are + * several performance critical places that need to reference + * task->container without the expense of grabbing a system global + * mutex. Therefore except as noted below, when dereferencing or, as + * in attach_task(), modifying a task's container pointer we use + * task_lock(), which acts on a spinlock (task->alloc_lock) already in + * the task_struct routinely used for such matters. + * + * P.S.
One more locking exception. RCU is used to guard the + * update of a tasks container pointer by attach_task() + */ + +/** + * container_lock - lock out any changes to container structures + * + */ + +void container_lock(void) +{ + mutex_lock(&container_mutex); +} + +/** + * container_unlock - release lock on container changes + * + * Undo the lock taken in a previous container_lock() call. + */ + +void container_unlock(void) +{ + mutex_unlock(&container_mutex); +} + +/* + * A couple of forward declarations required, due to cyclic reference loop: + * container_mkdir -> container_create -> container_populate_dir -> + * container_add_file -> container_create_file -> container_dir_inode_operations + * -> container_mkdir. + */ + +static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode); +static int container_rmdir(struct inode *unused_dir, struct dentry *dentry); +static int container_populate_dir(struct container *cont); +static struct inode_operations container_dir_inode_operations; +static struct file_operations proc_containerstats_operations; + +static struct inode *container_new_inode(mode_t mode, struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + static struct backing_dev_info container_backing_dev_info = { + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + }; + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_mapping->backing_dev_info = &container_backing_dev_info; + } + return inode; +} + +static void container_diput(struct dentry *dentry, struct inode *inode) +{ + /* is dentry a directory ? 
if so, kfree() associated container */ + if (S_ISDIR(inode->i_mode)) { + struct container *cont = dentry->d_fsdata; + BUG_ON(!(container_is_removed(cont))); + kfree(cont); + } + iput(inode); +} + +static struct dentry *container_get_dentry(struct dentry *parent, + const char *name) +{ + struct dentry *d = lookup_one_len(name, parent, strlen(name)); + static struct dentry_operations container_dops = { + .d_iput = container_diput, + }; + + if (!IS_ERR(d)) + d->d_op = &container_dops; + return d; +} + +static void remove_dir(struct dentry *d) +{ + struct dentry *parent = dget(d->d_parent); + + d_delete(d); + simple_rmdir(parent->d_inode, d); + dput(parent); +} + +static void container_clear_directory(struct dentry *dentry) +{ + struct list_head *node; + + BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + spin_lock(&dcache_lock); + node = dentry->d_subdirs.next; + while (node != &dentry->d_subdirs) { + struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + list_del_init(node); + if (d->d_inode) { + /* This should never be called on a container + * directory with child containers */ + BUG_ON(d->d_inode->i_mode & S_IFDIR); + d = dget_locked(d); + spin_unlock(&dcache_lock); + d_delete(d); + simple_unlink(dentry->d_inode, d); + dput(d); + spin_lock(&dcache_lock); + } + node = dentry->d_subdirs.next; + } + spin_unlock(&dcache_lock); +} + +/* + * NOTE : the dentry must have been dget()'ed + */ +static void container_d_remove_dir(struct dentry *dentry) +{ + container_clear_directory(dentry); + + spin_lock(&dcache_lock); + list_del_init(&dentry->d_u.d_child); + spin_unlock(&dcache_lock); + remove_dir(dentry); +} + +static int rebind_subsystems(struct containerfs_root *root, + unsigned long final_bits) +{ + unsigned long added_bits, removed_bits; + struct container *cont = &root->top_container; + int i; + + removed_bits = root->subsys_bits & ~final_bits; + added_bits = final_bits & ~root->subsys_bits; + /* Check that any added subsystems are currently free */ + 
for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + unsigned long long bit = 1ull << i; + struct container_subsys *ss = subsys[i]; + if (!(bit & added_bits)) + continue; + if (ss->root != &rootnode) { + /* Subsystem isn't free */ + return -EBUSY; + } + } + + /* Currently we don't handle adding/removing subsystems when + * any subcontainers exist. This is theoretically supportable + * but involves complex error handling, so it's being left until + * later */ + if (!list_empty(&cont->children)) + return -EBUSY; + + /* Process each subsystem */ + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container_subsys *ss = subsys[i]; + unsigned long bit = 1UL << i; + if (bit & added_bits) { + /* We're binding this subsystem to this hierarchy */ + BUG_ON(cont->subsys[i]); + BUG_ON(!dummytop->subsys[i]); + BUG_ON(dummytop->subsys[i]->container != dummytop); + cont->subsys[i] = dummytop->subsys[i]; + cont->subsys[i]->container = cont; + list_add(&ss->sibling, &root->subsys_list); + rcu_assign_pointer(ss->root, root); + if (ss->bind) + ss->bind(ss, cont); + + } else if (bit & removed_bits) { + /* We're removing this subsystem */ + BUG_ON(cont->subsys[i] != dummytop->subsys[i]); + BUG_ON(cont->subsys[i]->container != cont); + if (ss->bind) + ss->bind(ss, dummytop); + dummytop->subsys[i]->container = dummytop; + cont->subsys[i] = NULL; + rcu_assign_pointer(subsys[i]->root, &rootnode); + list_del(&ss->sibling); + } else if (bit & final_bits) { + /* Subsystem state should already exist */ + BUG_ON(!cont->subsys[i]); + } else { + /* Subsystem state shouldn't exist */ + BUG_ON(cont->subsys[i]); + } + } + root->subsys_bits = final_bits; + synchronize_rcu(); + + return 0; +} + +/* + * Release the last use of a hierarchy. Will never be called when + * there are active subcontainers since each subcontainer bumps the + * value of sb->s_active. 
+ */ +static void container_put_super(struct super_block *sb) +{ + struct containerfs_root *root = sb->s_fs_info; + struct container *cont = &root->top_container; + int ret; + + root->sb = NULL; + sb->s_fs_info = NULL; + + mutex_lock(&container_mutex); + + BUG_ON(root->number_of_containers != 1); + BUG_ON(!list_empty(&cont->children)); + BUG_ON(!list_empty(&cont->sibling)); + BUG_ON(!root->subsys_bits); + + /* Rebind all subsystems back to the default hierarchy */ + ret = rebind_subsystems(root, 0); + BUG_ON(ret); + + write_lock(&css_group_lock); + while (!list_empty(&cont->css_groups)) { + struct cg_container_link *link; + link = list_entry(cont->css_groups.next, + struct cg_container_link, cont_link_list); + list_del(&link->cg_link_list); + list_del(&link->cont_link_list); + kfree(link); + } + write_unlock(&css_group_lock); + + list_del(&root->root_list); + root_count--; + kfree(root); + mutex_unlock(&container_mutex); +} + +static int container_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct containerfs_root *root = vfs->mnt_sb->s_fs_info; + struct container_subsys *ss; + + for_each_subsys(root, ss) + seq_printf(seq, ",%s", ss->name); + return 0; +} + +/* Convert a hierarchy specifier into a bitmask. 
LL=container_mutex */
+static int parse_containerfs_options(char *opts, unsigned long *bits)
+{
+	/* A NULL option string means "all subsystems" */
+	char *token, *o = opts ?: "all";
+
+	*bits = 0;
+
+	/* strsep() splits the comma-separated list in place */
+	while ((token = strsep(&o, ",")) != NULL) {
+		/* Empty tokens (e.g. "a,,b") are rejected */
+		if (!*token)
+			return -EINVAL;
+		if (!strcmp(token, "all")) {
+			/* Shift an unsigned long so the mask is built in
+			 * the same width as *bits, not in a signed int */
+			*bits = (1UL << CONTAINER_SUBSYS_COUNT) - 1;
+		} else {
+			struct container_subsys *ss;
+			int i;
+			for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
+				ss = subsys[i];
+				if (!strcmp(token, ss->name)) {
+					*bits |= 1UL << i;
+					break;
+				}
+			}
+			/* No subsystem carries this name */
+			if (i == CONTAINER_SUBSYS_COUNT)
+				return -ENOENT;
+		}
+	}
+
+	/* We can't have an empty hierarchy */
+	if (!*bits)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Remount: re-parse the option string and rebind the hierarchy to the
+ * requested subsystem set, then repopulate the top-level directory.
+ * Takes the top directory's i_mutex, then container_mutex.
+ * Returns 0 or the error from option parsing / rebinding.
+ */
+static int container_remount(struct super_block *sb, int *flags, char *data)
+{
+	int ret = 0;
+	unsigned long subsys_bits;
+	struct containerfs_root *root = sb->s_fs_info;
+	struct container *cont = &root->top_container;
+
+	mutex_lock(&cont->dentry->d_inode->i_mutex);
+	mutex_lock(&container_mutex);
+
+	/* See what subsystems are wanted */
+	ret = parse_containerfs_options(data, &subsys_bits);
+	if (ret)
+		goto out_unlock;
+
+	ret = rebind_subsystems(root, subsys_bits);
+
+	/* (re)populate subsystem files */
+	if (!ret)
+		container_populate_dir(cont);
+
+ out_unlock:
+	mutex_unlock(&container_mutex);
+	mutex_unlock(&cont->dentry->d_inode->i_mutex);
+	return ret;
+}
+
+static struct super_operations container_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+	.put_super = container_put_super,
+	.show_options = container_show_options,
+	.remount_fs = container_remount,
+};
+
+/*
+ * Fill in a new superblock for the hierarchy passed in 'options'
+ * (a struct containerfs_root *): create the root directory inode and
+ * dentry and cross-link superblock, hierarchy and top container.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int container_fill_super(struct super_block *sb, void *options,
+				int unused_silent)
+{
+	struct inode *inode;
+	struct dentry *root;
+	struct containerfs_root *hroot = options;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = CONTAINER_SUPER_MAGIC;
+	sb->s_op = &container_ops;
+
+	inode = container_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
+	if (!inode)
+		return -ENOMEM;
+
+	/* The container directory operations are the only i_op this inode
+	 * ever uses; the former store of simple_dir_inode_operations was
+	 * immediately overwritten and has been dropped */
+	inode->i_fop = &simple_dir_operations;
+	inode->i_op = &container_dir_inode_operations;
+	/* directories start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+
+	root = d_alloc_root(inode);
+	if (!root) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	sb->s_root = root;
+	root->d_fsdata = &hroot->top_container;
+	hroot->top_container.dentry = root;
+
+	strcpy(hroot->release_agent_path, "");
+	sb->s_fs_info = hroot;
+	hroot->sb = sb;
+
+	return 0;
+}
+
+/*
+ * Initialise a hierarchy root and its embedded top container, and add
+ * the root to the global 'roots' list (bumping root_count).
+ */
+static void init_container_root(struct containerfs_root *root)
+{
+	struct container *cont = &root->top_container;
+	INIT_LIST_HEAD(&root->subsys_list);
+	root->number_of_containers = 1;
+	cont->root = root;
+	cont->top_container = cont;
+	INIT_LIST_HEAD(&cont->sibling);
+	INIT_LIST_HEAD(&cont->children);
+	INIT_LIST_HEAD(&cont->css_groups);
+	INIT_LIST_HEAD(&cont->release_list);
+	list_add(&root->root_list, &roots);
+	root_count++;
+}
+
+/*
+ * Mount entry point.  Parses the requested subsystem set from 'data',
+ * then either reuses the hierarchy whose subsys_bits match exactly or
+ * creates a new one.  A partial overlap with an existing hierarchy
+ * fails with -EBUSY.  Runs entirely under container_mutex.
+ */
+static int container_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
+{
+	unsigned long subsys_bits = 0;
+	int ret = 0;
+	struct containerfs_root *root = NULL;
+	int use_existing = 0;
+
+	mutex_lock(&container_mutex);
+
+	/* First find the desired set of resource controllers */
+	ret = parse_containerfs_options(data, &subsys_bits);
+	if (ret)
+		goto out_unlock;
+
+	/* See if we already have a hierarchy containing this set */
+
+	for_each_root(root) {
+		/* We match - use this hierarchy */
+		if (root->subsys_bits == subsys_bits) {
+			use_existing = 1;
+			break;
+		}
+		/* We clash - fail */
+		if (root->subsys_bits & subsys_bits) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+	}
+
+	if (!use_existing) {
+		/* We need a new root */
+		struct list_head tmp_cg_links, *l;
+		root = kzalloc(sizeof(*root), GFP_KERNEL);
+		if (!root) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		/* We're accessing css_group_count without locking
+		 * here, but that's OK - it can only be increased by
+		 * someone holding container_mutex, and that's us. The
+		 * worst that can happen is that we have some link
+		 * structures left over */
+		ret = allocate_cg_links(css_group_count, &tmp_cg_links);
+		if (ret < 0) {
+			kfree(root);
+			goto out_unlock;
+		}
+		init_container_root(root);
+
+		/* Link the top container in this hierarchy into all
+		 * the css_group objects */
+		write_lock(&css_group_lock);
+		l = &init_css_group.list;
+		do {
+			struct css_group *cg;
+			struct cg_container_link *link;
+			cg = list_entry(l, struct css_group, list);
+			BUG_ON(list_empty(&tmp_cg_links));
+			link = list_entry(tmp_cg_links.next,
+					  struct cg_container_link,
+					  cont_link_list);
+			list_del(&link->cont_link_list);
+			link->cg = cg;
+			list_add(&link->cont_link_list,
+				 &root->top_container.css_groups);
+			list_add(&link->cg_link_list, &cg->cg_links);
+			l = l->next;
+		} while (l != &init_css_group.list);
+		write_unlock(&css_group_lock);
+
+		while (!list_empty(&tmp_cg_links)) {
+			/* Probably shouldn't happen */
+			struct cg_container_link *link;
+			printk(KERN_INFO "Freeing unused cg_container_link\n");
+			link = list_entry(tmp_cg_links.next,
+					  struct cg_container_link,
+					  cont_link_list);
+			list_del(&link->cont_link_list);
+			kfree(link);
+		}
+	}
+
+	if (!root->sb) {
+		/* We need a new superblock for this container combination */
+		struct container *cont = &root->top_container;
+
+		BUG_ON(root->subsys_bits);
+		ret = get_sb_nodev(fs_type, flags, root,
+				   container_fill_super, mnt);
+		/* NOTE(review): when a freshly allocated root reaches
+		 * this failure path it stays on the 'roots' list with
+		 * its css_group links - looks like a leak; confirm and
+		 * add teardown if so */
+		if (ret)
+			goto out_unlock;
+
+		BUG_ON(!list_empty(&cont->sibling));
+		BUG_ON(!list_empty(&cont->children));
+		BUG_ON(root->number_of_containers != 1);
+
+		ret = rebind_subsystems(root, subsys_bits);
+
+		/* It's safe to nest i_mutex inside container_mutex in
+		 * this case, since no-one else can be accessing this
+		 * directory yet */
+		mutex_lock(&cont->dentry->d_inode->i_mutex);
+		container_populate_dir(cont);
+		mutex_unlock(&cont->dentry->d_inode->i_mutex);
+		BUG_ON(ret);
+	} else {
+		/* Reuse the existing superblock */
+		down_write(&(root->sb->s_umount));
+		ret = simple_set_mnt(mnt, root->sb);
+		if (!ret)
+			atomic_inc(&root->sb->s_active);
+		else
+			/* Don't return an error with s_umount still
+			 * write-held by us */
+			up_write(&(root->sb->s_umount));
+	}
+
+ out_unlock:
+	mutex_unlock(&container_mutex);
+	return ret;
+}
+
+static struct file_system_type container_fs_type = {
+	.name = "container",
+	.get_sb = container_get_sb,
+	.kill_sb = kill_litter_super,
+};
+
+/* d_fsdata of a container directory dentry is its struct container */
+static inline struct container *__d_cont(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+/* d_fsdata of a control file dentry is its struct cftype */
+static inline struct cftype *__d_cft(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+/*
+ * Called with container_mutex held. Writes path of container into buf.
+ * Returns 0 on success, -errno on error.
+ *
+ * The path is assembled backwards from the end of 'buf' by walking
+ * ->parent links, then moved to the start of the buffer.  No '/' is
+ * inserted in front of the name of a container whose parent is the
+ * top container.  -ENAMETOOLONG is returned when 'buflen' is too small.
+ */
+int container_path(const struct container *cont, char *buf, int buflen)
+{
+	char *start;
+
+	start = buf + buflen;
+
+	*--start = '\0';
+	for (;;) {
+		int len = cont->dentry->d_name.len;
+		if ((start -= len) < buf)
+			return -ENAMETOOLONG;
+		memcpy(start, cont->dentry->d_name.name, len);
+		cont = cont->parent;
+		if (!cont)
+			break;
+		if (!cont->parent)
+			continue;
+		if (--start < buf)
+			return -ENAMETOOLONG;
+		*start = '/';
+	}
+	memmove(buf, start, buf + buflen - start);
+	return 0;
+}
+
+/*
+ * Look up the first subsystem on the subsys_list of cont's hierarchy.
+ * If 'css' is non-NULL it receives cont's state for that subsystem;
+ * if 'subsys_id' is non-NULL it receives the subsystem's id.
+ * The hierarchy must have at least one subsystem bound (BUG otherwise).
+ */
+static void get_first_subsys(const struct container *cont,
+			struct container_subsys_state **css, int *subsys_id)
+{
+	const struct containerfs_root *root = cont->root;
+	const struct container_subsys *test_ss;
+	BUG_ON(list_empty(&root->subsys_list));
+	test_ss = list_entry(root->subsys_list.next,
+			     struct container_subsys, sibling);
+	if (css) {
+		*css = cont->subsys[test_ss->subsys_id];
+		BUG_ON(!*css);
+	}
+	if (subsys_id)
+		*subsys_id = test_ss->subsys_id;
+}
+
+/*
+ * Attach task 'tsk' to container 'cont'
+ *
+ * Call holding container_mutex. May take task_lock of
+ * the task 'tsk' during call.
+ */
+static int attach_task(struct container *cont, struct task_struct *tsk)
+{
+	struct css_group *old_cg = tsk->containers;
+	struct containerfs_root *hier = cont->root;
+	struct container_subsys *ss;
+	struct css_group *new_cg;
+	struct container *from;
+	int first_id;
+	int err;
+
+	/* Any subsystem of the hierarchy identifies the task's current
+	 * container in it; use the first one */
+	get_first_subsys(cont, NULL, &first_id);
+	from = task_container(tsk, first_id);
+
+	/* Already a member: nothing to move */
+	if (from == cont)
+		return 0;
+
+	/* Every subsystem gets the chance to veto the move */
+	for_each_subsys(hier, ss) {
+		err = ss->can_attach ? ss->can_attach(ss, cont, tsk) : 0;
+		if (err)
+			return err;
+	}
+
+	/* Find (or create) the css_group describing the task's set of
+	 * containers after the move */
+	new_cg = find_css_group(old_cg, cont);
+	if (!new_cg)
+		return -ENOMEM;
+
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		/* Too late - the task is on its way out */
+		task_unlock(tsk);
+		put_css_group(new_cg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->containers, new_cg);
+	/* A task that is linked into a css_group task list moves over
+	 * to the new group's list */
+	if (!list_empty(&tsk->cg_list)) {
+		write_lock(&css_group_lock);
+		list_del(&tsk->cg_list);
+		list_add(&tsk->cg_list, &new_cg->tasks);
+		write_unlock(&css_group_lock);
+	}
+	task_unlock(tsk);
+
+	/* Tell the subsystems that the move has happened */
+	for_each_subsys(hier, ss) {
+		if (ss->attach)
+			ss->attach(ss, cont, from, tsk);
+	}
+
+	set_bit(CONT_RELEASABLE, &from->flags);
+	/* Wait out RCU readers before dropping the old group */
+	synchronize_rcu();
+	put_css_group(old_cg);
+	return 0;
+}
+
+/*
+ * Attach task with pid 'pid' to container 'cont'.
Call with
+ * container_mutex, may take task_lock of task
+ *
+ * 'pidbuf' holds the pid as decimal text; pid 0 means the calling task.
+ * Returns -EIO if no number can be parsed, -ESRCH if the task does not
+ * exist or is exiting, -EACCES if the caller's euid is non-zero and
+ * matches neither the target's uid nor its suid, otherwise the result
+ * of attach_task().
+ */
+static int attach_task_by_pid(struct container *cont, char *pidbuf)
+{
+	pid_t pid;
+	struct task_struct *tsk;
+	int ret;
+
+	if (sscanf(pidbuf, "%d", &pid) != 1)
+		return -EIO;
+
+	if (pid) {
+		rcu_read_lock();
+		tsk = find_task_by_pid(pid);
+		if (!tsk || tsk->flags & PF_EXITING) {
+			rcu_read_unlock();
+			return -ESRCH;
+		}
+		/* Pin the task before leaving the RCU read section */
+		get_task_struct(tsk);
+		rcu_read_unlock();
+
+		if ((current->euid) && (current->euid != tsk->uid)
+		    && (current->euid != tsk->suid)) {
+			put_task_struct(tsk);
+			return -EACCES;
+		}
+	} else {
+		tsk = current;
+		get_task_struct(tsk);
+	}
+
+	ret = attach_task(cont, tsk);
+	put_task_struct(tsk);
+	return ret;
+}
+
+/* The various types of files and directories in a container file system */
+
+enum container_filetype {
+	FILE_ROOT,
+	FILE_DIR,
+	FILE_TASKLIST,
+	FILE_NOTIFY_ON_RELEASE,
+	FILE_RELEASABLE,
+	FILE_RELEASE_AGENT,
+};
+
+/*
+ * Write handler shared by the "tasks", "notify_on_release" and
+ * "release_agent" files; cft->private selects the behaviour.
+ * The user data (less than PATH_MAX bytes) is copied into a
+ * NUL-terminated kernel buffer and handled under container_mutex.
+ * Returns nbytes on success or a negative errno.
+ */
+static ssize_t container_common_file_write(struct container *cont,
+					   struct cftype *cft,
+					   struct file *file,
+					   const char __user *userbuf,
+					   size_t nbytes, loff_t *unused_ppos)
+{
+	enum container_filetype type = cft->private;
+	char *buffer;
+	int retval = 0;
+
+	if (nbytes >= PATH_MAX)
+		return -E2BIG;
+
+	/* +1 for nul-terminator */
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (buffer == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buffer, userbuf, nbytes)) {
+		retval = -EFAULT;
+		goto out1;
+	}
+	buffer[nbytes] = 0;	/* nul-terminate */
+
+	mutex_lock(&container_mutex);
+
+	if (container_is_removed(cont)) {
+		retval = -ENODEV;
+		goto out2;
+	}
+
+	switch (type) {
+	case FILE_TASKLIST:
+		retval = attach_task_by_pid(cont, buffer);
+		break;
+	case FILE_NOTIFY_ON_RELEASE:
+		clear_bit(CONT_RELEASABLE, &cont->flags);
+		if (simple_strtoul(buffer, NULL, 10) != 0)
+			set_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
+		else
+			clear_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
+		break;
+	case FILE_RELEASE_AGENT:
+	{
+		struct containerfs_root *root = cont->root;
+		if (nbytes < sizeof(root->release_agent_path)) {
+			/* We never write anything other than '\0'
+			 * into the last char of release_agent_path,
+			 * so it always remains a NUL-terminated
+			 * string */
+			strncpy(root->release_agent_path, buffer, nbytes);
+			root->release_agent_path[nbytes] = 0;
+		} else {
+			retval = -ENOSPC;
+		}
+		break;
+	}
+	default:
+		retval = -EINVAL;
+		goto out2;
+	}
+
+	if (retval == 0)
+		retval = nbytes;
+out2:
+	mutex_unlock(&container_mutex);
+out1:
+	kfree(buffer);
+	return retval;
+}
+
+/*
+ * VFS write entry point for control files: dispatch to the cftype's
+ * write handler.  The container is taken from the parent directory.
+ */
+static ssize_t container_file_write(struct file *file, const char __user *buf,
+				    size_t nbytes, loff_t *ppos)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct container *cont = __d_cont(file->f_dentry->d_parent);
+
+	if (!cft)
+		return -ENODEV;
+	if (!cft->write)
+		return -EINVAL;
+
+	return cft->write(cont, cft, file, buf, nbytes, ppos);
+}
+
+/* Read helper for files that expose a single u64 through ->read_uint:
+ * formats the value as decimal followed by a newline */
+static ssize_t container_read_uint(struct container *cont, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
+{
+	char tmp[64];
+	u64 val = cft->read_uint(cont, cft);
+	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+}
+
+/*
+ * Read handler for the common files; only FILE_RELEASE_AGENT is
+ * supported, other types give -EINVAL.  The hierarchy's release agent
+ * path is copied into a scratch page under container_mutex and
+ * returned with a trailing newline.
+ */
+static ssize_t container_common_file_read(struct container *cont,
+					  struct cftype *cft,
+					  struct file *file,
+					  char __user *buf,
+					  size_t nbytes, loff_t *ppos)
+{
+	enum container_filetype type = cft->private;
+	char *page;
+	ssize_t retval = 0;
+	char *s;
+
+	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+		return -ENOMEM;
+
+	s = page;
+
+	switch (type) {
+	case FILE_RELEASE_AGENT:
+	{
+		struct containerfs_root *root;
+		size_t n;
+		mutex_lock(&container_mutex);
+		root = cont->root;
+		n = strnlen(root->release_agent_path,
+			    sizeof(root->release_agent_path));
+		/* Leave room in the page for the '\n' appended below */
+		n = min(n, (size_t) (PAGE_SIZE - 1));
+		strncpy(s, root->release_agent_path, n);
+		mutex_unlock(&container_mutex);
+		s += n;
+		break;
+	}
+	default:
+		retval = -EINVAL;
+		goto out;
+	}
+	*s++ = '\n';
+
+	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
+out:
+	free_page((unsigned long)page);
+	return retval;
+}
+
+/*
+ * VFS read entry point for control files: use the cftype's ->read if
+ * it has one, else ->read_uint, else fail with -EINVAL.
+ */
+static ssize_t container_file_read(struct file *file, char __user *buf,
+				   size_t nbytes, loff_t *ppos)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct container *cont = __d_cont(file->f_dentry->d_parent);
+
+	if (!cft)
+		return -ENODEV;
+
+	if (cft->read)
+		return cft->read(cont, cft, file, buf, nbytes, ppos);
+	if (cft->read_uint)
+		return container_read_uint(cont, cft, file, buf, nbytes, ppos);
+	return -EINVAL;
+}
+
+/* VFS open entry point: generic checks, then the cftype's optional
+ * ->open hook */
+static int container_file_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct cftype *cft;
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	cft = __d_cft(file->f_dentry);
+	if (!cft)
+		return -ENODEV;
+	if (cft->open)
+		err = cft->open(inode, file);
+	else
+		err = 0;
+
+	return err;
+}
+
+/* VFS release entry point: call the cftype's optional ->release hook.
+ * A missing cftype is tolerated, matching the NULL checks in the
+ * open/read/write paths */
+static int container_file_release(struct inode *inode, struct file *file)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	if (cft && cft->release)
+		return cft->release(inode, file);
+	return 0;
+}
+
+/*
+ * container_rename - Only allow simple rename of directories in place.
+ */
+static int container_rename(struct inode *old_dir, struct dentry *old_dentry,
+			    struct inode *new_dir, struct dentry *new_dentry)
+{
+	int err;
+
+	if (!S_ISDIR(old_dentry->d_inode->i_mode))
+		err = -ENOTDIR;		/* only directories can be renamed */
+	else if (new_dentry->d_inode)
+		err = -EEXIST;		/* the target name is taken */
+	else if (old_dir != new_dir)
+		err = -EIO;		/* no moves between directories */
+	else
+		err = simple_rename(old_dir, old_dentry,
+				    new_dir, new_dentry);
+
+	return err;
+}
+
+/* File operations for control files */
+static struct file_operations container_file_operations = {
+	.open = container_file_open,
+	.release = container_file_release,
+	.read = container_file_read,
+	.write = container_file_write,
+	.llseek = generic_file_llseek,
+};
+
+/* Inode operations for container directories */
+static struct inode_operations container_dir_inode_operations = {
+	.lookup = simple_lookup,
+	.rename = container_rename,
+	.mkdir = container_mkdir,
+	.rmdir = container_rmdir,
+};
+
+/*
+ * Give 'dentry' a new inode of the given mode on 'sb'.
+ * Returns -ENOENT for a NULL dentry, -EEXIST if the dentry already has
+ * an inode and -ENOMEM if no inode can be allocated.  On success an
+ * extra dentry reference is held; for a directory the new inode's
+ * i_mutex is additionally left locked for the caller.
+ */
+static int container_create_file(struct dentry *dentry, int mode,
+				struct super_block *sb)
+{
+	struct inode *ino;
+
+	if (!dentry)
+		return -ENOENT;
+	if (dentry->d_inode)
+		return -EEXIST;
+
+	ino = container_new_inode(mode, sb);
+	if (!ino)
+		return -ENOMEM;
+
+	if (S_ISREG(mode)) {
+		ino->i_size = 0;
+		ino->i_fop = &container_file_operations;
+	} else if (S_ISDIR(mode)) {
+		ino->i_op = &container_dir_inode_operations;
+		ino->i_fop = &simple_dir_operations;
+
+		/* a directory begins life with i_nlink == 2 (for ".") */
+		inc_nlink(ino);
+
+		/* hand the directory back locked, so the creator can
+		 * populate it without racing with another mkdir */
+		mutex_lock(&ino->i_mutex);
+	}
+
+	d_instantiate(dentry, ino);
+	/* extra reference keeps the dentry pinned in core */
+	dget(dentry);
+	return 0;
+}
+
+/*
+ * container_create_dir - create a directory for an object.
+ * cont: the container we create the directory for.
+ *	It must have a valid ->parent field
+ *	And we are going to fill its ->dentry field.
+ * dentry: the dentry of the new container directory.
+ * mode: mode to set on new directory.
+ */
+static int container_create_dir(struct container *cont, struct dentry *dentry,
+				int mode)
+{
+	struct dentry *parent_dentry = cont->parent->dentry;
+	int err;
+
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	err = container_create_file(dentry, S_IFDIR | mode, cont->root->sb);
+	if (err == 0) {
+		/* Tie dentry and container together and account for the
+		 * new subdirectory in the parent's link count */
+		dentry->d_fsdata = cont;
+		inc_nlink(parent_dentry->d_inode);
+		cont->dentry = dentry;
+	}
+	dput(dentry);
+
+	return err;
+}
+
+/*
+ * Create the control file described by 'cft' in cont's directory.
+ * The directory's i_mutex must be held by the caller.
+ * Returns 0 or a negative errno.
+ */
+int container_add_file(struct container *cont, const struct cftype *cft)
+{
+	struct dentry *dir = cont->dentry;
+	struct dentry *file_dentry;
+	int err;
+
+	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
+
+	file_dentry = container_get_dentry(dir, cft->name);
+	if (IS_ERR(file_dentry))
+		return PTR_ERR(file_dentry);
+
+	err = container_create_file(file_dentry, 0644 | S_IFREG,
+				    cont->root->sb);
+	if (err == 0)
+		file_dentry->d_fsdata = (void *)cft;
+	dput(file_dentry);
+
+	return err;
+}
+
+/*
+ * Create 'count' control files from the array 'cft', stopping at the
+ * first failure and returning its error code.
+ */
+int container_add_files(struct container *cont, const struct cftype cft[],
+			int count)
+{
+	int idx = 0;
+
+	while (idx < count) {
+		int err = container_add_file(cont, &cft[idx]);
+
+		if (err)
+			return err;
+		idx++;
+	}
+	return 0;
+}
+
+/* Count the number of tasks in a container.
*/
+
+int container_task_count(const struct container *cont)
+{
+	int count = 0;
+	struct list_head *l;
+
+	/* The count is the sum of the reference counts of all css_groups
+	 * linked to this container, read under css_group_lock */
+	read_lock(&css_group_lock);
+	l = cont->css_groups.next;
+	while (l != &cont->css_groups) {
+		struct cg_container_link *link =
+			list_entry(l, struct cg_container_link, cont_link_list);
+		count += atomic_read(&link->cg->ref.refcount);
+		l = l->next;
+	}
+	read_unlock(&css_group_lock);
+	return count;
+}
+
+/* Advance a list_head iterator pointing at a cg_container_link's.
+ * When the list is exhausted both it->cg_link and it->task are set to
+ * NULL. */
+static void container_advance_iter(struct container *cont,
+					  struct container_iter *it)
+{
+	struct list_head *l = it->cg_link;
+	struct cg_container_link *link;
+	struct css_group *cg;
+
+	/* Advance to the next non-empty css_group */
+	do {
+		l = l->next;
+		if (l == &cont->css_groups) {
+			it->cg_link = NULL;
+			/* Never leave a stale or uninitialised task
+			 * cursor behind */
+			it->task = NULL;
+			return;
+		}
+		link = list_entry(l, struct cg_container_link, cont_link_list);
+		cg = link->cg;
+	} while (list_empty(&cg->tasks));
+	it->cg_link = l;
+	it->task = cg->tasks.next;
+}
+
+/* Begin iterating over the tasks of 'cont'.  Takes css_group_lock for
+ * reading; it stays held until container_iter_end(). */
+void container_iter_start(struct container *cont, struct container_iter *it)
+{
+	read_lock(&css_group_lock);
+	it->cg_link = &cont->css_groups;
+	container_advance_iter(cont, it);
+}
+
+/* Return the task the iterator points at and step to the next one,
+ * or return NULL when there are no more tasks. */
+struct task_struct *container_iter_next(struct container *cont,
+					struct container_iter *it)
+{
+	struct task_struct *res;
+	struct list_head *l;
+
+	/* If the iterator cg is NULL, we have no tasks */
+	if (!it->cg_link)
+		return NULL;
+	/* it->task is only meaningful while cg_link is set, so it is
+	 * read after the check above */
+	l = it->task;
+	res = list_entry(l, struct task_struct, cg_list);
+	/* Advance iterator to find next entry */
+	l = l->next;
+	if (l == &res->containers->tasks) {
+		/* We reached the end of this task list - move on to
+		 * the next cg_container_link */
+		container_advance_iter(cont, it);
+	} else {
+		it->task = l;
+	}
+	return res;
+}
+
+/* Finish an iteration: drop the css_group_lock read lock taken by
+ * container_iter_start(). */
+void container_iter_end(struct container *cont, struct container_iter *it)
+{
+	read_unlock(&css_group_lock);
+}
+
+/*
+ * Stuff for reading the 'tasks' file.
+ *
+ * Reading this file can return large amounts of data if a container has
+ * *lots* of attached tasks. So it may need several calls to read(),
+ * but we cannot guarantee that the information we produce is correct
+ * unless we produce it entirely atomically.
+ *
+ * Upon tasks file open(), a struct ctr_struct is allocated, that
+ * will have a pointer to an array (also allocated here). The struct
+ * ctr_struct * is stored in file->private_data. Its resources will
+ * be freed by release() when the file is closed. The array is used
+ * to sprintf the PIDs and then used by read().
+ */
+struct ctr_struct {
+	char *buf;	/* text prepared at open() time */
+	int bufsz;	/* number of bytes of text in buf */
+};
+
+/*
+ * Fill 'pidarray' with the pids of at most 'npids' tasks attached to
+ * container 'cont' and return how many were stored.  The walk uses the
+ * container iterator, which holds css_group_lock for reading between
+ * container_iter_start() and container_iter_end().
+ */
+static int pid_array_load(pid_t *pidarray, int npids, struct container *cont)
+{
+	struct container_iter iter;
+	struct task_struct *task;
+	int loaded = 0;
+
+	container_iter_start(cont, &iter);
+	for (;;) {
+		task = container_iter_next(cont, &iter);
+		if (!task)
+			break;
+		/* the array is full - ignore any further tasks */
+		if (unlikely(loaded == npids))
+			break;
+		pidarray[loaded] = pid_nr(task_pid(task));
+		loaded++;
+	}
+	container_iter_end(cont, &iter);
+
+	return loaded;
+}
+
+/* sort() comparison callback: orders pid_t values ascending */
+static int cmppid(const void *a, const void *b)
+{
+	pid_t lhs = *(pid_t *)a;
+	pid_t rhs = *(pid_t *)b;
+
+	return lhs - rhs;
+}
+
+/*
+ * Print the 'npids' pids in 'a' into 'buf', one decimal number per
+ * line.  At most 'sz' chars are written, but the returned count is the
+ * number of chars that a large enough buffer would have received.
+ */
+static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+{
+	int total = 0;
+	int idx = 0;
+
+	while (idx < npids) {
+		total += snprintf(buf + total, max(sz - total, 0),
+				  "%d\n", a[idx]);
+		idx++;
+	}
+	return total;
+}
+
+/*
+ * Handle an open on 'tasks' file. Prepare a buffer listing the
+ * process id's of tasks currently attached to the container being opened.
+ *
+ * Does not require any specific container mutexes, and does not take any.
+ */
+static int container_tasks_open(struct inode *unused, struct file *file)
+{
+	struct container *cont = __d_cont(file->f_dentry->d_parent);
+	struct ctr_struct *ctr;
+	pid_t *pids;
+	int count;
+	int needed;
+	char probe;
+
+	/* Nothing to prepare unless the file will be read */
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+	if (!ctr)
+		return -ENOMEM;
+
+	/*
+	 * Tasks that join the container after this count is taken will
+	 * not fit in the array and are left out.  A caller cannot tell
+	 * that apart from those tasks simply arriving a little later.
+	 */
+	count = container_task_count(cont);
+	if (!count) {
+		ctr->buf = NULL;
+		ctr->bufsz = 0;
+		goto done;
+	}
+
+	pids = kmalloc(count * sizeof(pid_t), GFP_KERNEL);
+	if (!pids)
+		goto free_ctr;
+
+	count = pid_array_load(pids, count, cont);
+	sort(pids, count, sizeof(pid_t), cmppid, NULL);
+
+	/* First pass into a one-char dummy only measures the text,
+	 * the second pass produces it */
+	needed = pid_array_to_buf(&probe, sizeof(probe), pids, count) + 1;
+	ctr->bufsz = needed;
+	ctr->buf = kmalloc(needed, GFP_KERNEL);
+	if (!ctr->buf)
+		goto free_pids;
+	ctr->bufsz = pid_array_to_buf(ctr->buf, needed, pids, count);
+
+	kfree(pids);
+done:
+	file->private_data = ctr;
+	return 0;
+
+free_pids:
+	kfree(pids);
+free_ctr:
+	kfree(ctr);
+	return -ENOMEM;
+}
+
+/* Read from the text that was prepared when the file was opened */
+static ssize_t container_tasks_read(struct container *cont,
+				    struct cftype *cft,
+				    struct file *file, char __user *buf,
+				    size_t nbytes, loff_t *ppos)
+{
+	struct ctr_struct *prepared = file->private_data;
+
+	return simple_read_from_buffer(buf, nbytes, ppos,
+				       prepared->buf, prepared->bufsz);
+}
+
+/* Free what container_tasks_open() allocated for a readable open */
+static int container_tasks_release(struct inode *unused_inode,
+					struct file *file)
+{
+	struct ctr_struct *prepared;
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	prepared = file->private_data;
+	kfree(prepared->buf);
+	kfree(prepared);
+	return 0;
+}
+
+/* read_uint handler for the "notify_on_release" file */
+static u64 container_read_notify_on_release(struct container *cont,
+					    struct cftype *cft)
+{
+	return notify_on_release(cont);
+}
+
+/* read_uint handler for the "releasable" file */
+static u64 container_read_releasable(struct container *cont, struct cftype *cft)
+{
+	return test_bit(CONT_RELEASABLE, &cont->flags);
+}
+
+/*
+ * Control files present in every container directory.  For the common
+ * read/write handlers, 'private' tells which file is being accessed.
+ */
+static struct cftype files[] = {
+	{
+		.name = "tasks",
+		.private = FILE_TASKLIST,
+		.open = container_tasks_open,
+		.release = container_tasks_release,
+		.read = container_tasks_read,
+		.write = container_common_file_write,
+	},
+
+	{
+		.name = "notify_on_release",
+		.private = FILE_NOTIFY_ON_RELEASE,
+		.read_uint = container_read_notify_on_release,
+		.write = container_common_file_write,
+	},
+
+	{
+		.name = "releasable",
+		.private = FILE_RELEASABLE,
+		.read_uint = container_read_releasable,
+	}
+};
+
+/* Control file that only the top container of a hierarchy gets */
+static struct cftype cft_release_agent = {
+	.name = "release_agent",
+	.private = FILE_RELEASE_AGENT,
+	.read = container_common_file_read,
+	.write = container_common_file_write,
+};
+
+/*
+ * (Re)create the control files of cont's directory: the common files,
+ * "release_agent" for a top container, then whatever each subsystem
+ * of the hierarchy adds through ->populate.
+ * Returns 0, or the first negative error encountered.
+ */
+static int container_populate_dir(struct container *cont)
+{
+	struct container_subsys *ss;
+	int err;
+
+	/* Start from an empty directory */
+	container_clear_directory(cont->dentry);
+
+	err = container_add_files(cont, files, ARRAY_SIZE(files));
+	if (err < 0)
+		return err;
+
+	if (cont == cont->top_container) {
+		err = container_add_file(cont, &cft_release_agent);
+		if (err < 0)
+			return err;
+	}
+
+	for_each_subsys(cont->root, ss) {
+		if (ss->populate) {
+			err = ss->populate(ss, cont);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Point a subsystem's state object back at its container and zero
+ * its reference count */
+static void init_container_css(struct container_subsys *ss,
+			       struct container *cont)
+{
+	struct container_subsys_state *state = cont->subsys[ss->subsys_id];
+
+	state->container = cont;
+	atomic_set(&state->refcnt, 0);
+}
+
+/*
+ * container_create - create a container
+ * parent: container that will be parent of the new container.
+ * dentry: dentry of the new container's directory.
+ * mode: mode to set on new inode
+ *
+ * Must be called with the mutex on the parent inode held
+ *
+ * Returns 0 on success.  A failure of container_populate_dir() is
+ * deliberately not reported.  On any other failure the subsystem
+ * state created so far is destroyed, the superblock reference is
+ * dropped and a negative errno is returned.
+ */
+
+static long container_create(struct container *parent, struct dentry *dentry,
+			     int mode)
+{
+	struct container *cont;
+	struct containerfs_root *root = parent->root;
+	int err = 0;
+	struct container_subsys *ss;
+	struct super_block *sb = root->sb;
+
+	cont = kzalloc(sizeof(*cont), GFP_KERNEL);
+	if (!cont)
+		return -ENOMEM;
+
+	/* Grab a reference on the superblock so the hierarchy doesn't
+	 * get deleted on unmount if there are child containers.  This
+	 * can be done outside container_mutex, since the sb can't
+	 * disappear while someone has an open control file on the
+	 * fs */
+	atomic_inc(&sb->s_active);
+
+	mutex_lock(&container_mutex);
+
+	cont->flags = 0;
+	INIT_LIST_HEAD(&cont->sibling);
+	INIT_LIST_HEAD(&cont->children);
+	INIT_LIST_HEAD(&cont->css_groups);
+	INIT_LIST_HEAD(&cont->release_list);
+
+	cont->parent = parent;
+	cont->root = parent->root;
+	cont->top_container = parent->top_container;
+
+	/* Let every subsystem of the hierarchy create its state */
+	for_each_subsys(root, ss) {
+		err = ss->create(ss, cont);
+		if (err)
+			goto err_destroy;
+		init_container_css(ss, cont);
+	}
+
+	list_add(&cont->sibling, &cont->parent->children);
+	root->number_of_containers++;
+
+	err = container_create_dir(cont, dentry, mode);
+	if (err < 0)
+		goto err_remove;
+
+	/* The container directory was pre-locked for us */
+	BUG_ON(!mutex_is_locked(&cont->dentry->d_inode->i_mutex));
+
+	err = container_populate_dir(cont);
+	/* If err < 0, we have a half-filled directory - oh well ;) */
+
+	mutex_unlock(&container_mutex);
+	mutex_unlock(&cont->dentry->d_inode->i_mutex);
+
+	return 0;
+
+ err_remove:
+
+	list_del(&cont->sibling);
+	root->number_of_containers--;
+
+ err_destroy:
+
+	/* Only subsystems whose create() succeeded have state to free */
+	for_each_subsys(root, ss) {
+		if (cont->subsys[ss->subsys_id])
+			ss->destroy(ss, cont);
+	}
+
+	mutex_unlock(&container_mutex);
+
+	/* Release the reference count that we took on the superblock */
+	deactivate_super(sb);
+
+	kfree(cont);
+	return err;
+}
+
+/* mkdir inode operation: create a child of the container that owns
+ * the parent directory */
+static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct container *c_parent = dentry->d_parent->d_fsdata;
+
+	/* the vfs holds inode->i_mutex already */
+	return container_create(c_parent, dentry, mode | S_IFDIR);
+}
+
+/* Return 1 if any subsystem state of 'cont' still has a non-zero
+ * reference count, 0 otherwise */
+static inline int container_has_css_refs(struct container *cont)
+{
+	/* Check the reference count on each subsystem. Since we
+	 * already established that there are no tasks in the
+	 * container, if the css refcount is also 0, then there should
+	 * be no outstanding references, so the subsystem is safe to
+	 * destroy */
+	struct container_subsys *ss;
+	for_each_subsys(cont->root, ss) {
+		struct container_subsys_state *css;
+		css = cont->subsys[ss->subsys_id];
+		if (atomic_read(&css->refcnt)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * rmdir inode operation.  Fails with -EBUSY while the container has a
+ * non-zero count, child containers, or referenced subsystem state.
+ * Otherwise the subsystem state is destroyed, the container is marked
+ * removed and unlinked from its parent, its directory is removed and
+ * the superblock reference taken at creation time is dropped.
+ */
+static int container_rmdir(struct inode *unused_dir, struct dentry *dentry)
+{
+	struct container *cont = dentry->d_fsdata;
+	struct dentry *d;
+	struct container *parent;
+	struct container_subsys *ss;
+	struct super_block *sb;
+	struct containerfs_root *root;
+
+	/* the vfs holds both inode->i_mutex already */
+
+	mutex_lock(&container_mutex);
+	if (atomic_read(&cont->count) != 0) {
+		mutex_unlock(&container_mutex);
+		return -EBUSY;
+	}
+	if (!list_empty(&cont->children)) {
+		mutex_unlock(&container_mutex);
+		return -EBUSY;
+	}
+
+	parent = cont->parent;
+	root = cont->root;
+	sb = root->sb;
+
+	if (container_has_css_refs(cont)) {
+		mutex_unlock(&container_mutex);
+		return -EBUSY;
+	}
+
+	for_each_subsys(root, ss) {
+		if (cont->subsys[ss->subsys_id])
+			ss->destroy(ss, cont);
+	}
+
+	set_bit(CONT_REMOVED, &cont->flags);
+	/* delete my sibling from parent->children */
+	list_del(&cont->sibling);
+	spin_lock(&cont->dentry->d_lock);
+	d = dget(cont->dentry);
+	cont->dentry = NULL;
+	spin_unlock(&d->d_lock);
+
+	container_d_remove_dir(d);
+	dput(d);
+	root->number_of_containers--;
+
+	if (!list_empty(&cont->release_list))
+		list_del(&cont->release_list);
+	/* The parent may have become releasable by losing this child */
+	set_bit(CONT_RELEASABLE, &parent->flags);
+	check_for_release(parent);
+
+	mutex_unlock(&container_mutex);
+	/* Drop the active superblock reference that we took when we
+	 * created the container */
+	deactivate_super(sb);
+	return 0;
+}
+
+/*
+ * Bring one subsystem up at boot: create its state for the dummy top
+ * container, publish that state in every css_group, replay fork
+ * notifications for all existing tasks if the subsystem wants them,
+ * and mark the subsystem active.
+ */
+static void container_init_subsys(struct container_subsys *ss)
+{
+	int retval;
+	struct container_subsys_state *css;
+	struct list_head *l;
+	/* Routine boot-time progress message, so log it as
+	 * informational rather than as an error */
+	printk(KERN_INFO "Initializing container subsys %s\n", ss->name);
+
+	/* Create the top container state for this subsystem */
+	ss->root = &rootnode;
+	retval = ss->create(ss, dummytop);
+	BUG_ON(retval);
+	BUG_ON(!dummytop->subsys[ss->subsys_id]);
+	init_container_css(ss, dummytop);
+	css = dummytop->subsys[ss->subsys_id];
+
+	/* Update all container groups to contain a subsys
+	 * pointer to this state - since the subsystem is
+	 * newly registered, all tasks and hence all container
+	 * groups are in the subsystem's top container. */
+	write_lock(&css_group_lock);
+	l = &init_css_group.list;
+	do {
+		struct css_group *cg =
+			list_entry(l, struct css_group, list);
+		cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
+		l = l->next;
+	} while (l != &init_css_group.list);
+	write_unlock(&css_group_lock);
+
+	/* If this subsystem requested that it be notified with fork
+	 * events, we should send it one now for every process in the
+	 * system */
+	if (ss->fork) {
+		struct task_struct *g, *p;
+
+		read_lock(&tasklist_lock);
+		do_each_thread(g, p) {
+			ss->fork(ss, p);
+		} while_each_thread(g, p);
+		read_unlock(&tasklist_lock);
+	}
+
+	need_forkexit_callback |= ss->fork || ss->exit;
+
+	ss->active = 1;
+}
+
+/**
+ * container_init_early - initialize containers at system boot, and
+ * initialize any subsystems that request early init.
+ */
+int __init container_init_early(void)
+{
+	int idx;
+
+	/* init_css_group starts with one extra reference */
+	kref_init(&init_css_group.ref);
+	kref_get(&init_css_group.ref);
+	INIT_LIST_HEAD(&init_css_group.list);
+	INIT_LIST_HEAD(&init_css_group.cg_links);
+	INIT_LIST_HEAD(&init_css_group.tasks);
+	css_group_count = 1;
+	init_container_root(&rootnode);
+	init_task.containers = &init_css_group;
+
+	/* Link init_css_group with the top container of rootnode */
+	init_css_group_link.cg = &init_css_group;
+	list_add(&init_css_group_link.cont_link_list,
+		 &rootnode.top_container.css_groups);
+	list_add(&init_css_group_link.cg_link_list,
+		 &init_css_group.cg_links);
+
+	for (idx = 0; idx < CONTAINER_SUBSYS_COUNT; idx++) {
+		struct container_subsys *ss = subsys[idx];
+
+		/* Sanity-check what the subsystem registered */
+		BUG_ON(!ss->name);
+		BUG_ON(strlen(ss->name) > MAX_CONTAINER_TYPE_NAMELEN);
+		BUG_ON(!ss->create);
+		BUG_ON(!ss->destroy);
+		if (ss->subsys_id != idx) {
+			printk(KERN_ERR "Subsys %s id == %d\n",
+			       ss->name, ss->subsys_id);
+			BUG();
+		}
+
+		if (ss->early_init)
+			container_init_subsys(ss);
+	}
+	return 0;
+}
+
+/**
+ * container_init - register container filesystem and /proc file, and
+ * initialize any subsystems that didn't request early init.
+ */
+int __init container_init(void)
+{
+	struct proc_dir_entry *stats_entry;
+	int idx;
+	int err;
+
+	for (idx = 0; idx < CONTAINER_SUBSYS_COUNT; idx++) {
+		struct container_subsys *ss = subsys[idx];
+
+		if (ss->early_init)
+			continue;
+		container_init_subsys(ss);
+	}
+
+	err = register_filesystem(&container_fs_type);
+	if (err < 0)
+		return err;
+
+	/* A missing /proc/containers entry is not treated as an error */
+	stats_entry = create_proc_entry("containers", 0, NULL);
+	if (stats_entry)
+		stats_entry->proc_fops = &proc_containerstats_operations;
+
+	return err;
+}
+
+/*
+ * proc_container_show()
+ *  - Print task's container paths into seq_file, one line for each hierarchy
+ *  - Used for /proc/<pid>/container.
+ *  - No need to task_lock(tsk) on this tsk->container reference, as it
+ *    doesn't really matter if tsk->container changes after we read it,
+ *    and we take container_mutex, keeping attach_task() from changing it
+ *    anyway.
No need to check that tsk->container != NULL, thanks to + * the_top_container_hack in container_exit(), which sets an exiting tasks + * container to top_container. + */ + +/* TODO: Use a proper seq_file iterator */ +static int proc_container_show(struct seq_file *m, void *v) +{ + struct pid *pid; + struct task_struct *tsk; + char *buf; + int retval; + struct containerfs_root *root; + + retval = -ENOMEM; + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + goto out; + + retval = -ESRCH; + pid = m->private; + tsk = get_pid_task(pid, PIDTYPE_PID); + if (!tsk) + goto out_free; + + retval = 0; + + mutex_lock(&container_mutex); + + for_each_root(root) { + struct container_subsys *ss; + struct container *cont; + int subsys_id; + int count = 0; + + /* Skip this hierarchy if it has no active subsystems */ + if (!root->subsys_bits) + continue; + for_each_subsys(root, ss) + seq_printf(m, "%s%s", count++ ? "," : "", ss->name); + seq_putc(m, ':'); + get_first_subsys(&root->top_container, NULL, &subsys_id); + cont = task_container(tsk, subsys_id); + retval = container_path(cont, buf, PAGE_SIZE); + if (retval < 0) + goto out_unlock; + seq_puts(m, buf); + seq_putc(m, '\n'); + } + +out_unlock: + mutex_unlock(&container_mutex); + put_task_struct(tsk); +out_free: + kfree(buf); +out: + return retval; +} + +static int container_open(struct inode *inode, struct file *file) +{ + struct pid *pid = PROC_I(inode)->pid; + return single_open(file, proc_container_show, pid); +} + +struct file_operations proc_container_operations = { + .open = container_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* Display information about each subsystem and each hierarchy */ +static int proc_containerstats_show(struct seq_file *m, void *v) +{ + int i; + struct containerfs_root *root; + + mutex_lock(&container_mutex); + seq_puts(m, "Hierarchies:\n"); + for_each_root(root) { + struct container_subsys *ss; + int first = 1; + seq_printf(m, "%p: bits=%lx containers=%d (", 
root, + root->subsys_bits, root->number_of_containers); + for_each_subsys(root, ss) { + seq_printf(m, "%s%s", first ? "" : ", ", ss->name); + first = 0; /* later names get a ", " separator */ + } + seq_putc(m, ')'); + if (root->sb) { /* hierarchy has a superblock: also report its s_active count */ + seq_printf(m, " s_active=%d", + atomic_read(&root->sb->s_active)); + } + seq_putc(m, '\n'); + } + seq_puts(m, "Subsystems:\n"); + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container_subsys *ss = subsys[i]; + seq_printf(m, "%d: name=%s hierarchy=%p\n", + i, ss->name, ss->root); + } + seq_printf(m, "Container groups: %d\n", css_group_count); + mutex_unlock(&container_mutex); + return 0; +} + +static int containerstats_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_containerstats_show, NULL); /* the show routine needs no private data */ +} + +static struct file_operations proc_containerstats_operations = { + .open = containerstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * container_fork - attach newly forked task to its parent's container. + * @child: pointer to task_struct of the newly forked child task. + * + * Description: A task inherits its parent's container at fork(). + * + * A pointer to the shared css_group was automatically copied in + * fork.c by dup_task_struct(). However, we ignore that copy, since + * it was not made under the protection of RCU or container_mutex, so + * might no longer be a valid container pointer. attach_task() might + * have already changed current->containers, allowing the previously + * referenced container group to be removed and freed. + * + * At the point that container_fork() is called, 'current' is the parent + * task, and the passed argument 'child' points to the child task. 
+ */ +void container_fork(struct task_struct *child) +{ + write_lock(&css_group_lock); + child->containers = current->containers; /* re-read under css_group_lock instead of trusting the copy made by dup_task_struct() */ + get_css_group(child->containers); /* the child holds its own reference on the css_group */ + list_add(&child->cg_list, &child->containers->tasks); /* link the child into the group's task list */ + write_unlock(&css_group_lock); +} + +/** + * container_fork_callbacks - called on a new task very soon before + * adding it to the tasklist. No need to take any locks since no-one + * can be operating on this task + */ +void container_fork_callbacks(struct task_struct *child) +{ + if (need_forkexit_callback) { + int i; + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container_subsys *ss = subsys[i]; + if (ss->fork) /* the fork hook is optional per subsystem */ + ss->fork(ss, child); + } + } +} + +/** + * container_exit - detach container from exiting task + * @tsk: pointer to task_struct of exiting process + * + * Description: Detach container from @tsk and release it. If run_callbacks is nonzero and need_forkexit_callback is set, every subsystem's exit hook is called first. + * + * Note that containers marked notify_on_release force every task in + * them to take the global container_mutex mutex when exiting. + * This could impact scaling on very large systems. Be reluctant to + * use notify_on_release containers where very high task exit scaling + * is required on large systems. + * + * the_top_container_hack: + * + * Set the exiting task's container to the root container (top_container). + * + * We call container_exit() while the task is still competent to + * handle notify_on_release(), then leave the task attached to the + * root container in each hierarchy for the remainder of its exit. + * + * To do this properly, we would increment the reference count on + * top_container, and near the very end of the kernel/exit.c do_exit() + * code we would add a second container function call, to drop that + * reference. This would just create an unnecessary hot spot on + * the top_container reference count, to no avail. + * + * Normally, holding a reference to a container without bumping its + * count is unsafe. 
The container could go away, or someone could + * attach us to a different container, decrementing the count on + * the first container that we never incremented. But in this case, + * top_container isn't going away, and either task has PF_EXITING set, + * which wards off any attach_task() attempts, or task is a failed + * fork, never visible to attach_task. + * + */ +void container_exit(struct task_struct *tsk, int run_callbacks) +{ + int i; + struct css_group *cg = NULL; + + if (run_callbacks && need_forkexit_callback) { + for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { + struct container_subsys *ss = subsys[i]; + if (ss->exit) /* the exit hook is optional per subsystem */ + ss->exit(ss, tsk); + } + } + + /* Reassign the task to the init_css_group. */ + task_lock(tsk); + write_lock(&css_group_lock); + list_del(&tsk->cg_list); /* unlink from the task list of the css_group being left */ + write_unlock(&css_group_lock); + + cg = tsk->containers; + tsk->containers = &init_css_group; /* deliberately no reference taken: see the_top_container_hack above */ + task_unlock(tsk); + if (cg) + put_css_group_taskexit(cg); /* drop the task's reference on the group it just left */ +} + +/** + * container_clone - duplicate the current container in the hierarchy + * that the given subsystem is attached to, and move this task into + * the new child. Returns 0 on success or when the subsystem is unused (still on rootnode); otherwise the error from the dentry lookup, vfs_mkdir() or attach_task(), or -ENOMEM if the new container cannot be found. + */ +int container_clone(struct task_struct *tsk, struct container_subsys *subsys) +{ + struct dentry *dentry; + int ret = 0; + char nodename[MAX_CONTAINER_TYPE_NAMELEN]; + struct container *parent, *child; + struct inode *inode; + struct css_group *cg; + struct containerfs_root *root; + struct container_subsys *ss; + + /* We shouldn't be called by an unregistered subsystem */ + BUG_ON(!subsys->active); + + /* First figure out what hierarchy and container we're dealing + * with, and pin them so we can drop container_mutex */ + mutex_lock(&container_mutex); + again: /* also re-entered, with container_mutex held, after losing the race detected further down */ + root = subsys->root; + if (root == &rootnode) { /* subsystem is still on the default rootnode: nothing to clone */ + printk(KERN_INFO + "Not cloning container for unused subsystem %s\n", + subsys->name); + mutex_unlock(&container_mutex); + return 0; + } + cg = tsk->containers; + parent = task_container(tsk, subsys->subsys_id); + + snprintf(nodename, 
MAX_CONTAINER_TYPE_NAMELEN, "node_%d", tsk->pid); + + /* Pin the hierarchy */ + atomic_inc(&parent->root->sb->s_active); + + /* Keep the container alive */ + get_css_group(cg); + mutex_unlock(&container_mutex); + + /* Now do the VFS work to create a container */ + inode = parent->dentry->d_inode; + + /* Hold the parent directory mutex across this operation to + * stop anyone else deleting the new container */ + mutex_lock(&inode->i_mutex); + dentry = container_get_dentry(parent->dentry, nodename); + if (IS_ERR(dentry)) { + printk(KERN_INFO + "Couldn't allocate dentry for %s: %ld\n", nodename, + PTR_ERR(dentry)); + ret = PTR_ERR(dentry); + goto out_release; + } + + /* Create the container directory, which also creates the container */ + ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755, NULL); + child = __d_cont(dentry); /* fetched before the error check; only used when ret == 0 */ + dput(dentry); + if (ret) { + printk(KERN_INFO + "Failed to create container %s: %d\n", nodename, + ret); + goto out_release; + } + + if (!child) { + printk(KERN_INFO + "Couldn't find new container %s\n", nodename); + ret = -ENOMEM; + goto out_release; + } + + /* The container now exists. Retake container_mutex and check + * that we're still in the same state that we thought we + * were. */ + mutex_lock(&container_mutex); + if ((root != subsys->root) || + (parent != task_container(tsk, subsys->subsys_id))) { + /* Aargh, we raced ... */ + mutex_unlock(&inode->i_mutex); /* undo i_mutex and both pins, then retry with container_mutex still held */ + put_css_group(cg); + + deactivate_super(parent->root->sb); + /* The container is still accessible in the VFS, but + * we're not going to try to rmdir() it at this + * point. */ + printk(KERN_INFO + "Race in container_clone() - leaking container %s\n", + nodename); + goto again; + } + + /* do any required auto-setup */ + for_each_subsys(root, ss) { + if (ss->post_clone) + ss->post_clone(ss, child); + } + + /* All seems fine. 
Finish by moving the task into the new container */ + ret = attach_task(child, tsk); + mutex_unlock(&container_mutex); + + out_release: /* reached with i_mutex held and container_mutex not held */ + mutex_unlock(&inode->i_mutex); + + mutex_lock(&container_mutex); + put_css_group(cg); + mutex_unlock(&container_mutex); + deactivate_super(parent->root->sb); /* balances the s_active pin taken before the VFS work */ + return ret; +} + +/* See if "cont" is a descendant of the current task's container in + * the appropriate hierarchy. Returns 1 if it is (a container counts as its own descendant, and dummytop always matches), otherwise 0. */ + +int container_is_descendant(const struct container *cont) +{ + int ret; + struct container *target; + int subsys_id; + + if (cont == dummytop) + return 1; + get_first_subsys(cont, NULL, &subsys_id); + target = task_container(current, subsys_id); + while (cont != target && cont!= cont->top_container) { /* walk up until the task's container or the top container is reached */ + cont = cont->parent; + } + ret = (cont == target); + return ret; +} + +static void check_for_release(struct container *cont) +{ + BUG_ON(!mutex_is_locked(&container_mutex)); + if (container_is_releasable(cont) && !atomic_read(&cont->count) + && list_empty(&cont->children) && !container_has_css_refs(cont)) { + /* Container is currently removable. 
If it's not + * already queued for a userspace notification, queue + * it now */ + if (list_empty(&cont->release_list)) { + list_add(&cont->release_list, &release_list); + schedule_work(&release_agent_work); /* container_release_agent() drains release_list */ + } + } +} + +void css_put(struct container_subsys_state *css) +{ + struct container *cont = css->container; + if (notify_on_release(cont)) { /* slow path: under container_mutex so the container can be queued for release when the last css reference is dropped */ + mutex_lock(&container_mutex); + set_bit(CONT_RELEASABLE, &cont->flags); + if (atomic_dec_and_test(&css->refcnt)) { + check_for_release(cont); + } + mutex_unlock(&container_mutex); + } else { /* fast path: plain decrement, no release check */ + atomic_dec(&css->refcnt); + } +} + +void container_set_release_agent_path(struct container_subsys *ss, + const char *path) +{ + mutex_lock(&container_mutex); + strcpy(ss->root->release_agent_path, path); /* NOTE(review): unbounded copy - assumes the caller guarantees that path fits in release_agent_path, whose size is not visible here; confirm */ + mutex_unlock(&container_mutex); +} + +/* + * Notify userspace when a container is released, by running the + * configured release agent with the name of the container (path + * relative to the root of container file system) as the argument. + * + * Most likely, this user command will try to rmdir this container. + * + * This races with the possibility that some other task will be + * attached to this container before it is removed, or that some other + * user task will 'mkdir' a child container of this container. That's ok. + * The presumed 'rmdir' will fail quietly if this container is no longer + * unused, and this container will be reprieved from its death sentence, + * to continue to serve a useful existence. Next time it's released, + * we will get notified again, if it still has 'notify_on_release' set. + * + * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which + * means only wait until the task is successfully execve()'d. The + * separate release agent task is forked by call_usermodehelper(), + * then control in this thread returns here, without waiting for the + * release agent task. 
We don't bother to wait because the caller of + * this routine has no use for the exit status of the release agent + * task, so no sense holding our caller up for that. + * + */ + +static void container_release_agent(struct work_struct *work) +{ + BUG_ON(work != &release_agent_work); + mutex_lock(&container_mutex); + while (!list_empty(&release_list)) { + char *argv[3], *envp[3]; + int i; + char *pathbuf; + struct container *cont = list_entry(release_list.next, + struct container, + release_list); + list_del_init(&cont->release_list); /* dequeue first, so every "continue" below still makes progress */ + + pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!pathbuf) /* out of memory: this notification is silently dropped */ + continue; + + if (container_path(cont, pathbuf, PAGE_SIZE) < 0) { + kfree(pathbuf); + continue; + } + + i = 0; + argv[i++] = cont->root->release_agent_path; /* NOTE(review): points into the root's own buffer, which is still read after container_mutex is dropped below - confirm it cannot be rewritten concurrently */ + argv[i++] = (char *)pathbuf; + argv[i] = NULL; + + i = 0; + /* minimal command environment */ + envp[i++] = "HOME=/"; + envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[i] = NULL; + + /* Drop the lock while we invoke the usermode helper, + * since the exec could involve hitting disk and hence + * be a slow process */ + mutex_unlock(&container_mutex); + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + kfree(pathbuf); + mutex_lock(&container_mutex); + } + mutex_unlock(&container_mutex); +} diff -Nurb linux-2.6.22-570/kernel/container_debug.c linux-2.6.22-591/kernel/container_debug.c --- linux-2.6.22-570/kernel/container_debug.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/container_debug.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,89 @@ +/* + * kernel/container_debug.c - Example container subsystem that + * exposes debug info + * + * Copyright (C) Google Inc, 2007 + * + * Developed by Paul Menage (menage@google.com) + * + */ + +#include <linux/container.h> +#include <linux/fs.h> + +static int debug_create(struct container_subsys *ss, struct container *cont) +{ + struct container_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); + if (!css) + return -ENOMEM; + cont->subsys[debug_subsys_id] = css; + 
return 0; +} + +static void debug_destroy(struct container_subsys *ss, struct container *cont) +{ + kfree(cont->subsys[debug_subsys_id]); /* frees the css allocated by debug_create() */ +} + +static u64 container_refcount_read(struct container *cont, struct cftype *cft) +{ + return atomic_read(&cont->count); +} + +static u64 taskcount_read(struct container *cont, struct cftype *cft) +{ + u64 count; + container_lock(); + count = container_task_count(cont); + container_unlock(); + return count; +} + +static u64 current_css_group_read(struct container *cont, struct cftype *cft) +{ + return (u64)(long)current->containers; /* pointer value of the reader's css_group; cast via long so the pointer-to-u64 conversion is well-formed on 32-bit */ +} + +static u64 current_css_group_refcount_read(struct container *cont, + struct cftype *cft) +{ + u64 count; + rcu_read_lock(); + count = atomic_read(&current->containers->ref.refcount); /* refcount of the reader's own css_group */ + rcu_read_unlock(); + return count; +} + +static struct cftype files[] = { + { + .name = "debug.container_refcount", + .read_uint = container_refcount_read, + }, + { + .name = "debug.taskcount", + .read_uint = taskcount_read, + }, + + { + .name = "debug.current_css_group", + .read_uint = current_css_group_read, + }, + + { + .name = "debug.current_css_group_refcount", + .read_uint = current_css_group_refcount_read, + }, +}; + +static int debug_populate(struct container_subsys *ss, struct container *cont) +{ + return container_add_files(cont, files, ARRAY_SIZE(files)); /* registers the debug.* control files defined above */ +} + +struct container_subsys debug_subsys = { + .name = "debug", + .create = debug_create, + .destroy = debug_destroy, + .populate = debug_populate, + .subsys_id = debug_subsys_id, +}; diff -Nurb linux-2.6.22-570/kernel/cpu_acct.c linux-2.6.22-591/kernel/cpu_acct.c --- linux-2.6.22-570/kernel/cpu_acct.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/cpu_acct.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,185 @@ +/* + * kernel/cpu_acct.c - CPU accounting container subsystem + * + * Copyright (C) Google Inc, 2006 + * + * Developed by Paul Menage (menage@google.com) and Balbir Singh + * (balbir@in.ibm.com) + * + */ + +/* + * Example container 
subsystem for reporting total CPU usage of tasks in a + * container, along with percentage load over a time interval + */ + +#include <linux/module.h> +#include <linux/container.h> +#include <linux/fs.h> +#include <asm/div64.h> + +struct cpuacct { + struct container_subsys_state css; + spinlock_t lock; + /* total time used by this class */ + cputime64_t time; + + /* time (in jiffies) when next load calculation occurs */ + u64 next_interval_check; + + /* time used in current period */ + cputime64_t current_interval_time; + + /* time used in last period */ + cputime64_t last_interval_time; +}; + +struct container_subsys cpuacct_subsys; + +static inline struct cpuacct *container_ca(struct container *cont) +{ + return container_of(container_subsys_state(cont, cpuacct_subsys_id), + struct cpuacct, css); +} + +static inline struct cpuacct *task_ca(struct task_struct *task) +{ + return container_of(task_subsys_state(task, cpuacct_subsys_id), + struct cpuacct, css); +} + +#define INTERVAL (HZ * 10) /* length of one load-measurement period: 10 seconds in jiffies */ + +static inline u64 next_interval_boundary(u64 now) { + /* calculate the next interval boundary beyond the + * current time */ + do_div(now, INTERVAL); /* do_div() divides in place: now becomes the index of the current interval */ + return (now + 1) * INTERVAL; +} + +static int cpuacct_create(struct container_subsys *ss, struct container *cont) +{ + struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); /* zeroed, so all time counters start at 0 */ + if (!ca) + return -ENOMEM; + spin_lock_init(&ca->lock); + ca->next_interval_check = next_interval_boundary(get_jiffies_64()); + cont->subsys[cpuacct_subsys_id] = &ca->css; + return 0; +} + +static void cpuacct_destroy(struct container_subsys *ss, + struct container *cont) +{ + kfree(container_ca(cont)); +} + +/* Lazily update the load calculation if necessary. 
Called with ca locked */ +static void cpuusage_update(struct cpuacct *ca) +{ + u64 now = get_jiffies_64(); + /* If we're not due for an update, return */ + if (ca->next_interval_check > now) + return; + + if (ca->next_interval_check <= (now - INTERVAL)) { + /* If it's been more than an interval since the last + * check, then catch up - the last interval must have + * been zero load */ + ca->last_interval_time = 0; + ca->next_interval_check = next_interval_boundary(now); + } else { + /* If a steal takes the last interval time negative, + * then we just ignore it */ + if ((s64)ca->current_interval_time > 0) { + ca->last_interval_time = ca->current_interval_time; + } else { + ca->last_interval_time = 0; + } + ca->next_interval_check += INTERVAL; + } + ca->current_interval_time = 0; /* start accumulating the new interval from zero */ +} + +static u64 cpuusage_read(struct container *cont, + struct cftype *cft) +{ + struct cpuacct *ca = container_ca(cont); + u64 time; + + spin_lock_irq(&ca->lock); + cpuusage_update(ca); + time = cputime64_to_jiffies64(ca->time); + spin_unlock_irq(&ca->lock); + + /* Convert 64-bit jiffies to milliseconds (jiffies * 1000 / HZ) */ + time *= 1000; + do_div(time, HZ); + return time; +} + +static u64 load_read(struct container *cont, + struct cftype *cft) +{ + struct cpuacct *ca = container_ca(cont); + u64 time; + + /* Find the time used in the previous interval */ + spin_lock_irq(&ca->lock); + cpuusage_update(ca); + time = cputime64_to_jiffies64(ca->last_interval_time); + spin_unlock_irq(&ca->lock); + + /* Convert time to a percentage, to give the load in the + * previous period */ + time *= 100; + do_div(time, INTERVAL); /* jiffies used * 100 / jiffies per interval */ + + return time; +} + +static struct cftype files[] = { + { + .name = "cpuacct.usage", + .read_uint = cpuusage_read, + }, + { + .name = "cpuacct.load", + .read_uint = load_read, + } +}; + +static int cpuacct_populate(struct container_subsys *ss, + struct container *cont) +{ + return container_add_files(cont, files, ARRAY_SIZE(files)); +} + +void cpuacct_charge(struct task_struct *task, cputime_t 
cputime) +{ /* adds cputime to both the lifetime total and the current-interval counter of the cpuacct that task belongs to */ + + struct cpuacct *ca; + unsigned long flags; + + if (!cpuacct_subsys.active) /* subsystem not active: nothing to account to */ + return; + rcu_read_lock(); + ca = task_ca(task); + if (ca) { + spin_lock_irqsave(&ca->lock, flags); + cpuusage_update(ca); /* roll the interval over first, so the charge lands in the right period */ + ca->time = cputime64_add(ca->time, cputime); + ca->current_interval_time = + cputime64_add(ca->current_interval_time, cputime); + spin_unlock_irqrestore(&ca->lock, flags); + } + rcu_read_unlock(); +} + +struct container_subsys cpuacct_subsys = { + .name = "cpuacct", + .create = cpuacct_create, + .destroy = cpuacct_destroy, + .populate = cpuacct_populate, + .subsys_id = cpuacct_subsys_id, +}; diff -Nurb linux-2.6.22-570/kernel/cpuset.c linux-2.6.22-591/kernel/cpuset.c --- linux-2.6.22-570/kernel/cpuset.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/cpuset.c 2007-12-21 15:36:12.000000000 -0500 @@ -5,6 +5,7 @@ * * Copyright (C) 2003 BULL SA. * Copyright (C) 2004-2006 Silicon Graphics, Inc. + * Copyright (C) 2006 Google, Inc * * Portions derived from Patrick Mochel's sysfs code. * sysfs is Copyright (c) 2001-3 Patrick Mochel @@ -12,6 +13,7 @@ * 2003-10-10 Written by Simon Derr. * 2003-10-22 Updates by Stephen Hemminger. * 2004 May-July Rework by Paul Jackson. + * 2006 Rework by Paul Menage to use generic containers * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of the Linux @@ -53,8 +55,6 @@ #include #include -#define CPUSET_SUPER_MAGIC 0x27e0eb - /* * Tracks how many cpusets are currently defined in system. * When there is only one cpuset (the root cpuset) we can @@ -62,6 +62,10 @@ */ int number_of_cpusets __read_mostly; +/* Retrieve the cpuset from a container */ +struct container_subsys cpuset_subsys; +struct cpuset; + /* See "Frequency meter" comments, below. 
*/ struct fmeter { @@ -72,24 +76,13 @@ }; struct cpuset { + struct container_subsys_state css; /* embedded container state; container_cs() and task_cs() map it back to the cpuset with container_of() */ + unsigned long flags; /* "unsigned long" so bitops work */ cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ - /* - * Count is atomic so can incr (fork) or decr (exit) without a lock. - */ - atomic_t count; /* count tasks using this cpuset */ - - /* - * We link our 'sibling' struct into our parents 'children'. - * Our children link their 'sibling' into our 'children'. - */ - struct list_head sibling; /* my parents children */ - struct list_head children; /* my children */ - struct cpuset *parent; /* my parent */ - struct dentry *dentry; /* cpuset fs entry */ /* * Copy of global cpuset_mems_generation as of the most @@ -100,13 +93,32 @@ struct fmeter fmeter; /* memory_pressure filter */ }; +/* Update the cpuset for a container */ +static inline void set_container_cs(struct container *cont, struct cpuset *cs) +{ + cont->subsys[cpuset_subsys_id] = &cs->css; /* the container stores the embedded css, not the cpuset pointer itself */ +} + +/* Retrieve the cpuset for a container */ +static inline struct cpuset *container_cs(struct container *cont) +{ + return container_of(container_subsys_state(cont, cpuset_subsys_id), + struct cpuset, css); +} + +/* Retrieve the cpuset for a task */ +static inline struct cpuset *task_cs(struct task_struct *task) +{ + return container_of(task_subsys_state(task, cpuset_subsys_id), + struct cpuset, css); +} + + /* bits in struct cpuset flags field */ typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, CS_MEMORY_MIGRATE, - CS_REMOVED, - CS_NOTIFY_ON_RELEASE, CS_SPREAD_PAGE, CS_SPREAD_SLAB, } cpuset_flagbits_t; @@ -122,16 +134,6 @@ return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); } -static inline int is_removed(const struct cpuset *cs) -{ - return test_bit(CS_REMOVED, &cs->flags); -} - -static inline int notify_on_release(const struct cpuset *cs) -{ - return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); -} - static inline int is_memory_migrate(const 
struct cpuset *cs) { return test_bit(CS_MEMORY_MIGRATE, &cs->flags); @@ -172,14 +174,8 @@ .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), .cpus_allowed = CPU_MASK_ALL, .mems_allowed = NODE_MASK_ALL, - .count = ATOMIC_INIT(0), - .sibling = LIST_HEAD_INIT(top_cpuset.sibling), - .children = LIST_HEAD_INIT(top_cpuset.children), }; -static struct vfsmount *cpuset_mount; -static struct super_block *cpuset_sb; - /* * We have two global cpuset mutexes below. They can nest. * It is ok to first take manage_mutex, then nest callback_mutex. We also @@ -263,297 +259,36 @@ * the routine cpuset_update_task_memory_state(). */ -static DEFINE_MUTEX(manage_mutex); static DEFINE_MUTEX(callback_mutex); -/* - * A couple of forward declarations required, due to cyclic reference loop: - * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file - * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir. - */ - -static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode); -static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry); - -static struct backing_dev_info cpuset_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, -}; - -static struct inode *cpuset_new_inode(mode_t mode) -{ - struct inode *inode = new_inode(cpuset_sb); - - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info; - } - return inode; -} - -static void cpuset_diput(struct dentry *dentry, struct inode *inode) -{ - /* is dentry a directory ? 
if so, kfree() associated cpuset */ - if (S_ISDIR(inode->i_mode)) { - struct cpuset *cs = dentry->d_fsdata; - BUG_ON(!(is_removed(cs))); - kfree(cs); - } - iput(inode); -} - -static struct dentry_operations cpuset_dops = { - .d_iput = cpuset_diput, -}; - -static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name) -{ - struct dentry *d = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(d)) - d->d_op = &cpuset_dops; - return d; -} - -static void remove_dir(struct dentry *d) -{ - struct dentry *parent = dget(d->d_parent); - - d_delete(d); - simple_rmdir(parent->d_inode, d); - dput(parent); -} - -/* - * NOTE : the dentry must have been dget()'ed - */ -static void cpuset_d_remove_dir(struct dentry *dentry) -{ - struct list_head *node; - - spin_lock(&dcache_lock); - node = dentry->d_subdirs.next; - while (node != &dentry->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_u.d_child); - list_del_init(node); - if (d->d_inode) { - d = dget_locked(d); - spin_unlock(&dcache_lock); - d_delete(d); - simple_unlink(dentry->d_inode, d); - dput(d); - spin_lock(&dcache_lock); - } - node = dentry->d_subdirs.next; - } - list_del_init(&dentry->d_u.d_child); - spin_unlock(&dcache_lock); - remove_dir(dentry); -} - -static struct super_operations cpuset_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, -}; - -static int cpuset_fill_super(struct super_block *sb, void *unused_data, - int unused_silent) -{ - struct inode *inode; - struct dentry *root; - - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = CPUSET_SUPER_MAGIC; - sb->s_op = &cpuset_ops; - cpuset_sb = sb; - - inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR); - if (inode) { - inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - /* directories start off with i_nlink == 2 (for "." 
entry) */ - inc_nlink(inode); - } else { - return -ENOMEM; - } - - root = d_alloc_root(inode); - if (!root) { - iput(inode); - return -ENOMEM; - } - sb->s_root = root; - return 0; -} - +/* This is ugly, but preserves the userspace API for existing cpuset + * users. If someone tries to mount the "cpuset" filesystem, we + * silently switch it to mount "container" instead */ static int cpuset_get_sb(struct file_system_type *fs_type, int flags, const char *unused_dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); + struct file_system_type *container_fs = get_fs_type("container"); + int ret = -ENODEV; /* returned when no "container" filesystem type is registered */ + if (container_fs) { + ret = container_fs->get_sb(container_fs, flags, + unused_dev_name, + "cpuset", mnt); /* the caller's mount data is ignored and replaced by the fixed option string "cpuset"; NOTE(review): a string literal is passed as void *data - confirm the container get_sb never modifies it */ + put_filesystem(container_fs); + if (!ret) { /* mount succeeded: install the legacy cpuset release agent */ + container_set_release_agent_path( + &cpuset_subsys, + "/sbin/cpuset_release_agent"); + } + } + return ret; } static struct file_system_type cpuset_fs_type = { .name = "cpuset", .get_sb = cpuset_get_sb, - .kill_sb = kill_litter_super, -}; - -/* struct cftype: - * - * The files in the cpuset filesystem mostly have a very simple read/write - * handling, some common function will take care of it. Nevertheless some cases - * (read tasks) are special and therefore I define this structure for every - * kind of file. 
- * - * - * When reading/writing to a file: - * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata - * - the 'cftype' of the file is file->f_path.dentry->d_fsdata - */ - -struct cftype { - char *name; - int private; - int (*open) (struct inode *inode, struct file *file); - ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos); - int (*write) (struct file *file, const char __user *buf, size_t nbytes, - loff_t *ppos); - int (*release) (struct inode *inode, struct file *file); }; -static inline struct cpuset *__d_cs(struct dentry *dentry) -{ - return dentry->d_fsdata; -} - -static inline struct cftype *__d_cft(struct dentry *dentry) -{ - return dentry->d_fsdata; -} - -/* - * Call with manage_mutex held. Writes path of cpuset into buf. - * Returns 0 on success, -errno on error. - */ - -static int cpuset_path(const struct cpuset *cs, char *buf, int buflen) -{ - char *start; - - start = buf + buflen; - - *--start = '\0'; - for (;;) { - int len = cs->dentry->d_name.len; - if ((start -= len) < buf) - return -ENAMETOOLONG; - memcpy(start, cs->dentry->d_name.name, len); - cs = cs->parent; - if (!cs) - break; - if (!cs->parent) - continue; - if (--start < buf) - return -ENAMETOOLONG; - *start = '/'; - } - memmove(buf, start, buf + buflen - start); - return 0; -} - -/* - * Notify userspace when a cpuset is released, by running - * /sbin/cpuset_release_agent with the name of the cpuset (path - * relative to the root of cpuset file system) as the argument. - * - * Most likely, this user command will try to rmdir this cpuset. - * - * This races with the possibility that some other task will be - * attached to this cpuset before it is removed, or that some other - * user task will 'mkdir' a child cpuset of this cpuset. That's ok. - * The presumed 'rmdir' will fail quietly if this cpuset is no longer - * unused, and this cpuset will be reprieved from its death sentence, - * to continue to serve a useful existence. 
Next time it's released, - * we will get notified again, if it still has 'notify_on_release' set. - * - * The final arg to call_usermodehelper() is 0, which means don't - * wait. The separate /sbin/cpuset_release_agent task is forked by - * call_usermodehelper(), then control in this thread returns here, - * without waiting for the release agent task. We don't bother to - * wait because the caller of this routine has no use for the exit - * status of the /sbin/cpuset_release_agent task, so no sense holding - * our caller up for that. - * - * When we had only one cpuset mutex, we had to call this - * without holding it, to avoid deadlock when call_usermodehelper() - * allocated memory. With two locks, we could now call this while - * holding manage_mutex, but we still don't, so as to minimize - * the time manage_mutex is held. - */ - -static void cpuset_release_agent(const char *pathbuf) -{ - char *argv[3], *envp[3]; - int i; - - if (!pathbuf) - return; - - i = 0; - argv[i++] = "/sbin/cpuset_release_agent"; - argv[i++] = (char *)pathbuf; - argv[i] = NULL; - - i = 0; - /* minimal command environment */ - envp[i++] = "HOME=/"; - envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[i] = NULL; - - call_usermodehelper(argv[0], argv, envp, 0); - kfree(pathbuf); -} - -/* - * Either cs->count of using tasks transitioned to zero, or the - * cs->children list of child cpusets just became empty. If this - * cs is notify_on_release() and now both the user count is zero and - * the list of children is empty, prepare cpuset path in a kmalloc'd - * buffer, to be returned via ppathbuf, so that the caller can invoke - * cpuset_release_agent() with it later on, once manage_mutex is dropped. - * Call here with manage_mutex held. - * - * This check_for_release() routine is responsible for kmalloc'ing - * pathbuf. The above cpuset_release_agent() is responsible for - * kfree'ing pathbuf. 
The caller of these routines is responsible - * for providing a pathbuf pointer, initialized to NULL, then - * calling check_for_release() with manage_mutex held and the address - * of the pathbuf pointer, then dropping manage_mutex, then calling - * cpuset_release_agent() with pathbuf, as set by check_for_release(). - */ - -static void check_for_release(struct cpuset *cs, char **ppathbuf) -{ - if (notify_on_release(cs) && atomic_read(&cs->count) == 0 && - list_empty(&cs->children)) { - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return; - if (cpuset_path(cs, buf, PAGE_SIZE) < 0) - kfree(buf); - else - *ppathbuf = buf; - } -} - /* * Return in *pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy @@ -651,20 +386,19 @@ struct task_struct *tsk = current; struct cpuset *cs; - if (tsk->cpuset == &top_cpuset) { + if (task_cs(tsk) == &top_cpuset) { /* Don't need rcu for top_cpuset. It's never freed. */ my_cpusets_mem_gen = top_cpuset.mems_generation; } else { rcu_read_lock(); - cs = rcu_dereference(tsk->cpuset); - my_cpusets_mem_gen = cs->mems_generation; + my_cpusets_mem_gen = task_cs(current)->mems_generation; rcu_read_unlock(); } if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { mutex_lock(&callback_mutex); task_lock(tsk); - cs = tsk->cpuset; /* Maybe changed when task not locked */ + cs = task_cs(tsk); /* Maybe changed when task not locked */ guarantee_online_mems(cs, &tsk->mems_allowed); tsk->cpuset_mems_generation = cs->mems_generation; if (is_spread_page(cs)) @@ -719,11 +453,12 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) { + struct container *cont; struct cpuset *c, *par; /* Each of our child cpusets must be a subset of us */ - list_for_each_entry(c, &cur->children, sibling) { - if (!is_cpuset_subset(c, trial)) + list_for_each_entry(cont, &cur->css.container->children, sibling) { + if (!is_cpuset_subset(container_cs(cont), trial)) 
return -EBUSY; } @@ -738,7 +473,8 @@ return -EACCES; /* If either I or some sibling (!= me) is exclusive, we can't overlap */ - list_for_each_entry(c, &par->children, sibling) { + list_for_each_entry(cont, &par->css.container->children, sibling) { + c = container_cs(cont); if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) @@ -753,68 +489,13 @@ } /* - * For a given cpuset cur, partition the system as follows - * a. All cpus in the parent cpuset's cpus_allowed that are not part of any - * exclusive child cpusets - * b. All cpus in the current cpuset's cpus_allowed that are not part of any - * exclusive child cpusets - * Build these two partitions by calling partition_sched_domains - * - * Call with manage_mutex held. May nest a call to the - * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. - * Must not be called holding callback_mutex, because we must - * not call lock_cpu_hotplug() while holding callback_mutex. - */ - -static void update_cpu_domains(struct cpuset *cur) -{ - struct cpuset *c, *par = cur->parent; - cpumask_t pspan, cspan; - - if (par == NULL || cpus_empty(cur->cpus_allowed)) - return; - - /* - * Get all cpus from parent's cpus_allowed not part of exclusive - * children - */ - pspan = par->cpus_allowed; - list_for_each_entry(c, &par->children, sibling) { - if (is_cpu_exclusive(c)) - cpus_andnot(pspan, pspan, c->cpus_allowed); - } - if (!is_cpu_exclusive(cur)) { - cpus_or(pspan, pspan, cur->cpus_allowed); - if (cpus_equal(pspan, cur->cpus_allowed)) - return; - cspan = CPU_MASK_NONE; - } else { - if (cpus_empty(pspan)) - return; - cspan = cur->cpus_allowed; - /* - * Get all cpus from current cpuset's cpus_allowed not part - * of exclusive children - */ - list_for_each_entry(c, &cur->children, sibling) { - if (is_cpu_exclusive(c)) - cpus_andnot(cspan, cspan, c->cpus_allowed); - } - } - - lock_cpu_hotplug(); - partition_sched_domains(&pspan, &cspan); - unlock_cpu_hotplug(); -} - -/* * 
Call with manage_mutex held. May take callback_mutex during call. */ static int update_cpumask(struct cpuset *cs, char *buf) { struct cpuset trialcs; - int retval, cpus_unchanged; + int retval; /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ if (cs == &top_cpuset) @@ -836,17 +517,15 @@ } cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map); /* cpus_allowed cannot be empty for a cpuset with attached tasks. */ - if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed)) + if (container_task_count(cs->css.container) && + cpus_empty(trialcs.cpus_allowed)) return -ENOSPC; retval = validate_change(cs, &trialcs); if (retval < 0) return retval; - cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed); mutex_lock(&callback_mutex); cs->cpus_allowed = trialcs.cpus_allowed; mutex_unlock(&callback_mutex); - if (is_cpu_exclusive(cs) && !cpus_unchanged) - update_cpu_domains(cs); return 0; } @@ -895,7 +574,7 @@ do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); mutex_lock(&callback_mutex); - guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); + guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed); mutex_unlock(&callback_mutex); } @@ -913,16 +592,19 @@ * their mempolicies to the cpusets new mems_allowed. */ +static void *cpuset_being_rebound; + static int update_nodemask(struct cpuset *cs, char *buf) { struct cpuset trialcs; nodemask_t oldmem; - struct task_struct *g, *p; + struct task_struct *p; struct mm_struct **mmarray; int i, n, ntasks; int migrate; int fudge; int retval; + struct container_iter it; /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ if (cs == &top_cpuset) @@ -949,7 +631,8 @@ goto done; } /* mems_allowed cannot be empty for a cpuset with attached tasks. 
*/ - if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) { + if (container_task_count(cs->css.container) && + nodes_empty(trialcs.mems_allowed)) { retval = -ENOSPC; goto done; } @@ -962,7 +645,7 @@ cs->mems_generation = cpuset_mems_generation++; mutex_unlock(&callback_mutex); - set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ + cpuset_being_rebound = cs; /* causes mpol_copy() rebind */ fudge = 10; /* spare mmarray[] slots */ fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ @@ -976,37 +659,37 @@ * enough mmarray[] w/o using GFP_ATOMIC. */ while (1) { - ntasks = atomic_read(&cs->count); /* guess */ + ntasks = container_task_count(cs->css.container); /* guess */ ntasks += fudge; mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); if (!mmarray) goto done; - write_lock_irq(&tasklist_lock); /* block fork */ - if (atomic_read(&cs->count) <= ntasks) + read_lock(&tasklist_lock); /* block fork */ + if (container_task_count(cs->css.container) <= ntasks) break; /* got enough */ - write_unlock_irq(&tasklist_lock); /* try again */ + read_unlock(&tasklist_lock); /* try again */ kfree(mmarray); } n = 0; /* Load up mmarray[] with mm reference for each task in cpuset. */ - do_each_thread(g, p) { + container_iter_start(cs->css.container, &it); + while ((p = container_iter_next(cs->css.container, &it))) { struct mm_struct *mm; if (n >= ntasks) { printk(KERN_WARNING "Cpuset mempolicy rebind incomplete.\n"); - continue; + break; } - if (p->cpuset != cs) - continue; mm = get_task_mm(p); if (!mm) continue; mmarray[n++] = mm; - } while_each_thread(g, p); - write_unlock_irq(&tasklist_lock); + } + container_iter_end(cs->css.container, &it); + read_unlock(&tasklist_lock); /* * Now that we've dropped the tasklist spinlock, we can @@ -1033,12 +716,17 @@ /* We're done rebinding vma's to this cpusets new mems_allowed. 
*/ kfree(mmarray); - set_cpuset_being_rebound(NULL); + cpuset_being_rebound = NULL; retval = 0; done: return retval; } +int current_cpuset_is_being_rebound(void) +{ + return task_cs(current) == cpuset_being_rebound; +} + /* * Call with manage_mutex held. */ @@ -1067,7 +755,7 @@ { int turning_on; struct cpuset trialcs; - int err, cpu_exclusive_changed; + int err; turning_on = (simple_strtoul(buf, NULL, 10) != 0); @@ -1080,14 +768,10 @@ err = validate_change(cs, &trialcs); if (err < 0) return err; - cpu_exclusive_changed = - (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); mutex_lock(&callback_mutex); cs->flags = trialcs.flags; mutex_unlock(&callback_mutex); - if (cpu_exclusive_changed) - update_cpu_domains(cs); return 0; } @@ -1189,85 +873,34 @@ return val; } -/* - * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly - * writing the path of the old cpuset in 'ppathbuf' if it needs to be - * notified on release. - * - * Call holding manage_mutex. May take callback_mutex and task_lock of - * the task 'pid' during call. 
- */ - -static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) +int cpuset_can_attach(struct container_subsys *ss, + struct container *cont, struct task_struct *tsk) { - pid_t pid; - struct task_struct *tsk; - struct cpuset *oldcs; - cpumask_t cpus; - nodemask_t from, to; - struct mm_struct *mm; - int retval; + struct cpuset *cs = container_cs(cont); - if (sscanf(pidbuf, "%d", &pid) != 1) - return -EIO; if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) return -ENOSPC; - if (pid) { - read_lock(&tasklist_lock); - - tsk = find_task_by_pid(pid); - if (!tsk || tsk->flags & PF_EXITING) { - read_unlock(&tasklist_lock); - return -ESRCH; - } - - get_task_struct(tsk); - read_unlock(&tasklist_lock); - - if ((current->euid) && (current->euid != tsk->uid) - && (current->euid != tsk->suid)) { - put_task_struct(tsk); - return -EACCES; - } - } else { - tsk = current; - get_task_struct(tsk); - } + return security_task_setscheduler(tsk, 0, NULL); +} - retval = security_task_setscheduler(tsk, 0, NULL); - if (retval) { - put_task_struct(tsk); - return retval; - } +void cpuset_attach(struct container_subsys *ss, + struct container *cont, struct container *oldcont, + struct task_struct *tsk) +{ + cpumask_t cpus; + nodemask_t from, to; + struct mm_struct *mm; + struct cpuset *cs = container_cs(cont); + struct cpuset *oldcs = container_cs(oldcont); mutex_lock(&callback_mutex); - - task_lock(tsk); - oldcs = tsk->cpuset; - /* - * After getting 'oldcs' cpuset ptr, be sure still not exiting. - * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack - * then fail this attach_task(), to avoid breaking top_cpuset.count. 
- */ - if (tsk->flags & PF_EXITING) { - task_unlock(tsk); - mutex_unlock(&callback_mutex); - put_task_struct(tsk); - return -ESRCH; - } - atomic_inc(&cs->count); - rcu_assign_pointer(tsk->cpuset, cs); - task_unlock(tsk); - guarantee_online_cpus(cs, &cpus); set_cpus_allowed(tsk, cpus); + mutex_unlock(&callback_mutex); from = oldcs->mems_allowed; to = cs->mems_allowed; - - mutex_unlock(&callback_mutex); - mm = get_task_mm(tsk); if (mm) { mpol_rebind_mm(mm, &to); @@ -1276,40 +909,31 @@ mmput(mm); } - put_task_struct(tsk); - synchronize_rcu(); - if (atomic_dec_and_test(&oldcs->count)) - check_for_release(oldcs, ppathbuf); - return 0; } /* The various types of files and directories in a cpuset file system */ typedef enum { - FILE_ROOT, - FILE_DIR, FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, - FILE_NOTIFY_ON_RELEASE, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, - FILE_TASKLIST, } cpuset_filetype_t; -static ssize_t cpuset_common_file_write(struct file *file, +static ssize_t cpuset_common_file_write(struct container *cont, + struct cftype *cft, + struct file *file, const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { - struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); - struct cftype *cft = __d_cft(file->f_path.dentry); + struct cpuset *cs = container_cs(cont); cpuset_filetype_t type = cft->private; char *buffer; - char *pathbuf = NULL; int retval = 0; /* Crude upper limit on largest legitimate cpulist user might write. 
*/ @@ -1326,9 +950,9 @@ } buffer[nbytes] = 0; /* nul-terminate */ - mutex_lock(&manage_mutex); + container_lock(); - if (is_removed(cs)) { + if (container_is_removed(cont)) { retval = -ENODEV; goto out2; } @@ -1346,9 +970,6 @@ case FILE_MEM_EXCLUSIVE: retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer); break; - case FILE_NOTIFY_ON_RELEASE: - retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); - break; case FILE_MEMORY_MIGRATE: retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); break; @@ -1366,9 +987,6 @@ retval = update_flag(CS_SPREAD_SLAB, cs, buffer); cs->mems_generation = cpuset_mems_generation++; break; - case FILE_TASKLIST: - retval = attach_task(cs, buffer, &pathbuf); - break; default: retval = -EINVAL; goto out2; @@ -1377,30 +995,12 @@ if (retval == 0) retval = nbytes; out2: - mutex_unlock(&manage_mutex); - cpuset_release_agent(pathbuf); + container_unlock(); out1: kfree(buffer); return retval; } -static ssize_t cpuset_file_write(struct file *file, const char __user *buf, - size_t nbytes, loff_t *ppos) -{ - ssize_t retval = 0; - struct cftype *cft = __d_cft(file->f_path.dentry); - if (!cft) - return -ENODEV; - - /* special function ? */ - if (cft->write) - retval = cft->write(file, buf, nbytes, ppos); - else - retval = cpuset_common_file_write(file, buf, nbytes, ppos); - - return retval; -} - /* * These ascii lists should be read in a single call, by using a user * buffer large enough to hold the entire map. 
If read in smaller @@ -1435,17 +1035,19 @@ return nodelist_scnprintf(page, PAGE_SIZE, mask); } -static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, +static ssize_t cpuset_common_file_read(struct container *cont, + struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos) { - struct cftype *cft = __d_cft(file->f_path.dentry); - struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); + struct cpuset *cs = container_cs(cont); cpuset_filetype_t type = cft->private; char *page; ssize_t retval = 0; char *s; - if (!(page = (char *)__get_free_page(GFP_KERNEL))) + if (!(page = (char *)__get_free_page(GFP_TEMPORARY))) return -ENOMEM; s = page; @@ -1463,9 +1065,6 @@ case FILE_MEM_EXCLUSIVE: *s++ = is_mem_exclusive(cs) ? '1' : '0'; break; - case FILE_NOTIFY_ON_RELEASE: - *s++ = notify_on_release(cs) ? '1' : '0'; - break; case FILE_MEMORY_MIGRATE: *s++ = is_memory_migrate(cs) ? '1' : '0'; break; @@ -1493,390 +1092,140 @@ return retval; } -static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - ssize_t retval = 0; - struct cftype *cft = __d_cft(file->f_path.dentry); - if (!cft) - return -ENODEV; - - /* special function ? */ - if (cft->read) - retval = cft->read(file, buf, nbytes, ppos); - else - retval = cpuset_common_file_read(file, buf, nbytes, ppos); - - return retval; -} - -static int cpuset_file_open(struct inode *inode, struct file *file) -{ - int err; - struct cftype *cft; - - err = generic_file_open(inode, file); - if (err) - return err; - - cft = __d_cft(file->f_path.dentry); - if (!cft) - return -ENODEV; - if (cft->open) - err = cft->open(inode, file); - else - err = 0; - - return err; -} - -static int cpuset_file_release(struct inode *inode, struct file *file) -{ - struct cftype *cft = __d_cft(file->f_path.dentry); - if (cft->release) - return cft->release(inode, file); - return 0; -} - -/* - * cpuset_rename - Only allow simple rename of directories in place. 
- */ -static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - if (!S_ISDIR(old_dentry->d_inode->i_mode)) - return -ENOTDIR; - if (new_dentry->d_inode) - return -EEXIST; - if (old_dir != new_dir) - return -EIO; - return simple_rename(old_dir, old_dentry, new_dir, new_dentry); -} - -static const struct file_operations cpuset_file_operations = { - .read = cpuset_file_read, - .write = cpuset_file_write, - .llseek = generic_file_llseek, - .open = cpuset_file_open, - .release = cpuset_file_release, -}; - -static const struct inode_operations cpuset_dir_inode_operations = { - .lookup = simple_lookup, - .mkdir = cpuset_mkdir, - .rmdir = cpuset_rmdir, - .rename = cpuset_rename, -}; - -static int cpuset_create_file(struct dentry *dentry, int mode) -{ - struct inode *inode; - - if (!dentry) - return -ENOENT; - if (dentry->d_inode) - return -EEXIST; - - inode = cpuset_new_inode(mode); - if (!inode) - return -ENOMEM; - - if (S_ISDIR(mode)) { - inode->i_op = &cpuset_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - - /* start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); - } else if (S_ISREG(mode)) { - inode->i_size = 0; - inode->i_fop = &cpuset_file_operations; - } - - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ - return 0; -} - -/* - * cpuset_create_dir - create a directory for an object. - * cs: the cpuset we create the directory for. - * It must have a valid ->parent field - * And we are going to fill its ->dentry field. - * name: The name to give to the cpuset directory. Will be copied. - * mode: mode to set on new directory. 
- */ - -static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode) -{ - struct dentry *dentry = NULL; - struct dentry *parent; - int error = 0; - - parent = cs->parent->dentry; - dentry = cpuset_get_dentry(parent, name); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - error = cpuset_create_file(dentry, S_IFDIR | mode); - if (!error) { - dentry->d_fsdata = cs; - inc_nlink(parent->d_inode); - cs->dentry = dentry; - } - dput(dentry); - - return error; -} - -static int cpuset_add_file(struct dentry *dir, const struct cftype *cft) -{ - struct dentry *dentry; - int error; - - mutex_lock(&dir->d_inode->i_mutex); - dentry = cpuset_get_dentry(dir, cft->name); - if (!IS_ERR(dentry)) { - error = cpuset_create_file(dentry, 0644 | S_IFREG); - if (!error) - dentry->d_fsdata = (void *)cft; - dput(dentry); - } else - error = PTR_ERR(dentry); - mutex_unlock(&dir->d_inode->i_mutex); - return error; -} - -/* - * Stuff for reading the 'tasks' file. - * - * Reading this file can return large amounts of data if a cpuset has - * *lots* of attached tasks. So it may need several calls to read(), - * but we cannot guarantee that the information we produce is correct - * unless we produce it entirely atomically. - * - * Upon tasks file open(), a struct ctr_struct is allocated, that - * will have a pointer to an array (also allocated here). The struct - * ctr_struct * is stored in file->private_data. Its resources will - * be freed by release() when the file is closed. The array is used - * to sprintf the PIDs and then used by read(). - */ - -/* cpusets_tasks_read array */ - -struct ctr_struct { - char *buf; - int bufsz; -}; - -/* - * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'. - * Return actual number of pids loaded. No need to task_lock(p) - * when reading out p->cpuset, as we don't really care if it changes - * on the next cycle, and we are not going to try to dereference it. 
- */ -static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs) -{ - int n = 0; - struct task_struct *g, *p; - - read_lock(&tasklist_lock); - - do_each_thread(g, p) { - if (p->cpuset == cs) { - if (unlikely(n == npids)) - goto array_full; - pidarray[n++] = p->pid; - } - } while_each_thread(g, p); - -array_full: - read_unlock(&tasklist_lock); - return n; -} - -static int cmppid(const void *a, const void *b) -{ - return *(pid_t *)a - *(pid_t *)b; -} - -/* - * Convert array 'a' of 'npids' pid_t's to a string of newline separated - * decimal pids in 'buf'. Don't write more than 'sz' chars, but return - * count 'cnt' of how many chars would be written if buf were large enough. - */ -static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) -{ - int cnt = 0; - int i; - - for (i = 0; i < npids; i++) - cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); - return cnt; -} - -/* - * Handle an open on 'tasks' file. Prepare a buffer listing the - * process id's of tasks currently attached to the cpuset being opened. - * - * Does not require any specific cpuset mutexes, and does not take any. - */ -static int cpuset_tasks_open(struct inode *unused, struct file *file) -{ - struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); - struct ctr_struct *ctr; - pid_t *pidarray; - int npids; - char c; - - if (!(file->f_mode & FMODE_READ)) - return 0; - - ctr = kmalloc(sizeof(*ctr), GFP_KERNEL); - if (!ctr) - goto err0; - - /* - * If cpuset gets more users after we read count, we won't have - * enough space - tough. This race is indistinguishable to the - * caller from the case that the additional cpuset users didn't - * show up until sometime later on. 
- */ - npids = atomic_read(&cs->count); - pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); - if (!pidarray) - goto err1; - - npids = pid_array_load(pidarray, npids, cs); - sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); - - /* Call pid_array_to_buf() twice, first just to get bufsz */ - ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1; - ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL); - if (!ctr->buf) - goto err2; - ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids); - - kfree(pidarray); - file->private_data = ctr; - return 0; - -err2: - kfree(pidarray); -err1: - kfree(ctr); -err0: - return -ENOMEM; -} - -static ssize_t cpuset_tasks_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos) -{ - struct ctr_struct *ctr = file->private_data; - return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); -} -static int cpuset_tasks_release(struct inode *unused_inode, struct file *file) -{ - struct ctr_struct *ctr; - if (file->f_mode & FMODE_READ) { - ctr = file->private_data; - kfree(ctr->buf); - kfree(ctr); - } - return 0; -} /* * for the common functions, 'private' gives the type of file */ -static struct cftype cft_tasks = { - .name = "tasks", - .open = cpuset_tasks_open, - .read = cpuset_tasks_read, - .release = cpuset_tasks_release, - .private = FILE_TASKLIST, -}; - static struct cftype cft_cpus = { .name = "cpus", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_CPULIST, }; static struct cftype cft_mems = { .name = "mems", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_MEMLIST, }; static struct cftype cft_cpu_exclusive = { .name = "cpu_exclusive", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_CPU_EXCLUSIVE, }; static struct cftype cft_mem_exclusive = { .name = "mem_exclusive", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_MEM_EXCLUSIVE, 
}; -static struct cftype cft_notify_on_release = { - .name = "notify_on_release", - .private = FILE_NOTIFY_ON_RELEASE, -}; - static struct cftype cft_memory_migrate = { .name = "memory_migrate", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_MEMORY_MIGRATE, }; static struct cftype cft_memory_pressure_enabled = { .name = "memory_pressure_enabled", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_MEMORY_PRESSURE_ENABLED, }; static struct cftype cft_memory_pressure = { .name = "memory_pressure", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_MEMORY_PRESSURE, }; static struct cftype cft_spread_page = { .name = "memory_spread_page", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_SPREAD_PAGE, }; static struct cftype cft_spread_slab = { .name = "memory_spread_slab", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, .private = FILE_SPREAD_SLAB, }; -static int cpuset_populate_dir(struct dentry *cs_dentry) +int cpuset_populate(struct container_subsys *ss, struct container *cont) { int err; - if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0) + if ((err = container_add_file(cont, &cft_cpus)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0) + if ((err = container_add_file(cont, &cft_mems)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0) + if ((err = container_add_file(cont, &cft_cpu_exclusive)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0) + if ((err = container_add_file(cont, &cft_mem_exclusive)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) + if ((err = container_add_file(cont, &cft_memory_migrate)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) + if ((err = container_add_file(cont, &cft_memory_pressure)) < 0) return 
err; - if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) + if ((err = container_add_file(cont, &cft_spread_page)) < 0) return err; - if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0) - return err; - if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0) - return err; - if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) + if ((err = container_add_file(cont, &cft_spread_slab)) < 0) return err; + /* memory_pressure_enabled is in root cpuset only */ + if (err == 0 && !cont->parent) + err = container_add_file(cont, &cft_memory_pressure_enabled); return 0; } /* + * post_clone() is called at the end of container_clone(). + * 'container' was just created automatically as a result of + * a container_clone(), and the current task is about to + * be moved into 'container'. + * + * Currently we refuse to set up the container - thereby + * refusing the task to be entered, and as a result refusing + * the sys_unshare() or clone() which initiated it - if any + * sibling cpusets have exclusive cpus or mem. + * + * If this becomes a problem for some users who wish to + * allow that scenario, then cpuset_post_clone() could be + * changed to grant parent->cpus_allowed-sibling_cpus_exclusive + * (and likewise for mems) to the new container. + */ +void cpuset_post_clone(struct container_subsys *ss, + struct container *container) +{ + struct container *parent, *child; + struct cpuset *cs, *parent_cs; + + parent = container->parent; + list_for_each_entry(child, &parent->children, sibling) { + cs = container_cs(child); + if (is_mem_exclusive(cs) || is_cpu_exclusive(cs)) + return; + } + cs = container_cs(container); + parent_cs = container_cs(parent); + + cs->mems_allowed = parent_cs->mems_allowed; + cs->cpus_allowed = parent_cs->cpus_allowed; + return; +} + +/* * cpuset_create - create a cpuset * parent: cpuset that will be parent of the new cpuset. * name: name of the new cpuset. Will be strcpy'ed. 
@@ -1885,124 +1234,62 @@ * Must be called with the mutex on the parent inode held */ -static long cpuset_create(struct cpuset *parent, const char *name, int mode) +int cpuset_create(struct container_subsys *ss, struct container *cont) { struct cpuset *cs; - int err; + struct cpuset *parent; + if (!cont->parent) { + /* This is early initialization for the top container */ + set_container_cs(cont, &top_cpuset); + top_cpuset.css.container = cont; + top_cpuset.mems_generation = cpuset_mems_generation++; + return 0; + } + parent = container_cs(cont->parent); cs = kmalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return -ENOMEM; - mutex_lock(&manage_mutex); cpuset_update_task_memory_state(); cs->flags = 0; - if (notify_on_release(parent)) - set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); cs->cpus_allowed = CPU_MASK_NONE; cs->mems_allowed = NODE_MASK_NONE; - atomic_set(&cs->count, 0); - INIT_LIST_HEAD(&cs->sibling); - INIT_LIST_HEAD(&cs->children); cs->mems_generation = cpuset_mems_generation++; fmeter_init(&cs->fmeter); cs->parent = parent; - - mutex_lock(&callback_mutex); - list_add(&cs->sibling, &cs->parent->children); + set_container_cs(cont, cs); + cs->css.container = cont; number_of_cpusets++; - mutex_unlock(&callback_mutex); - - err = cpuset_create_dir(cs, name, mode); - if (err < 0) - goto err; - - /* - * Release manage_mutex before cpuset_populate_dir() because it - * will down() this new directory's i_mutex and if we race with - * another mkdir, we might deadlock. 
- */ - mutex_unlock(&manage_mutex); - - err = cpuset_populate_dir(cs->dentry); - /* If err < 0, we have a half-filled directory - oh well ;) */ return 0; -err: - list_del(&cs->sibling); - mutex_unlock(&manage_mutex); - kfree(cs); - return err; -} - -static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct cpuset *c_parent = dentry->d_parent->d_fsdata; - - /* the vfs holds inode->i_mutex already */ - return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); } -/* - * Locking note on the strange update_flag() call below: - * - * If the cpuset being removed is marked cpu_exclusive, then simulate - * turning cpu_exclusive off, which will call update_cpu_domains(). - * The lock_cpu_hotplug() call in update_cpu_domains() must not be - * made while holding callback_mutex. Elsewhere the kernel nests - * callback_mutex inside lock_cpu_hotplug() calls. So the reverse - * nesting would risk an ABBA deadlock. - */ - -static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) +void cpuset_destroy(struct container_subsys *ss, struct container *cont) { - struct cpuset *cs = dentry->d_fsdata; - struct dentry *d; - struct cpuset *parent; - char *pathbuf = NULL; - - /* the vfs holds both inode->i_mutex already */ + struct cpuset *cs = container_cs(cont); - mutex_lock(&manage_mutex); cpuset_update_task_memory_state(); - if (atomic_read(&cs->count) > 0) { - mutex_unlock(&manage_mutex); - return -EBUSY; - } - if (!list_empty(&cs->children)) { - mutex_unlock(&manage_mutex); - return -EBUSY; - } - if (is_cpu_exclusive(cs)) { - int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0"); - if (retval < 0) { - mutex_unlock(&manage_mutex); - return retval; - } - } - parent = cs->parent; - mutex_lock(&callback_mutex); - set_bit(CS_REMOVED, &cs->flags); - list_del(&cs->sibling); /* delete my sibling from parent->children */ - spin_lock(&cs->dentry->d_lock); - d = dget(cs->dentry); - cs->dentry = NULL; - spin_unlock(&d->d_lock); - 
cpuset_d_remove_dir(d); - dput(d); number_of_cpusets--; - mutex_unlock(&callback_mutex); - if (list_empty(&parent->children)) - check_for_release(parent, &pathbuf); - mutex_unlock(&manage_mutex); - cpuset_release_agent(pathbuf); - return 0; + kfree(cs); } +struct container_subsys cpuset_subsys = { + .name = "cpuset", + .create = cpuset_create, + .destroy = cpuset_destroy, + .can_attach = cpuset_can_attach, + .attach = cpuset_attach, + .populate = cpuset_populate, + .post_clone = cpuset_post_clone, + .subsys_id = cpuset_subsys_id, + .early_init = 1, +}; + /* * cpuset_init_early - just enough so that the calls to * cpuset_update_task_memory_state() in early init code @@ -2011,13 +1298,11 @@ int __init cpuset_init_early(void) { - struct task_struct *tsk = current; - - tsk->cpuset = &top_cpuset; - tsk->cpuset->mems_generation = cpuset_mems_generation++; + top_cpuset.mems_generation = cpuset_mems_generation++; return 0; } + /** * cpuset_init - initialize cpusets at system boot * @@ -2026,8 +1311,7 @@ int __init cpuset_init(void) { - struct dentry *root; - int err; + int err = 0; top_cpuset.cpus_allowed = CPU_MASK_ALL; top_cpuset.mems_allowed = NODE_MASK_ALL; @@ -2035,30 +1319,12 @@ fmeter_init(&top_cpuset.fmeter); top_cpuset.mems_generation = cpuset_mems_generation++; - init_task.cpuset = &top_cpuset; - err = register_filesystem(&cpuset_fs_type); if (err < 0) - goto out; - cpuset_mount = kern_mount(&cpuset_fs_type); - if (IS_ERR(cpuset_mount)) { - printk(KERN_ERR "cpuset: could not mount!\n"); - err = PTR_ERR(cpuset_mount); - cpuset_mount = NULL; - goto out; - } - root = cpuset_mount->mnt_sb->s_root; - root->d_fsdata = &top_cpuset; - inc_nlink(root->d_inode); - top_cpuset.dentry = root; - root->d_inode->i_op = &cpuset_dir_inode_operations; - number_of_cpusets = 1; - err = cpuset_populate_dir(root); - /* memory_pressure_enabled is in root cpuset only */ - if (err == 0) - err = cpuset_add_file(root, &cft_memory_pressure_enabled); -out: return err; + + number_of_cpusets = 
1; + return 0; } /* @@ -2084,10 +1350,12 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) { + struct container *cont; struct cpuset *c; /* Each of our child cpusets mems must be online */ - list_for_each_entry(c, &cur->children, sibling) { + list_for_each_entry(cont, &cur->css.container->children, sibling) { + c = container_cs(cont); guarantee_online_cpus_mems_in_subtree(c); if (!cpus_empty(c->cpus_allowed)) guarantee_online_cpus(c, &c->cpus_allowed); @@ -2114,7 +1382,7 @@ static void common_cpu_mem_hotplug_unplug(void) { - mutex_lock(&manage_mutex); + container_lock(); mutex_lock(&callback_mutex); guarantee_online_cpus_mems_in_subtree(&top_cpuset); @@ -2122,7 +1390,7 @@ top_cpuset.mems_allowed = node_online_map; mutex_unlock(&callback_mutex); - mutex_unlock(&manage_mutex); + container_unlock(); } /* @@ -2170,109 +1438,7 @@ } /** - * cpuset_fork - attach newly forked task to its parents cpuset. - * @tsk: pointer to task_struct of forking parent process. - * - * Description: A task inherits its parent's cpuset at fork(). - * - * A pointer to the shared cpuset was automatically copied in fork.c - * by dup_task_struct(). However, we ignore that copy, since it was - * not made under the protection of task_lock(), so might no longer be - * a valid cpuset pointer. attach_task() might have already changed - * current->cpuset, allowing the previously referenced cpuset to - * be removed and freed. Instead, we task_lock(current) and copy - * its present value of current->cpuset for our freshly forked child. - * - * At the point that cpuset_fork() is called, 'current' is the parent - * task, and the passed argument 'child' points to the child task. 
- **/ -void cpuset_fork(struct task_struct *child) -{ - task_lock(current); - child->cpuset = current->cpuset; - atomic_inc(&child->cpuset->count); - task_unlock(current); -} - -/** - * cpuset_exit - detach cpuset from exiting task - * @tsk: pointer to task_struct of exiting process - * - * Description: Detach cpuset from @tsk and release it. - * - * Note that cpusets marked notify_on_release force every task in - * them to take the global manage_mutex mutex when exiting. - * This could impact scaling on very large systems. Be reluctant to - * use notify_on_release cpusets where very high task exit scaling - * is required on large systems. - * - * Don't even think about derefencing 'cs' after the cpuset use count - * goes to zero, except inside a critical section guarded by manage_mutex - * or callback_mutex. Otherwise a zero cpuset use count is a license to - * any other task to nuke the cpuset immediately, via cpuset_rmdir(). - * - * This routine has to take manage_mutex, not callback_mutex, because - * it is holding that mutex while calling check_for_release(), - * which calls kmalloc(), so can't be called holding callback_mutex(). - * - * the_top_cpuset_hack: - * - * Set the exiting tasks cpuset to the root cpuset (top_cpuset). - * - * Don't leave a task unable to allocate memory, as that is an - * accident waiting to happen should someone add a callout in - * do_exit() after the cpuset_exit() call that might allocate. - * If a task tries to allocate memory with an invalid cpuset, - * it will oops in cpuset_update_task_memory_state(). - * - * We call cpuset_exit() while the task is still competent to - * handle notify_on_release(), then leave the task attached to - * the root cpuset (top_cpuset) for the remainder of its exit. - * - * To do this properly, we would increment the reference count on - * top_cpuset, and near the very end of the kernel/exit.c do_exit() - * code we would add a second cpuset function call, to drop that - * reference. 
This would just create an unnecessary hot spot on - * the top_cpuset reference count, to no avail. - * - * Normally, holding a reference to a cpuset without bumping its - * count is unsafe. The cpuset could go away, or someone could - * attach us to a different cpuset, decrementing the count on - * the first cpuset that we never incremented. But in this case, - * top_cpuset isn't going away, and either task has PF_EXITING set, - * which wards off any attach_task() attempts, or task is a failed - * fork, never visible to attach_task. - * - * Another way to do this would be to set the cpuset pointer - * to NULL here, and check in cpuset_update_task_memory_state() - * for a NULL pointer. This hack avoids that NULL check, for no - * cost (other than this way too long comment ;). - **/ - -void cpuset_exit(struct task_struct *tsk) -{ - struct cpuset *cs; - - task_lock(current); - cs = tsk->cpuset; - tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */ - task_unlock(current); - - if (notify_on_release(cs)) { - char *pathbuf = NULL; - - mutex_lock(&manage_mutex); - if (atomic_dec_and_test(&cs->count)) - check_for_release(cs, &pathbuf); - mutex_unlock(&manage_mutex); - cpuset_release_agent(pathbuf); - } else { - atomic_dec(&cs->count); - } -} - -/** * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 
* @@ -2288,7 +1454,7 @@ mutex_lock(&callback_mutex); task_lock(tsk); - guarantee_online_cpus(tsk->cpuset, &mask); + guarantee_online_cpus(task_cs(tsk), &mask); task_unlock(tsk); mutex_unlock(&callback_mutex); @@ -2316,7 +1482,7 @@ mutex_lock(&callback_mutex); task_lock(tsk); - guarantee_online_mems(tsk->cpuset, &mask); + guarantee_online_mems(task_cs(tsk), &mask); task_unlock(tsk); mutex_unlock(&callback_mutex); @@ -2447,7 +1613,7 @@ mutex_lock(&callback_mutex); task_lock(current); - cs = nearest_exclusive_ancestor(current->cpuset); + cs = nearest_exclusive_ancestor(task_cs(current)); task_unlock(current); allowed = node_isset(node, cs->mems_allowed); @@ -2584,7 +1750,7 @@ task_unlock(current); goto done; } - cs1 = nearest_exclusive_ancestor(current->cpuset); + cs1 = nearest_exclusive_ancestor(task_cs(current)); task_unlock(current); task_lock((struct task_struct *)p); @@ -2592,7 +1758,7 @@ task_unlock((struct task_struct *)p); goto done; } - cs2 = nearest_exclusive_ancestor(p->cpuset); + cs2 = nearest_exclusive_ancestor(task_cs((struct task_struct *)p)); task_unlock((struct task_struct *)p); overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); @@ -2628,14 +1794,12 @@ void __cpuset_memory_pressure_bump(void) { - struct cpuset *cs; - task_lock(current); - cs = current->cpuset; - fmeter_markevent(&cs->fmeter); + fmeter_markevent(&task_cs(current)->fmeter); task_unlock(current); } +#ifdef CONFIG_PROC_PID_CPUSET /* * proc_cpuset_show() * - Print tasks cpuset path into seq_file. 
@@ -2652,6 +1816,7 @@ struct pid *pid; struct task_struct *tsk; char *buf; + struct container_subsys_state *css; int retval; retval = -ENOMEM; @@ -2666,15 +1831,15 @@ goto out_free; retval = -EINVAL; - mutex_lock(&manage_mutex); - - retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); + container_lock(); + css = task_subsys_state(tsk, cpuset_subsys_id); + retval = container_path(css->container, buf, PAGE_SIZE); if (retval < 0) goto out_unlock; seq_puts(m, buf); seq_putc(m, '\n'); out_unlock: - mutex_unlock(&manage_mutex); + container_unlock(); put_task_struct(tsk); out_free: kfree(buf); @@ -2694,6 +1859,7 @@ .llseek = seq_lseek, .release = single_release, }; +#endif /* CONFIG_PROC_PID_CPUSET */ /* Display task cpus_allowed, mems_allowed in /proc//status file. */ char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) diff -Nurb linux-2.6.22-570/kernel/exit.c linux-2.6.22-591/kernel/exit.c --- linux-2.6.22-570/kernel/exit.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/exit.c 2007-12-21 15:36:12.000000000 -0500 @@ -31,7 +31,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -393,6 +394,11 @@ * they would be locked into memory. */ exit_mm(current); + /* + * We don't want to have TIF_FREEZE set if the system-wide hibernation + * or suspend transition begins right now. 
+ */ + current->flags |= PF_NOFREEZE; set_special_pids(1, 1); proc_clear_tty(current); @@ -875,6 +881,34 @@ release_task(tsk); } +#ifdef CONFIG_DEBUG_STACK_USAGE +static void check_stack_usage(void) +{ + static DEFINE_SPINLOCK(low_water_lock); + static int lowest_to_date = THREAD_SIZE; + unsigned long *n = end_of_stack(current); + unsigned long free; + + while (*n == 0) + n++; + free = (unsigned long)n - (unsigned long)end_of_stack(current); + + if (free >= lowest_to_date) + return; + + spin_lock(&low_water_lock); + if (free < lowest_to_date) { + printk(KERN_WARNING "%s used greatest stack depth: %lu bytes " + "left\n", + current->comm, free); + lowest_to_date = free; + } + spin_unlock(&low_water_lock); +} +#else +static inline void check_stack_usage(void) {} +#endif + fastcall NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; @@ -966,8 +1000,9 @@ exit_sem(tsk); __exit_files(tsk); __exit_fs(tsk); + check_stack_usage(); exit_thread(); - cpuset_exit(tsk); + container_exit(tsk, 1); exit_keys(tsk); if (group_dead && tsk->signal->leader) diff -Nurb linux-2.6.22-570/kernel/fork.c linux-2.6.22-591/kernel/fork.c --- linux-2.6.22-570/kernel/fork.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/fork.c 2007-12-21 15:36:15.000000000 -0500 @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -342,6 +342,8 @@ atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); + mm->flags = (current->mm) ? 
current->mm->flags + : MMF_DUMP_FILTER_DEFAULT; mm->core_waiters = 0; mm->nr_ptes = 0; __set_mm_counter(mm, file_rss, 0); @@ -936,7 +938,7 @@ { unsigned long new_flags = p->flags; - new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); + new_flags &= ~PF_SUPERPRIV; new_flags |= PF_FORKNOEXEC; if (!(clone_flags & CLONE_PTRACE)) p->ptrace = 0; @@ -977,6 +979,7 @@ { int retval; struct task_struct *p = NULL; + int container_callbacks_done = 0; struct vx_info *vxi; struct nx_info *nxi; @@ -1061,11 +1064,6 @@ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); p->pid = pid_nr(pid); - retval = -EFAULT; - if (clone_flags & CLONE_PARENT_SETTID) - if (put_user(p->pid, parent_tidptr)) - goto bad_fork_cleanup_delays_binfmt; - INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; @@ -1095,17 +1093,19 @@ p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); + p->real_start_time = p->start_time; + monotonic_to_bootbased(&p->real_start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; - cpuset_fork(p); + container_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; - goto bad_fork_cleanup_cpuset; + goto bad_fork_cleanup_container; } mpol_fix_fork_child_flag(p); #endif @@ -1215,6 +1215,12 @@ /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); + /* Now that the task is set up, run container callbacks if + * necessary. We need to run them before the task is visible + * on the tasklist. */ + container_fork_callbacks(p); + container_callbacks_done = 1; + /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); @@ -1314,6 +1320,14 @@ if (nxi) claim_nx_info(nxi, p); write_unlock_irq(&tasklist_lock); + + /* + * Now that we know the fork has succeeded, record the new + * TID. 
It's too late to back out if this fails. + */ + if (clone_flags & CLONE_PARENT_SETTID) + put_user(p->pid, parent_tidptr); + proc_fork_connector(p); return p; @@ -1341,10 +1355,9 @@ bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); -bad_fork_cleanup_cpuset: +bad_fork_cleanup_container: #endif - cpuset_exit(p); -bad_fork_cleanup_delays_binfmt: + container_exit(p, container_callbacks_done); delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); @@ -1661,7 +1674,7 @@ err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER)) goto bad_unshare_out; if ((err = unshare_thread(unshare_flags))) diff -Nurb linux-2.6.22-570/kernel/kgdb.c linux-2.6.22-591/kernel/kgdb.c --- linux-2.6.22-570/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/kgdb.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,1866 @@ +/* + * kernel/kgdb.c + * + * Maintainer: Jason Wessel + * + * Copyright (C) 2000-2001 VERITAS Software Corporation. + * Copyright (C) 2002-2004 Timesys Corporation + * Copyright (C) 2003-2004 Amit S. Kale + * Copyright (C) 2004 Pavel Machek + * Copyright (C) 2004-2006 Tom Rini + * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. + * Copyright (C) 2005-2007 Wind River Systems, Inc. + * + * Contributors at various stages not listed above: + * Jason Wessel ( jason.wessel@windriver.com ) + * George Anzinger + * Anurekh Saxena (anurekh.saxena@timesys.com) + * Lake Stevens Instrument Division (Glenn Engel) + * Jim Kingdon, Cygnus Support. + * + * Original KGDB stub: David Grothe , + * Tigran Aivazian + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int pid_max; +/* How many times to count all of the waiting CPUs */ +#define ROUNDUP_WAIT 640000 /* Arbitrary, increase if needed. */ +#define BUF_THREAD_ID_SIZE 16 + +/* + * kgdb_initialized with a value of 1 indicates that kgdb is setup and is + * all ready to serve breakpoints and other kernel exceptions. A value of + * -1 indicates that we have tried to initialize early, and need to try + * again later. + */ +int kgdb_initialized; +/* Is a host GDB connected to us? */ +int kgdb_connected; +/* Could we be about to try and access a bad memory location? If so we + * also need to flag this has happend. */ +int kgdb_may_fault; +#ifdef CONFIG_PREEMPT +static int kgdb_fault_preempt_count; +#endif + +/* All the KGDB handlers are installed */ +int kgdb_from_module_registered = 0; +/* Guard for recursive entry */ +static int exception_level = 0; + +/* We provide a kgdb_io_ops structure that may be overriden. */ +struct kgdb_io __attribute__ ((weak)) kgdb_io_ops; + +static struct kgdb_io kgdb_io_ops_prev[MAX_KGDB_IO_HANDLERS]; +static int kgdb_io_handler_cnt = 0; + +/* Export the following symbols for use with kernel modules */ +EXPORT_SYMBOL(kgdb_io_ops); +EXPORT_SYMBOL(kgdb_tasklet_breakpoint); +EXPORT_SYMBOL(kgdb_connected); +EXPORT_SYMBOL(kgdb_register_io_module); +EXPORT_SYMBOL(kgdb_unregister_io_module); +EXPORT_SYMBOL(debugger_active); + +/* + * Holds information about breakpoints in a kernel. These breakpoints are + * added and removed by gdb. 
+ */ +struct kgdb_bkpt kgdb_break[MAX_BREAKPOINTS]; + +struct kgdb_arch *kgdb_ops = &arch_kgdb_ops; + +static const char hexchars[] = "0123456789abcdef"; + +static spinlock_t slavecpulocks[NR_CPUS]; +static atomic_t procindebug[NR_CPUS]; +atomic_t kgdb_setting_breakpoint; +EXPORT_SYMBOL(kgdb_setting_breakpoint); +struct task_struct *kgdb_usethread, *kgdb_contthread; + +int debugger_step; +atomic_t debugger_active; + +/* Our I/O buffers. */ +static char remcom_in_buffer[BUFMAX]; +static char remcom_out_buffer[BUFMAX]; +/* Storage for the registers, in GDB format. */ +static unsigned long gdb_regs[(NUMREGBYTES + sizeof(unsigned long) - 1) / + sizeof(unsigned long)]; +/* Storage of registers for handling a fault. */ +unsigned long kgdb_fault_jmp_regs[NUMCRITREGBYTES / sizeof(unsigned long)] + JMP_REGS_ALIGNMENT; +static int kgdb_notify_reboot(struct notifier_block *this, + unsigned long code ,void *x); +struct debuggerinfo_struct { + void *debuggerinfo; + struct task_struct *task; +} kgdb_info[NR_CPUS]; + +/* to keep track of the CPU which is doing the single stepping*/ +atomic_t cpu_doing_single_step = ATOMIC_INIT(-1); + +atomic_t kgdb_sync_softlockup[NR_CPUS] = {ATOMIC_INIT(0)}; + +/* reboot notifier block */ +static struct notifier_block kgdb_reboot_notifier = { + .notifier_call = kgdb_notify_reboot, + .next = NULL, + .priority = INT_MAX, +}; + +int __attribute__ ((weak)) + kgdb_validate_break_address(unsigned long addr) +{ + int error = 0; + char tmp_variable[BREAK_INSTR_SIZE]; + error = kgdb_get_mem((char *)addr, tmp_variable, BREAK_INSTR_SIZE); + return error; +} + +int __attribute__ ((weak)) + kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) +{ + int error = 0; + if ((error = kgdb_get_mem((char *)addr, + saved_instr, BREAK_INSTR_SIZE)) < 0) + return error; + + if ((error = kgdb_set_mem((char *)addr, kgdb_ops->gdb_bpt_instr, + BREAK_INSTR_SIZE)) < 0) + return error; + return 0; +} + +int __attribute__ ((weak)) + 
kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) +{ + + int error = 0; + if ((error =kgdb_set_mem((char *)addr, (char *)bundle, + BREAK_INSTR_SIZE)) < 0) + return error; + return 0; +} + +unsigned long __attribute__ ((weak)) + kgdb_arch_pc(int exception, struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +static int hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +static void get_packet(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int count; + char ch; + if (!kgdb_io_ops.read_char) + return; + do { + /* Spin and wait around for the start character, ignore all + * other characters */ + while ((ch = (kgdb_io_ops.read_char())) != '$') ; + kgdb_connected = 1; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < (BUFMAX - 1)) { + ch = kgdb_io_ops.read_char(); + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(kgdb_io_ops.read_char()) << 4; + xmitcsum += hex(kgdb_io_ops.read_char()); + + if (checksum != xmitcsum) + /* failed checksum */ + kgdb_io_ops.write_char('-'); + else + /* successful transfer */ + kgdb_io_ops.write_char('+'); + if (kgdb_io_ops.flush) + kgdb_io_ops.flush(); + } + } while (checksum != xmitcsum); +} + +static void kgdb_set_may_fault(void) { + kgdb_may_fault = 1; +#ifdef CONFIG_PREEMPT + kgdb_fault_preempt_count = preempt_count(); +#endif +} + +static void kgdb_unset_may_fault(void) { + kgdb_may_fault = 0; +#ifdef CONFIG_PREEMPT + preempt_count() = kgdb_fault_preempt_count; +#endif +} + +/* + * Send the packet in buffer. + * Check for gdb connection if asked for. 
+ */ +static void put_packet(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + if (!kgdb_io_ops.write_char) + return; + /* $#. */ + while (1) { + kgdb_io_ops.write_char('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + kgdb_io_ops.write_char(ch); + checksum += ch; + count++; + } + + kgdb_io_ops.write_char('#'); + kgdb_io_ops.write_char(hexchars[checksum >> 4]); + kgdb_io_ops.write_char(hexchars[checksum % 16]); + if (kgdb_io_ops.flush) + kgdb_io_ops.flush(); + + /* Now see what we get in reply. */ + ch = kgdb_io_ops.read_char(); + + if (ch == 3) + ch = kgdb_io_ops.read_char(); + + /* If we get an ACK, we are done. */ + if (ch == '+') + return; + + /* If we get the start of another packet, this means + * that GDB is attempting to reconnect. We will NAK + * the packet being sent, and stop trying to send this + * packet. */ + if (ch == '$') { + kgdb_io_ops.write_char('-'); + if (kgdb_io_ops.flush) + kgdb_io_ops.flush(); + return; + } + } +} + +/* + * convert the memory pointed to by mem into hex, placing result in buf + * return a pointer to the last char put in buf (null). May return an error. + */ +char *kgdb_mem2hex(char *mem, char *buf, int count) +{ + kgdb_set_may_fault(); + if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { + kgdb_unset_may_fault(); + return ERR_PTR(-EINVAL); + } + /* Accessing some registers in a single load instruction is + * required to avoid bad side effects for some I/O registers. 
+ */ + if ((count == 2) && (((long)mem & 1) == 0)) { + unsigned short tmp_s = *(unsigned short *)mem; + mem += 2; +#ifdef __BIG_ENDIAN + *buf++ = hexchars[(tmp_s >> 12) & 0xf]; + *buf++ = hexchars[(tmp_s >> 8) & 0xf]; + *buf++ = hexchars[(tmp_s >> 4) & 0xf]; + *buf++ = hexchars[tmp_s & 0xf]; +#else + *buf++ = hexchars[(tmp_s >> 4) & 0xf]; + *buf++ = hexchars[tmp_s & 0xf]; + *buf++ = hexchars[(tmp_s >> 12) & 0xf]; + *buf++ = hexchars[(tmp_s >> 8) & 0xf]; +#endif + } else if ((count == 4) && (((long)mem & 3) == 0)) { + unsigned long tmp_l = *(unsigned int *)mem; + mem += 4; +#ifdef __BIG_ENDIAN + *buf++ = hexchars[(tmp_l >> 28) & 0xf]; + *buf++ = hexchars[(tmp_l >> 24) & 0xf]; + *buf++ = hexchars[(tmp_l >> 20) & 0xf]; + *buf++ = hexchars[(tmp_l >> 16) & 0xf]; + *buf++ = hexchars[(tmp_l >> 12) & 0xf]; + *buf++ = hexchars[(tmp_l >> 8) & 0xf]; + *buf++ = hexchars[(tmp_l >> 4) & 0xf]; + *buf++ = hexchars[tmp_l & 0xf]; +#else + *buf++ = hexchars[(tmp_l >> 4) & 0xf]; + *buf++ = hexchars[tmp_l & 0xf]; + *buf++ = hexchars[(tmp_l >> 12) & 0xf]; + *buf++ = hexchars[(tmp_l >> 8) & 0xf]; + *buf++ = hexchars[(tmp_l >> 20) & 0xf]; + *buf++ = hexchars[(tmp_l >> 16) & 0xf]; + *buf++ = hexchars[(tmp_l >> 28) & 0xf]; + *buf++ = hexchars[(tmp_l >> 24) & 0xf]; +#endif +#ifdef CONFIG_64BIT + } else if ((count == 8) && (((long)mem & 7) == 0)) { + unsigned long long tmp_ll = *(unsigned long long *)mem; + mem += 8; +#ifdef __BIG_ENDIAN + *buf++ = hexchars[(tmp_ll >> 60) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 56) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 52) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 48) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 44) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 40) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 36) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 32) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 28) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 24) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 20) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 16) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 12) & 0xf]; + 
*buf++ = hexchars[(tmp_ll >> 8) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 4) & 0xf]; + *buf++ = hexchars[tmp_ll & 0xf]; +#else + *buf++ = hexchars[(tmp_ll >> 4) & 0xf]; + *buf++ = hexchars[tmp_ll & 0xf]; + *buf++ = hexchars[(tmp_ll >> 12) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 8) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 20) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 16) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 28) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 24) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 36) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 32) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 44) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 40) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 52) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 48) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 60) & 0xf]; + *buf++ = hexchars[(tmp_ll >> 56) & 0xf]; +#endif +#endif + } else { + while (count-- > 0) { + unsigned char ch = *mem++; + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch & 0xf]; + } + } + kgdb_unset_may_fault(); + *buf = 0; + return (buf); +} + +/* + * Copy the binary array pointed to by buf into mem. Fix $, #, and + * 0x7d escaped with 0x7d. Return a pointer to the character after + * the last byte written. + */ +static char *kgdb_ebin2mem(char *buf, char *mem, int count) +{ + kgdb_set_may_fault(); + if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { + kgdb_unset_may_fault(); + return ERR_PTR(-EINVAL); + } + for (; count > 0; count--, buf++) { + if (*buf == 0x7d) + *mem++ = *(++buf) ^ 0x20; + else + *mem++ = *buf; + } + kgdb_unset_may_fault(); + return mem; +} + +/* + * convert the hex array pointed to by buf into binary to be placed in mem + * return a pointer to the character AFTER the last byte written + * May return an error. 
+ */
+char *kgdb_hex2mem(char *buf, char *mem, int count)
+{
+	kgdb_set_may_fault();
+	if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
+		kgdb_unset_may_fault();
+		return ERR_PTR(-EINVAL);	/* presumably the fault path */
+	}
+	if ((count == 2) && (((long)mem & 1) == 0)) {	/* aligned 16-bit */
+		unsigned short tmp_s = 0;
+#ifdef __BIG_ENDIAN
+		tmp_s |= hex(*buf++) << 12;
+		tmp_s |= hex(*buf++) << 8;
+		tmp_s |= hex(*buf++) << 4;
+		tmp_s |= hex(*buf++);
+#else
+		tmp_s |= hex(*buf++) << 4;
+		tmp_s |= hex(*buf++);
+		tmp_s |= hex(*buf++) << 12;
+		tmp_s |= hex(*buf++) << 8;
+#endif
+		*(unsigned short *)mem = tmp_s;	/* one 2-byte store */
+		mem += 2;
+	} else if ((count == 4) && (((long)mem & 3) == 0)) {	/* aligned 32-bit */
+		unsigned int tmp_l = 0;	/* 32 bits, matching kgdb_mem2hex */
+#ifdef __BIG_ENDIAN
+		tmp_l |= (unsigned int)hex(*buf++) << 28;	/* no signed overflow */
+		tmp_l |= (unsigned int)hex(*buf++) << 24;
+		tmp_l |= (unsigned int)hex(*buf++) << 20;
+		tmp_l |= (unsigned int)hex(*buf++) << 16;
+		tmp_l |= (unsigned int)hex(*buf++) << 12;
+		tmp_l |= (unsigned int)hex(*buf++) << 8;
+		tmp_l |= (unsigned int)hex(*buf++) << 4;
+		tmp_l |= (unsigned int)hex(*buf++);
+#else
+		tmp_l |= (unsigned int)hex(*buf++) << 4;
+		tmp_l |= (unsigned int)hex(*buf++);
+		tmp_l |= (unsigned int)hex(*buf++) << 12;
+		tmp_l |= (unsigned int)hex(*buf++) << 8;
+		tmp_l |= (unsigned int)hex(*buf++) << 20;
+		tmp_l |= (unsigned int)hex(*buf++) << 16;
+		tmp_l |= (unsigned int)hex(*buf++) << 28;	/* no signed overflow */
+		tmp_l |= (unsigned int)hex(*buf++) << 24;
+#endif
+		*(unsigned int *)mem = tmp_l;	/* 4 bytes only, also on 64-bit */
+		mem += 4;
+	} else {	/* any other size or alignment: byte at a time */
+		int i;
+		for (i = 0; i < count; i++) {
+			unsigned char ch = hex(*buf++) << 4;
+			ch |= hex(*buf++);
+			*mem++ = ch;
+		}
+	}
+	kgdb_unset_may_fault();
+	return (mem);	/* one past the last byte written */
+}
+
+/*
+ * Parse hex digits at *ptr into *long_val, advancing *ptr past each digit
+ * consumed and stopping at the first non-hex char; returns the digit count.
+ */
+int kgdb_hex2long(char **ptr, long *long_val)
+{
+	int hex_val, num = 0;
+
+	*long_val = 0;
+
+	while (**ptr) {
+		hex_val = hex(**ptr);
+		if (hex_val < 0)
+			break;
+		/* Shift as unsigned: overflowing a signed << is undefined. */
+		*long_val = (long)(((unsigned long)*long_val << 4) | hex_val);
+		num++;
+
+		(*ptr)++;
+	}
+
+	return (num);
+}
+
+/* Write memory due to an 'M' or 'X' packet.
*/ +static char *write_mem_msg(int binary) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long addr, length; + + if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' && + kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') { + if (binary) + ptr = kgdb_ebin2mem(ptr, (char *)addr, length); + else + ptr = kgdb_hex2mem(ptr, (char *)addr, length); + if (CACHE_FLUSH_IS_SAFE) + flush_icache_range(addr, addr + length + 1); + if (IS_ERR(ptr)) + return ptr; + return NULL; + } + + return ERR_PTR(-EINVAL); +} + +static inline char *pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +static inline void error_packet(char *pkt, int error) +{ + error = -error; + pkt[0] = 'E'; + pkt[1] = hexchars[(error / 10)]; + pkt[2] = hexchars[(error % 10)]; + pkt[3] = '\0'; +} + +static char *pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *)id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + + return pkt; +} + +void int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + int i = 4; + + scan = (unsigned char *)id; + while (i--) + *scan++ = 0; + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} + +static struct task_struct *getthread(struct pt_regs *regs, int tid) +{ + if (init_pid_ns.last_pid == 0) + return current; + + if (num_online_cpus() && + (tid >= pid_max + num_online_cpus() + kgdb_ops->shadowth)) + return NULL; + + if (kgdb_ops->shadowth && (tid >= pid_max + num_online_cpus())) + return kgdb_get_shadow_thread(regs, tid - pid_max - + num_online_cpus()); + + if (tid >= pid_max) + return idle_task(tid - pid_max); + + if (!tid) + return NULL; + + return find_task_by_pid(tid); +} + +#ifdef CONFIG_SMP +static void kgdb_wait(struct pt_regs *regs) +{ + unsigned long flags; + int processor; + + 
local_irq_save(flags); + processor = raw_smp_processor_id(); + kgdb_info[processor].debuggerinfo = regs; + kgdb_info[processor].task = current; + atomic_set(&procindebug[processor], 1); + atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1); + + /* Wait till master processor goes completely into the debugger. + * FIXME: this looks racy */ + while (!atomic_read(&procindebug[atomic_read(&debugger_active) - 1])) { + int i = 10; /* an arbitrary number */ + + while (--i) + cpu_relax(); + } + + /* Wait till master processor is done with debugging */ + spin_lock(&slavecpulocks[processor]); + + kgdb_info[processor].debuggerinfo = NULL; + kgdb_info[processor].task = NULL; + + /* fix up hardware debug registers on local cpu */ + if (kgdb_ops->correct_hw_break) + kgdb_ops->correct_hw_break(); + /* Signal the master processor that we are done */ + atomic_set(&procindebug[processor], 0); + spin_unlock(&slavecpulocks[processor]); + local_irq_restore(flags); +} +#endif + +int kgdb_get_mem(char *addr, unsigned char *buf, int count) +{ + kgdb_set_may_fault(); + if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { + kgdb_unset_may_fault(); + return -EINVAL; + } + while (count) { + if ((unsigned long)addr < TASK_SIZE) { + kgdb_unset_may_fault(); + return -EINVAL; + } + *buf++ = *addr++; + count--; + } + kgdb_unset_may_fault(); + return 0; +} + +int kgdb_set_mem(char *addr, unsigned char *buf, int count) +{ + kgdb_set_may_fault(); + if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { + kgdb_unset_may_fault(); + return -EINVAL; + } + while (count) { + if ((unsigned long)addr < TASK_SIZE) { + kgdb_unset_may_fault(); + return -EINVAL; + } + *addr++ = *buf++; + count--; + } + kgdb_unset_may_fault(); + return 0; +} +int kgdb_activate_sw_breakpoints(void) +{ + int i; + int error = 0; + unsigned long addr; + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state != bp_set) + continue; + addr = kgdb_break[i].bpt_addr; + if ((error = kgdb_arch_set_breakpoint(addr, + 
kgdb_break[i].saved_instr))) + return error; + + if (CACHE_FLUSH_IS_SAFE) { + if (current->mm && addr < TASK_SIZE) + flush_cache_range(current->mm->mmap_cache, + addr, addr + BREAK_INSTR_SIZE); + else + flush_icache_range(addr, addr + + BREAK_INSTR_SIZE); + } + + kgdb_break[i].state = bp_active; + } + return 0; +} + +static int kgdb_set_sw_break(unsigned long addr) +{ + int i, breakno = -1; + int error = 0; + if ((error = kgdb_validate_break_address(addr)) < 0) + return error; + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if ((kgdb_break[i].state == bp_set) && + (kgdb_break[i].bpt_addr == addr)) + return -EEXIST; + } + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state == bp_removed && + kgdb_break[i].bpt_addr == addr) { + breakno = i; + break; + } + } + + if (breakno == -1) { + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state == bp_none) { + breakno = i; + break; + } + } + } + if (breakno == -1) + return -E2BIG; + + kgdb_break[breakno].state = bp_set; + kgdb_break[breakno].type = bp_breakpoint; + kgdb_break[breakno].bpt_addr = addr; + + return 0; +} + +int kgdb_deactivate_sw_breakpoints(void) +{ + int i; + int error = 0; + unsigned long addr; + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state != bp_active) + continue; + addr = kgdb_break[i].bpt_addr; + if ((error = kgdb_arch_remove_breakpoint(addr, + kgdb_break[i].saved_instr))) + return error; + + if (CACHE_FLUSH_IS_SAFE && current->mm && + addr < TASK_SIZE) + flush_cache_range(current->mm->mmap_cache, + addr, addr + BREAK_INSTR_SIZE); + else if (CACHE_FLUSH_IS_SAFE) + flush_icache_range(addr, + addr + BREAK_INSTR_SIZE); + kgdb_break[i].state = bp_set; + } + return 0; +} + +static int kgdb_remove_sw_break(unsigned long addr) +{ + int i; + + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if ((kgdb_break[i].state == bp_set) && + (kgdb_break[i].bpt_addr == addr)) { + kgdb_break[i].state = bp_removed; + return 0; + } + } + return -ENOENT; +} + +int 
kgdb_isremovedbreak(unsigned long addr) +{ + int i; + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if ((kgdb_break[i].state == bp_removed) && + (kgdb_break[i].bpt_addr == addr)) { + return 1; + } + } + return 0; +} + +int remove_all_break(void) +{ + int i; + int error; + unsigned long addr; + + /* Clear memory breakpoints. */ + for (i = 0; i < MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state != bp_set) + continue; + addr = kgdb_break[i].bpt_addr; + if ((error = kgdb_arch_remove_breakpoint(addr, + kgdb_break[i].saved_instr))) + return error; + kgdb_break[i].state = bp_removed; + } + + /* Clear hardware breakpoints. */ + if (kgdb_ops->remove_all_hw_break) + kgdb_ops->remove_all_hw_break(); + + return 0; +} + +static inline int shadow_pid(int realpid) +{ + if (realpid) { + return realpid; + } + return pid_max + raw_smp_processor_id(); +} + +static char gdbmsgbuf[BUFMAX + 1]; +static void kgdb_msg_write(const char *s, int len) +{ + int i; + int wcount; + char *bufptr; + + /* 'O'utput */ + gdbmsgbuf[0] = 'O'; + + /* Fill and send buffers... */ + while (len > 0) { + bufptr = gdbmsgbuf + 1; + + /* Calculate how many this time */ + if ((len << 1) > (BUFMAX - 2)) + wcount = (BUFMAX - 2) >> 1; + else + wcount = len; + + /* Pack in hex chars */ + for (i = 0; i < wcount; i++) + bufptr = pack_hex_byte(bufptr, s[i]); + *bufptr = '\0'; + + /* Move up */ + s += wcount; + len -= wcount; + + /* Write packet */ + put_packet(gdbmsgbuf); + } +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * Locking hierarchy: + * interface locks, if any (begin_session) + * kgdb lock (debugger_active) + * + * Note that since we can be in here prior to our cpumask being filled + * out, we err on the side of caution and loop over NR_CPUS instead + * of a for_each_online_cpu. 
+ * + */ +int kgdb_handle_exception(int ex_vector, int signo, int err_code, + struct pt_regs *linux_regs) +{ + unsigned long length, addr; + char *ptr; + unsigned long flags; + unsigned i; + long threadid; + threadref thref; + struct task_struct *thread = NULL; + unsigned procid; + int numshadowth = num_online_cpus() + kgdb_ops->shadowth; + long kgdb_usethreadid = 0; + int error = 0, all_cpus_synced = 0; + struct pt_regs *shadowregs; + int processor = raw_smp_processor_id(); + void *local_debuggerinfo; + + /* Panic on recursive debugger calls. */ + if (atomic_read(&debugger_active) == raw_smp_processor_id() + 1) { + exception_level++; + addr = kgdb_arch_pc(ex_vector, linux_regs); + kgdb_deactivate_sw_breakpoints(); + if (kgdb_remove_sw_break(addr) == 0) { + /* If the break point removed ok at the place exception + * occurred, try to recover and print a warning to the end + * user because the user planted a breakpoint in a place + * that KGDB needs in order to function. + */ + exception_level = 0; + kgdb_skipexception(ex_vector, linux_regs); + kgdb_activate_sw_breakpoints(); + printk(KERN_CRIT "KGDB: re-enter exception: breakpoint removed\n"); + WARN_ON(1); + return 0; + } + remove_all_break(); + kgdb_skipexception(ex_vector, linux_regs); + if (exception_level > 1) + panic("Recursive entry to debugger"); + + printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints removed\n"); + panic("Recursive entry to debugger"); + return 0; + } + + acquirelock: + + /* + * Interrupts will be restored by the 'trap return' code, except when + * single stepping. 
+ */ + local_irq_save(flags); + + /* Hold debugger_active */ + procid = raw_smp_processor_id(); + + while (cmpxchg(&atomic_read(&debugger_active), 0, (procid + 1)) != 0) { + int i = 25; /* an arbitrary number */ + + while (--i) + cpu_relax(); + + if (atomic_read(&cpu_doing_single_step) != -1 && + atomic_read(&cpu_doing_single_step) != procid) + udelay(1); + } + + atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1); + + /* + * Don't enter if the last instance of the exception handler wanted to + * come into the debugger again. + */ + if (atomic_read(&cpu_doing_single_step) != -1 && + atomic_read(&cpu_doing_single_step) != procid) { + atomic_set(&debugger_active, 0); + local_irq_restore(flags); + goto acquirelock; + } + + /* + * Don't enter if we have hit a removed breakpoint. + */ + if (kgdb_skipexception(ex_vector, linux_regs)) + goto kgdb_restore; + + /* + * Call the I/O drivers pre_exception routine + * if the I/O driver defined one + */ + if (kgdb_io_ops.pre_exception) + kgdb_io_ops.pre_exception(); + + kgdb_info[processor].debuggerinfo = linux_regs; + kgdb_info[processor].task = current; + + kgdb_disable_hw_debug(linux_regs); + + if (!debugger_step || !kgdb_contthread) + for (i = 0; i < NR_CPUS; i++) + spin_lock(&slavecpulocks[i]); + +#ifdef CONFIG_SMP + /* Make sure we get the other CPUs */ + if (!debugger_step || !kgdb_contthread) + kgdb_roundup_cpus(flags); +#endif + + /* spin_lock code is good enough as a barrier so we don't + * need one here */ + atomic_set(&procindebug[processor], 1); + + /* Wait a reasonable time for the other CPUs to be notified and + * be waiting for us. Very early on this could be imperfect + * as num_online_cpus() could be 0.*/ + for (i = 0; i < ROUNDUP_WAIT; i++) { + int cpu, num = 0; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (atomic_read(&procindebug[cpu])) + num++; + } + if (num >= num_online_cpus()) { + all_cpus_synced = 1; + break; + } + } + + /* Clear the out buffer. 
*/ + memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); + + /* Master processor is completely in the debugger */ + kgdb_post_master_code(linux_regs, ex_vector, err_code); + kgdb_deactivate_sw_breakpoints(); + debugger_step = 0; + kgdb_contthread = NULL; + exception_level = 0; + + if (kgdb_connected) { + /* If we're still unable to roundup all of the CPUs, + * send an 'O' packet informing the user again. */ + if (!all_cpus_synced) + kgdb_msg_write("Not all CPUs have been synced for " + "KGDB\n", 39); + /* Reply to host that an exception has occurred */ + ptr = remcom_out_buffer; + *ptr++ = 'T'; + *ptr++ = hexchars[(signo >> 4) % 16]; + *ptr++ = hexchars[signo % 16]; + ptr += strlen(strcpy(ptr, "thread:")); + int_to_threadref(&thref, shadow_pid(current->pid)); + ptr = pack_threadid(ptr, &thref); + *ptr++ = ';'; + + put_packet(remcom_out_buffer); + } + + kgdb_usethread = kgdb_info[processor].task; + kgdb_usethreadid = shadow_pid(kgdb_info[processor].task->pid); + + while (kgdb_io_ops.read_char) { + char *bpt_type; + error = 0; + + /* Clear the out buffer. */ + memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); + + get_packet(remcom_in_buffer); + + switch (remcom_in_buffer[0]) { + case '?': + /* We know that this packet is only sent + * during initial connect. So to be safe, + * we clear out our breakpoints now incase + * GDB is reconnecting. */ + remove_all_break(); + /* Also, if we haven't been able to roundup all + * CPUs, send an 'O' packet informing the user + * as much. Only need to do this once. 
*/ + if (!all_cpus_synced) + kgdb_msg_write("Not all CPUs have been " + "synced for KGDB\n", 39); + remcom_out_buffer[0] = 'S'; + remcom_out_buffer[1] = hexchars[signo >> 4]; + remcom_out_buffer[2] = hexchars[signo % 16]; + break; + + case 'g': /* return the value of the CPU registers */ + thread = kgdb_usethread; + + if (!thread) { + thread = kgdb_info[processor].task; + local_debuggerinfo = + kgdb_info[processor].debuggerinfo; + } else { + local_debuggerinfo = NULL; + for (i = 0; i < NR_CPUS; i++) { + /* Try to find the task on some other + * or possibly this node if we do not + * find the matching task then we try + * to approximate the results. + */ + if (thread == kgdb_info[i].task) + local_debuggerinfo = + kgdb_info[i].debuggerinfo; + } + } + + /* All threads that don't have debuggerinfo should be + * in __schedule() sleeping, since all other CPUs + * are in kgdb_wait, and thus have debuggerinfo. */ + if (kgdb_ops->shadowth && + kgdb_usethreadid >= pid_max + num_online_cpus()) { + shadowregs = kgdb_shadow_regs(linux_regs, + kgdb_usethreadid - + pid_max - + num_online_cpus + ()); + if (!shadowregs) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + regs_to_gdb_regs(gdb_regs, shadowregs); + } else if (local_debuggerinfo) + regs_to_gdb_regs(gdb_regs, local_debuggerinfo); + else { + /* Pull stuff saved during + * switch_to; nothing else is + * accessible (or even particularly relevant). + * This should be enough for a stack trace. 
*/ + sleeping_thread_to_gdb_regs(gdb_regs, thread); + } + kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, + NUMREGBYTES); + break; + + /* set the value of the CPU registers - return OK */ + case 'G': + kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, + NUMREGBYTES); + + if (kgdb_usethread && kgdb_usethread != current) + error_packet(remcom_out_buffer, -EINVAL); + else { + gdb_regs_to_regs(gdb_regs, linux_regs); + strcpy(remcom_out_buffer, "OK"); + } + break; + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' && + kgdb_hex2long(&ptr, &length) > 0) { + if (IS_ERR(ptr = kgdb_mem2hex((char *)addr, + remcom_out_buffer, + length))) + error_packet(remcom_out_buffer, + PTR_ERR(ptr)); + } else + error_packet(remcom_out_buffer, -EINVAL); + break; + + /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */ + case 'M': + if (IS_ERR(ptr = write_mem_msg(0))) + error_packet(remcom_out_buffer, PTR_ERR(ptr)); + else + strcpy(remcom_out_buffer, "OK"); + break; + /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */ + case 'X': + if (IS_ERR(ptr = write_mem_msg(1))) + error_packet(remcom_out_buffer, PTR_ERR(ptr)); + else + strcpy(remcom_out_buffer, "OK"); + break; + + /* kill or detach. KGDB should treat this like a + * continue. 
+ */ + case 'D': + if ((error = remove_all_break()) < 0) { + error_packet(remcom_out_buffer, error); + } else { + strcpy(remcom_out_buffer, "OK"); + kgdb_connected = 0; + } + put_packet(remcom_out_buffer); + goto default_handle; + + case 'k': + /* Don't care about error from remove_all_break */ + remove_all_break(); + kgdb_connected = 0; + goto default_handle; + + /* Reboot */ + case 'R': + /* For now, only honor R0 */ + if (strcmp(remcom_in_buffer, "R0") == 0) { + printk(KERN_CRIT "Executing reboot\n"); + strcpy(remcom_out_buffer, "OK"); + put_packet(remcom_out_buffer); + emergency_sync(); + /* Execution should not return from + * machine_restart() + */ + machine_restart(NULL); + kgdb_connected = 0; + goto default_handle; + } + + /* query */ + case 'q': + switch (remcom_in_buffer[1]) { + case 's': + case 'f': + if (memcmp(remcom_in_buffer + 2, "ThreadInfo", + 10)) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + + /* + * If we have not yet completed in + * pidhash_init() there isn't much we + * can give back. 
+ */ + if (init_pid_ns.last_pid == 0) { + if (remcom_in_buffer[1] == 'f') + strcpy(remcom_out_buffer, + "m0000000000000001"); + break; + } + + if (remcom_in_buffer[1] == 'f') { + threadid = 1; + } + remcom_out_buffer[0] = 'm'; + ptr = remcom_out_buffer + 1; + for (i = 0; i < 17 && threadid < pid_max + + numshadowth; threadid++) { + thread = getthread(linux_regs, + threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(ptr, &thref); + ptr += 16; + *(ptr++) = ','; + i++; + } + } + *(--ptr) = '\0'; + break; + + case 'C': + /* Current thread id */ + strcpy(remcom_out_buffer, "QC"); + + threadid = shadow_pid(current->pid); + + int_to_threadref(&thref, threadid); + pack_threadid(remcom_out_buffer + 2, &thref); + break; + case 'T': + if (memcmp(remcom_in_buffer + 1, + "ThreadExtraInfo,", 16)) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + threadid = 0; + ptr = remcom_in_buffer + 17; + kgdb_hex2long(&ptr, &threadid); + if (!getthread(linux_regs, threadid)) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + if (threadid < pid_max) { + kgdb_mem2hex(getthread(linux_regs, + threadid)->comm, + remcom_out_buffer, 16); + } else if (threadid >= pid_max + + num_online_cpus()) { + kgdb_shadowinfo(linux_regs, + remcom_out_buffer, + threadid - pid_max - + num_online_cpus()); + } else { + static char tmpstr[23 + + BUF_THREAD_ID_SIZE]; + sprintf(tmpstr, "Shadow task %d" + " for pid 0", + (int)(threadid - pid_max)); + kgdb_mem2hex(tmpstr, remcom_out_buffer, + strlen(tmpstr)); + } + break; + } + break; + + /* task related */ + case 'H': + switch (remcom_in_buffer[1]) { + case 'g': + ptr = &remcom_in_buffer[2]; + kgdb_hex2long(&ptr, &threadid); + thread = getthread(linux_regs, threadid); + if (!thread && threadid > 0) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + kgdb_usethread = thread; + kgdb_usethreadid = threadid; + strcpy(remcom_out_buffer, "OK"); + break; + + case 'c': + ptr = &remcom_in_buffer[2]; + 
kgdb_hex2long(&ptr, &threadid); + if (!threadid) { + kgdb_contthread = NULL; + } else { + thread = getthread(linux_regs, + threadid); + if (!thread && threadid > 0) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + kgdb_contthread = thread; + } + strcpy(remcom_out_buffer, "OK"); + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcom_in_buffer[1]; + kgdb_hex2long(&ptr, &threadid); + thread = getthread(linux_regs, threadid); + if (thread) + strcpy(remcom_out_buffer, "OK"); + else + error_packet(remcom_out_buffer, -EINVAL); + break; + /* Since GDB-5.3, it's been drafted that '0' is a software + * breakpoint, '1' is a hardware breakpoint, so let's do + * that. + */ + case 'z': + case 'Z': + bpt_type = &remcom_in_buffer[1]; + ptr = &remcom_in_buffer[2]; + + if (kgdb_ops->set_hw_breakpoint && *bpt_type >= '1') { + /* Unsupported */ + if (*bpt_type > '4') + break; + } else if (*bpt_type != '0' && *bpt_type != '1') + /* Unsupported. */ + break; + /* Test if this is a hardware breakpoint, and + * if we support it. */ + if (*bpt_type == '1' && + !(kgdb_ops->flags & KGDB_HW_BREAKPOINT)) + /* Unsupported. 
*/ + break; + + if (*(ptr++) != ',') { + error_packet(remcom_out_buffer, -EINVAL); + break; + } else if (kgdb_hex2long(&ptr, &addr)) { + if (*(ptr++) != ',' || + !kgdb_hex2long(&ptr, &length)) { + error_packet(remcom_out_buffer, + -EINVAL); + break; + } + } else { + error_packet(remcom_out_buffer, -EINVAL); + break; + } + + if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0') + error = kgdb_set_sw_break(addr); + else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0') + error = kgdb_remove_sw_break(addr); + else if (remcom_in_buffer[0] == 'Z') + error = kgdb_ops->set_hw_breakpoint(addr, + (int)length, + *bpt_type); + else if (remcom_in_buffer[0] == 'z') + error = kgdb_ops->remove_hw_breakpoint(addr, + (int) + length, + *bpt_type); + + if (error == 0) + strcpy(remcom_out_buffer, "OK"); + else + error_packet(remcom_out_buffer, error); + + break; + case 'c': + case 's': + if (kgdb_contthread && kgdb_contthread != current) { + /* Can't switch threads in kgdb */ + error_packet(remcom_out_buffer, -EINVAL); + break; + } + kgdb_activate_sw_breakpoints(); + /* Followthrough to default processing */ + default: + default_handle: + error = kgdb_arch_handle_exception(ex_vector, signo, + err_code, + remcom_in_buffer, + remcom_out_buffer, + linux_regs); + + if (error >= 0 || remcom_in_buffer[0] == 'D' || + remcom_in_buffer[0] == 'k') + goto kgdb_exit; + + } /* switch */ + + /* reply to the request */ + put_packet(remcom_out_buffer); + } + + kgdb_exit: + /* + * Call the I/O driver's post_exception routine + * if the I/O driver defined one. + */ + if (kgdb_io_ops.post_exception) + kgdb_io_ops.post_exception(); + + kgdb_info[processor].debuggerinfo = NULL; + kgdb_info[processor].task = NULL; + atomic_set(&procindebug[processor], 0); + + if (!debugger_step || !kgdb_contthread) { + for (i = 0; i < NR_CPUS; i++) + spin_unlock(&slavecpulocks[i]); + /* Wait till all the processors have quit + * from the debugger. 
*/ + for (i = 0; i < NR_CPUS; i++) { + while (atomic_read(&procindebug[i])) { + int j = 10; /* an arbitrary number */ + + while (--j) + cpu_relax(); + } + } + } + +#ifdef CONFIG_SMP + /* This delay has a real purpose. The problem is that if you + * are single-stepping, you are sending an NMI to all the + * other processors to stop them. Interrupts come in, but + * don't get handled. Then you let them go just long enough + * to get into their interrupt routines and use up some stack. + * You stop them again, and then do the same thing. After a + * while you blow the stack on the other processors. This + * delay gives some time for interrupts to be cleared out on + * the other processors. + */ + if (debugger_step) + mdelay(2); +#endif + kgdb_restore: + /* Free debugger_active */ + atomic_set(&debugger_active, 0); + local_irq_restore(flags); + + return error; +} + +/* + * GDB places a breakpoint at this function to know dynamically + * loaded objects. It's not defined static so that only one instance with this + * name exists in the kernel. + */ + +int module_event(struct notifier_block *self, unsigned long val, void *data) +{ + return 0; +} + +static struct notifier_block kgdb_module_load_nb = { + .notifier_call = module_event, +}; + +void kgdb_nmihook(int cpu, void *regs) +{ +#ifdef CONFIG_SMP + if (!atomic_read(&procindebug[cpu]) && atomic_read(&debugger_active) != (cpu + 1)) + kgdb_wait((struct pt_regs *)regs); +#endif +} + +/* + * This is called when a panic happens. All we need to do is + * breakpoint(). + */ +static int kgdb_panic_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + breakpoint(); + + return 0; +} + +static struct notifier_block kgdb_panic_notifier = { + .notifier_call = kgdb_panic_notify, +}; + +/* + * Initialization that needs to be done in either of our entry points. + */ +static void __init kgdb_internal_init(void) +{ + int i; + + /* Initialize our spinlocks. 
*/ + for (i = 0; i < NR_CPUS; i++) + spin_lock_init(&slavecpulocks[i]); + + for (i = 0; i < MAX_BREAKPOINTS; i++) + kgdb_break[i].state = bp_none; + + /* Initialize the I/O handles */ + memset(&kgdb_io_ops_prev, 0, sizeof(kgdb_io_ops_prev)); + + /* We can't do much if this fails */ + register_module_notifier(&kgdb_module_load_nb); + + kgdb_initialized = 1; +} + +static void kgdb_register_for_panic(void) +{ + /* Register for panics(). */ + /* The registration is done in the kgdb_register_for_panic + * routine because KGDB should not try to handle a panic when + * there are no kgdb_io_ops setup. It is assumed that the + * kgdb_io_ops are setup at the time this method is called. + */ + if (!kgdb_from_module_registered) { + atomic_notifier_chain_register(&panic_notifier_list, + &kgdb_panic_notifier); + kgdb_from_module_registered = 1; + } +} + +static void kgdb_unregister_for_panic(void) +{ + /* When this routine is called KGDB should unregister from the + * panic handler and clean up, making sure it is not handling any + * break exceptions at the time. + */ + if (kgdb_from_module_registered) { + kgdb_from_module_registered = 0; + atomic_notifier_chain_unregister(&panic_notifier_list, + &kgdb_panic_notifier); + } +} + +int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops) +{ + + if (kgdb_connected) { + printk(KERN_ERR "kgdb: Cannot load I/O module while KGDB " + "connected.\n"); + return -EINVAL; + } + + /* Save the old values so they can be restored */ + if (kgdb_io_handler_cnt >= MAX_KGDB_IO_HANDLERS) { + printk(KERN_ERR "kgdb: No more I/O handles available.\n"); + return -EINVAL; + } + + /* Check to see if there is an existing driver and if so save its + * values. Also check to make sure the same driver was not trying + * to re-register. 
+ */ + if (kgdb_io_ops.read_char != NULL && + kgdb_io_ops.read_char != local_kgdb_io_ops->read_char) { + memcpy(&kgdb_io_ops_prev[kgdb_io_handler_cnt], + &kgdb_io_ops, sizeof(struct kgdb_io)); + kgdb_io_handler_cnt++; + } + + /* Initialize the io values for this module */ + memcpy(&kgdb_io_ops, local_kgdb_io_ops, sizeof(struct kgdb_io)); + + /* Make the call to register kgdb if it is not initialized */ + kgdb_register_for_panic(); + + return 0; +} + +void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops) +{ + int i; + + /* Unregister KGDB if there were no other prior io hooks, else + * restore the io hooks. + */ + if (kgdb_io_handler_cnt > 0 && kgdb_io_ops_prev[0].read_char != NULL) { + /* First check if the hook that is in use is the one being + * removed */ + if (kgdb_io_ops.read_char == local_kgdb_io_ops->read_char) { + /* Set 'i' to the value of where the list should be + * shifted */ + i = kgdb_io_handler_cnt - 1; + memcpy(&kgdb_io_ops, &kgdb_io_ops_prev[i], + sizeof(struct kgdb_io)); + } else { + /* Simple case to remove an entry for an I/O handler + * that is not in use */ + for (i = 0; i < kgdb_io_handler_cnt; i++) { + if (kgdb_io_ops_prev[i].read_char == + local_kgdb_io_ops->read_char) + break; + } + } + + /* Shift all the entries in the handler array so it is + * ordered from oldest to newest. + */ + kgdb_io_handler_cnt--; + for (; i < kgdb_io_handler_cnt; i++) { + memcpy(&kgdb_io_ops_prev[i], &kgdb_io_ops_prev[i + 1], + sizeof(struct kgdb_io)); + } + /* Handle the case if we are on the last element and set it + * to NULL; */ + memset(&kgdb_io_ops_prev[kgdb_io_handler_cnt], 0, + sizeof(struct kgdb_io)); + + if (kgdb_connected) + printk(KERN_ERR "kgdb: WARNING: I/O method changed " + "while kgdb was connected state.\n"); + } else { + /* KGDB is no longer able to communicate out, so + * unregister our hooks and reset state.
*/ + kgdb_unregister_for_panic(); + if (kgdb_connected) { + printk(KERN_CRIT "kgdb: I/O module was unloaded while " + "a debugging session was running. " + "KGDB will be reset.\n"); + if (remove_all_break() < 0) + printk(KERN_CRIT "kgdb: Reset failed.\n"); + kgdb_connected = 0; + } + memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io)); + } +} + +/* + * There are times we need to call a tasklet to cause a breakpoint + * as calling breakpoint() at that point might be fatal. We have to + * check that the exception stack is setup, as tasklets may be scheduled + * prior to this. When that happens, it is up to the architecture to + * schedule this when it is safe to run. + */ +static void kgdb_tasklet_bpt(unsigned long ing) +{ + if (CHECK_EXCEPTION_STACK()) + breakpoint(); +} + +DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0); + +/* + * This function can be called very early, either via early_param() or + * an explicit breakpoint() early on. + */ +static void __init kgdb_early_entry(void) +{ + /* Let the architecture do any setup that it needs to. */ + kgdb_arch_init(); + + /* + * Don't try and do anything until the architecture is able to + * setup the exception stack. In this case, it is up to the + * architecture to hook in and look at us when they are ready. + */ + + if (!CHECK_EXCEPTION_STACK()) { + kgdb_initialized = -1; + /* any kind of break point is deferred to late_init */ + return; + } + + /* Now try the I/O. */ + /* For early entry kgdb_io_ops.init must be defined */ + if (!kgdb_io_ops.init || kgdb_io_ops.init()) { + /* Try again later. */ + kgdb_initialized = -1; + return; + } + + /* Finish up. */ + kgdb_internal_init(); + + /* KGDB can assume that if kgdb_io_ops.init was defined that the + * panic registration should be performed at this time. This means + * kgdb_io_ops.init did not come from a kernel module and was + * initialized statically by a built in.
+ */ + if (kgdb_io_ops.init) + kgdb_register_for_panic(); +} + +/* + * This function will always be invoked to make sure that KGDB will grab + * what it needs to so that if something happens while the system is + * running, KGDB will get involved. If kgdb_early_entry() has already + * been invoked, there is little we need to do. + */ +static int __init kgdb_late_entry(void) +{ + int need_break = 0; + + /* If kgdb_initialized is -1 then we were passed kgdbwait. */ + if (kgdb_initialized == -1) + need_break = 1; + + /* + * If we haven't tried to initialize KGDB yet, we need to call + * kgdb_arch_init before moving onto the I/O. + */ + if (!kgdb_initialized) + kgdb_arch_init(); + + if (kgdb_initialized != 1) { + if (kgdb_io_ops.init && kgdb_io_ops.init()) { + /* When KGDB allows I/O via modules and the core + * I/O init fails KGDB must default to deferring the + * I/O setup, and appropriately print an error about + * it. + */ + printk(KERN_ERR "kgdb: Could not setup core I/O " + "for KGDB.\n"); + printk(KERN_INFO "kgdb: Defering I/O setup to kernel " + "module.\n"); + memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io)); + } + + kgdb_internal_init(); + + /* KGDB can assume that if kgdb_io_ops.init was defined that + * panic registration should be performed at this time. This means + * kgdb_io_ops.init did not come from a kernel module and was + * initialized statically by a built in. + */ + if (kgdb_io_ops.init) + kgdb_register_for_panic(); + } + + /* Registering to reboot notifier list*/ + register_reboot_notifier(&kgdb_reboot_notifier); + + /* Now do any late init of the I/O. */ + if (kgdb_io_ops.late_init) + kgdb_io_ops.late_init(); + + if (need_break) { + printk(KERN_CRIT "kgdb: Waiting for connection from remote" + " gdb...\n"); + breakpoint(); + } + + return 0; +} + +late_initcall(kgdb_late_entry); + +/* + * This function will generate a breakpoint exception.
It is used at the + * beginning of a program to sync up with a debugger and can be used + * otherwise as a quick means to stop program execution and "break" into + * the debugger. + */ +void breakpoint(void) +{ + atomic_set(&kgdb_setting_breakpoint, 1); + wmb(); + BREAKPOINT(); + wmb(); + atomic_set(&kgdb_setting_breakpoint, 0); +} + +EXPORT_SYMBOL(breakpoint); + +#ifdef CONFIG_MAGIC_SYSRQ +static void sysrq_handle_gdb(int key, struct tty_struct *tty) +{ + printk("Entering GDB stub\n"); + breakpoint(); +} +static struct sysrq_key_op sysrq_gdb_op = { + .handler = sysrq_handle_gdb, + .help_msg = "Gdb", + .action_msg = "GDB", +}; + +static int gdb_register_sysrq(void) +{ + printk("Registering GDB sysrq handler\n"); + register_sysrq_key('g', &sysrq_gdb_op); + return 0; +} + +module_init(gdb_register_sysrq); +#endif + +static int kgdb_notify_reboot(struct notifier_block *this, + unsigned long code, void *x) +{ + + unsigned long flags; + + /* If we're debugging, or KGDB has not connected, don't try + * and print. */ + if (!kgdb_connected || atomic_read(&debugger_active) != 0) + return 0; + if ((code == SYS_RESTART) || (code == SYS_HALT) || (code == SYS_POWER_OFF)){ + local_irq_save(flags); + put_packet("X00"); + local_irq_restore(flags); + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_KGDB_CONSOLE +void kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + unsigned long flags; + + /* If we're debugging, or KGDB has not connected, don't try + * and print. 
*/ + if (!kgdb_connected || atomic_read(&debugger_active) != 0) + return; + + local_irq_save(flags); + kgdb_msg_write(s, count); + local_irq_restore(flags); +} + +struct console kgdbcons = { + .name = "kgdb", + .write = kgdb_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED, +}; +static int __init kgdb_console_init(void) +{ + register_console(&kgdbcons); + return 0; +} + +console_initcall(kgdb_console_init); +#endif + +static int __init opt_kgdb_enter(char *str) +{ + /* We've already done this by an explicit breakpoint() call. */ + if (kgdb_initialized) + return 0; + + kgdb_early_entry(); + if (kgdb_initialized == 1) + printk(KERN_CRIT "Waiting for connection from remote " + "gdb...\n"); + else { + printk(KERN_CRIT "KGDB cannot initialize I/O yet.\n"); + return 0; + } + + breakpoint(); + + return 0; +} + +early_param("kgdbwait", opt_kgdb_enter); diff -Nurb linux-2.6.22-570/kernel/kmod.c linux-2.6.22-591/kernel/kmod.c --- linux-2.6.22-570/kernel/kmod.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/kmod.c 2007-12-21 15:36:12.000000000 -0500 @@ -119,9 +119,10 @@ char **argv; char **envp; struct key *ring; - int wait; + enum umh_wait wait; int retval; struct file *stdin; + void (*cleanup)(char **argv, char **envp); }; /* @@ -180,6 +181,14 @@ do_exit(0); } +void call_usermodehelper_freeinfo(struct subprocess_info *info) +{ + if (info->cleanup) + (*info->cleanup)(info->argv, info->envp); + kfree(info); +} +EXPORT_SYMBOL(call_usermodehelper_freeinfo); + /* Keventd can't block, but this (a child) can. 
*/ static int wait_for_helper(void *data) { @@ -216,8 +225,8 @@ sub_info->retval = ret; } - if (sub_info->wait < 0) - kfree(sub_info); + if (sub_info->wait == UMH_NO_WAIT) + call_usermodehelper_freeinfo(sub_info); else complete(sub_info->complete); return 0; @@ -229,101 +238,102 @@ struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); pid_t pid; - int wait = sub_info->wait; + enum umh_wait wait = sub_info->wait; /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. */ - if (wait) + if (wait == UMH_WAIT_PROC) pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); else pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD); - if (wait < 0) - return; - - if (pid < 0) { + switch(wait) { + case UMH_NO_WAIT: + break; + + case UMH_WAIT_PROC: + if (pid > 0) + break; sub_info->retval = pid; + /* FALLTHROUGH */ + + case UMH_WAIT_EXEC: complete(sub_info->complete); - } else if (!wait) - complete(sub_info->complete); + } } /** - * call_usermodehelper_keys - start a usermode application - * @path: pathname for the application - * @argv: null-terminated argument list - * @envp: null-terminated environment list - * @session_keyring: session keyring for process (NULL for an empty keyring) - * @wait: wait for the application to finish and return status. - * when -1 don't wait at all, but you get no useful error back when - * the program couldn't be exec'ed. This makes it safe to call - * from interrupt context. + * call_usermodehelper_setup - prepare to call a usermode helper + * @path - path to usermode executable + * @argv - arg vector for process + * @envp - environment for process * - * Runs a user-space application. The application is started - * asynchronously if wait is not set, and runs as a child of keventd. - * (ie. it runs with full root capabilities). - * - * Must be called from process context. 
Returns a negative error code - * if program was not execed successfully, or 0. + * Returns either NULL on allocation failure, or a subprocess_info + * structure. This should be passed to call_usermodehelper_exec to + * exec the process and free the structure. */ -int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, int wait) +struct subprocess_info *call_usermodehelper_setup(char *path, + char **argv, char **envp) { - DECLARE_COMPLETION_ONSTACK(done); struct subprocess_info *sub_info; - int retval; - - if (!khelper_wq) - return -EBUSY; - - if (path[0] == '\0') - return 0; - sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); if (!sub_info) - return -ENOMEM; + goto out; INIT_WORK(&sub_info->work, __call_usermodehelper); - sub_info->complete = &done; sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; - sub_info->ring = session_keyring; - sub_info->wait = wait; - queue_work(khelper_wq, &sub_info->work); - if (wait < 0) /* task has freed sub_info */ - return 0; - wait_for_completion(&done); - retval = sub_info->retval; - kfree(sub_info); - return retval; + out: + return sub_info; } -EXPORT_SYMBOL(call_usermodehelper_keys); +EXPORT_SYMBOL(call_usermodehelper_setup); -int call_usermodehelper_pipe(char *path, char **argv, char **envp, - struct file **filp) +/** + * call_usermodehelper_setkeys - set the session keys for usermode helper + * @info: a subprocess_info returned by call_usermodehelper_setup + * @session_keyring: the session keyring for the process + */ +void call_usermodehelper_setkeys(struct subprocess_info *info, + struct key *session_keyring) { - DECLARE_COMPLETION(done); - struct subprocess_info sub_info = { - .work = __WORK_INITIALIZER(sub_info.work, - __call_usermodehelper), - .complete = &done, - .path = path, - .argv = argv, - .envp = envp, - .retval = 0, - }; - struct file *f; + info->ring = session_keyring; +} +EXPORT_SYMBOL(call_usermodehelper_setkeys); - if (!khelper_wq) - 
return -EBUSY; +/** + * call_usermodehelper_setcleanup - set a cleanup function + * @info: a subprocess_info returned by call_usermodehelper_setup + * @cleanup: a cleanup function + * + * The cleanup function is called just before the subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. + */ +void call_usermodehelper_setcleanup(struct subprocess_info *info, + void (*cleanup)(char **argv, char **envp)) +{ + info->cleanup = cleanup; +} +EXPORT_SYMBOL(call_usermodehelper_setcleanup); - if (path[0] == '\0') - return 0; +/** + * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin + * @sub_info: a subprocess_info returned by call_usermodehelper_setup + * @filp: set to the write-end of a pipe + * + * This constructs a pipe, and sets the read end to be the stdin of the + * subprocess, and returns the write-end in *@filp. + */ +int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, + struct file **filp) +{ + struct file *f; f = create_write_pipe(); if (IS_ERR(f)) @@ -335,11 +345,85 @@ free_write_pipe(*filp); return PTR_ERR(f); } - sub_info.stdin = f; + sub_info->stdin = f; + + return 0; +} +EXPORT_SYMBOL(call_usermodehelper_stdinpipe); - queue_work(khelper_wq, &sub_info.work); +/** + * call_usermodehelper_exec - start a usermode application + * @sub_info: information about the subprocess + * @wait: wait for the application to finish and return status. + * when -1 don't wait at all, but you get no useful error back when + * the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * Runs a user-space application. The application is started + * asynchronously if wait is not set, and runs as a child of keventd. + * (ie. it runs with full root capabilities).
+ */ +int call_usermodehelper_exec(struct subprocess_info *sub_info, + enum umh_wait wait) +{ + DECLARE_COMPLETION_ONSTACK(done); + int retval; + + if (sub_info->path[0] == '\0') { + retval = 0; + goto out; + } + + if (!khelper_wq) { + retval = -EBUSY; + goto out; + } + + sub_info->complete = &done; + sub_info->wait = wait; + + queue_work(khelper_wq, &sub_info->work); + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ + return 0; wait_for_completion(&done); - return sub_info.retval; + retval = sub_info->retval; + + out: + call_usermodehelper_freeinfo(sub_info); + return retval; +} +EXPORT_SYMBOL(call_usermodehelper_exec); + +/** + * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @filp: set to the write-end of a pipe + * + * This is a simple wrapper which executes a usermode-helper function + * with a pipe as stdin. It is implemented entirely in terms of + * lower-level call_usermodehelper_* functions. 
+ */ +int call_usermodehelper_pipe(char *path, char **argv, char **envp, + struct file **filp) +{ + struct subprocess_info *sub_info; + int ret; + + sub_info = call_usermodehelper_setup(path, argv, envp); + if (sub_info == NULL) + return -ENOMEM; + + ret = call_usermodehelper_stdinpipe(sub_info, filp); + if (ret < 0) + goto out; + + return call_usermodehelper_exec(sub_info, 1); + + out: + call_usermodehelper_freeinfo(sub_info); + return ret; } EXPORT_SYMBOL(call_usermodehelper_pipe); diff -Nurb linux-2.6.22-570/kernel/module.c linux-2.6.22-591/kernel/module.c --- linux-2.6.22-570/kernel/module.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/module.c 2007-12-21 15:36:12.000000000 -0500 @@ -67,6 +67,7 @@ /* List of modules, protected by module_mutex AND modlist_lock */ static DEFINE_MUTEX(module_mutex); static LIST_HEAD(modules); +static DECLARE_MUTEX(notify_mutex); static BLOCKING_NOTIFIER_HEAD(module_notify_list); @@ -488,8 +489,7 @@ mod->field = NULL; \ } \ static struct module_attribute modinfo_##field = { \ - .attr = { .name = __stringify(field), .mode = 0444, \ - .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(field), .mode = 0444 }, \ .show = show_modinfo_##field, \ .setup = setup_modinfo_##field, \ .test = modinfo_##field##_exists, \ @@ -713,6 +713,12 @@ if (ret != 0) goto out; + down(&notify_mutex); + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, + mod); + up(&notify_mutex); + + /* Never wait if forced.
*/ if (!forced && module_refcount(mod) != 0) wait_for_zero_refcount(mod); @@ -725,6 +731,11 @@ } free_module(mod); + down(&notify_mutex); + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GONE, + NULL); + up(&notify_mutex); + out: mutex_unlock(&module_mutex); return ret; @@ -793,7 +804,7 @@ } static struct module_attribute refcnt = { - .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, + .attr = { .name = "refcnt", .mode = 0444 }, .show = show_refcnt, }; @@ -846,12 +857,15 @@ case MODULE_STATE_GOING: state = "going"; break; + case MODULE_STATE_GONE: + state = "gone"; + break; } return sprintf(buffer, "%s\n", state); } static struct module_attribute initstate = { - .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, + .attr = { .name = "initstate", .mode = 0444 }, .show = show_initstate, }; @@ -1032,7 +1046,6 @@ sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; - sattr->mattr.attr.owner = mod; sattr->mattr.attr.mode = S_IRUGO; *(gattr++) = &(sattr++)->mattr.attr; } @@ -1090,7 +1103,6 @@ if (!attr->test || (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); - temp_attr->attr.owner = mod; error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); ++temp_attr; } @@ -1212,6 +1224,11 @@ /* Arch-specific cleanup.
*/ module_arch_cleanup(mod); +#ifdef CONFIG_KGDB + /* kgdb info */ + vfree(mod->mod_sections); +#endif + /* Module unload stuff */ module_unload_free(mod); @@ -1471,6 +1488,31 @@ } } +#ifdef CONFIG_KGDB +int add_modsects (struct module *mod, Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const + char *secstrings) +{ + int i; + + mod->num_sections = hdr->e_shnum - 1; + mod->mod_sections = vmalloc((hdr->e_shnum - 1)* + sizeof (struct mod_section)); + + if (mod->mod_sections == NULL) { + return -ENOMEM; + } + + for (i = 1; i < hdr->e_shnum; i++) { + mod->mod_sections[i - 1].address = (void *)sechdrs[i].sh_addr; + strncpy(mod->mod_sections[i - 1].name, secstrings + + sechdrs[i].sh_name, MAX_SECTNAME); + mod->mod_sections[i - 1].name[MAX_SECTNAME] = '\0'; + } + + return 0; +} +#endif + #ifdef CONFIG_KALLSYMS static int is_exported(const char *name, const struct module *mod) { @@ -1886,6 +1928,12 @@ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); +#ifdef CONFIG_KGDB + if ((err = add_modsects(mod, hdr, sechdrs, secstrings)) < 0) { + goto nomodsectinfo; + } +#endif + err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -1946,6 +1994,11 @@ arch_cleanup: module_arch_cleanup(mod); cleanup: + +#ifdef CONFIG_KGDB +nomodsectinfo: + vfree(mod->mod_sections); +#endif module_unload_free(mod); module_free(mod, mod->module_init); free_core: @@ -2017,6 +2070,10 @@ /* Init routine failed: abort. Try to protect us from buggy refcounters. 
*/ mod->state = MODULE_STATE_GOING; + down(&notify_mutex); + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, + mod); + up(&notify_mutex); synchronize_sched(); if (mod->unsafe) printk(KERN_ERR "%s: module is now stuck!\n", diff -Nurb linux-2.6.22-570/kernel/ns_container.c linux-2.6.22-591/kernel/ns_container.c --- linux-2.6.22-570/kernel/ns_container.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/ns_container.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,99 @@ +/* + * ns_container.c - namespace container subsystem + * + * Copyright 2006, 2007 IBM Corp + */ + +#include +#include +#include + +struct ns_container { + struct container_subsys_state css; + spinlock_t lock; +}; + +struct container_subsys ns_subsys; + +static inline struct ns_container *container_to_ns( + struct container *container) +{ + return container_of(container_subsys_state(container, ns_subsys_id), + struct ns_container, css); +} + +int ns_container_clone(struct task_struct *task) +{ + return container_clone(task, &ns_subsys); +} + +/* + * Rules: + * 1. you can only enter a container which is a child of your current + * container + * 2. you can only place another process into a container if + * a. you have CAP_SYS_ADMIN + * b. your container is an ancestor of task's destination container + * (hence either you are in the same container as task, or in an + * ancestor container thereof) + */ +static int ns_can_attach(struct container_subsys *ss, + struct container *new_container, struct task_struct *task) +{ + struct container *orig; + + if (current != task) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!container_is_descendant(new_container)) + return -EPERM; + } + + if (atomic_read(&new_container->count) != 0) + return -EPERM; + + orig = task_container(task, ns_subsys_id); + if (orig && orig != new_container->parent) + return -EPERM; + + return 0; +} + +/* + * Rules: you can only create a container if + * 1. you are capable(CAP_SYS_ADMIN) + * 2.
the target container is a descendant of your own container + */ +static int ns_create(struct container_subsys *ss, struct container *container) +{ + struct ns_container *ns_container; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!container_is_descendant(container)) + return -EPERM; + + ns_container = kzalloc(sizeof(*ns_container), GFP_KERNEL); + if (!ns_container) return -ENOMEM; + spin_lock_init(&ns_container->lock); + container->subsys[ns_subsys.subsys_id] = &ns_container->css; + return 0; +} + +static void ns_destroy(struct container_subsys *ss, + struct container *container) +{ + struct ns_container *ns_container; + + ns_container = container_to_ns(container); + kfree(ns_container); +} + +struct container_subsys ns_subsys = { + .name = "ns", + .can_attach = ns_can_attach, + .create = ns_create, + .destroy = ns_destroy, + .subsys_id = ns_subsys_id, +}; diff -Nurb linux-2.6.22-570/kernel/nsproxy.c linux-2.6.22-591/kernel/nsproxy.c --- linux-2.6.22-570/kernel/nsproxy.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/nsproxy.c 2007-12-21 15:36:15.000000000 -0500 @@ -19,10 +19,13 @@ #include #include #include +#include #include #include #include +static struct kmem_cache *nsproxy_cachep; + struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); void get_task_namespaces(struct task_struct *tsk) @@ -58,6 +61,7 @@ struct fs_struct *new_fs) { struct nsproxy *new_nsp; + int err = -ENOMEM; vxdprintk(VXD_CBIT(space, 4), "unshare_namespaces(0x%08x,%p,%p)", @@ -83,8 +87,24 @@ if (IS_ERR(new_nsp->pid_ns)) goto out_pid; + new_nsp->user_ns = copy_user_ns(flags, orig->user_ns); + if (IS_ERR(new_nsp->user_ns)) + goto out_user; + + new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); + if (IS_ERR(new_nsp->net_ns)) + goto out_net; + return new_nsp; +out_net: + if (new_nsp->user_ns) + put_user_ns(new_nsp->user_ns); + if (new_nsp->net_ns) + put_net(new_nsp->net_ns); +out_user: + if (new_nsp->pid_ns) + put_pid_ns(new_nsp->pid_ns); out_pid: if 
(new_nsp->ipc_ns) put_ipc_ns(new_nsp->ipc_ns); @@ -95,11 +115,11 @@ if (new_nsp->mnt_ns) put_mnt_ns(new_nsp->mnt_ns); out_ns: - kfree(new_nsp); - return ERR_PTR(-ENOMEM); + kmem_cache_free(nsproxy_cachep, new_nsp); + return ERR_PTR(err); } -static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, +static struct nsproxy *create_new_namespaces(unsigned long flags, struct task_struct *tsk, struct fs_struct *new_fs) { return unshare_namespaces(flags, tsk->nsproxy, new_fs); @@ -130,7 +150,7 @@ * called from clone. This now handles copy for nsproxy and all * namespaces therein. */ -int copy_namespaces(int flags, struct task_struct *tsk) +int copy_namespaces(unsigned long flags, struct task_struct *tsk) { struct nsproxy *old_ns = tsk->nsproxy; struct nsproxy *new_ns = NULL; @@ -143,10 +163,17 @@ return 0; get_nsproxy(old_ns); + return 0; - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) + if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) return 0; + #ifndef CONFIG_NET_NS + if (unshare_flags & CLONE_NEWNET) + return -EINVAL; + #endif + + if (!capable(CAP_SYS_ADMIN)) { err = -EPERM; goto out; @@ -158,7 +185,14 @@ goto out; } + err = ns_container_clone(tsk); + if (err) { + put_nsproxy(new_ns); + goto out; + } + tsk->nsproxy = new_ns; + out: put_nsproxy(old_ns); vxdprintk(VXD_CBIT(space, 3), @@ -194,25 +228,37 @@ "unshare_nsproxy_namespaces(0x%08lx,[%p])", unshare_flags, current->nsproxy); - if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) + if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWUSER | CLONE_NEWNET))) return 0; -#ifndef CONFIG_IPC_NS - if (unshare_flags & CLONE_NEWIPC) +#ifndef CONFIG_NET_NS + if (unshare_flags & CLONE_NEWNET) return -EINVAL; #endif - -#ifndef CONFIG_UTS_NS - if (unshare_flags & CLONE_NEWUTS) - return -EINVAL; -#endif - if (!capable(CAP_SYS_ADMIN)) return -EPERM; *new_nsp = create_new_namespaces(unshare_flags, current, new_fs 
? new_fs : current->fs); - if (IS_ERR(*new_nsp)) + if (IS_ERR(*new_nsp)) { err = PTR_ERR(*new_nsp); + goto out; + } + + err = ns_container_clone(current); + if (err) + put_nsproxy(*new_nsp); + +out: return err; } + +static int __init nsproxy_cache_init(void) +{ + nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), + 0, SLAB_PANIC, NULL, NULL); + return 0; +} + +module_init(nsproxy_cache_init); diff -Nurb linux-2.6.22-570/kernel/params.c linux-2.6.22-591/kernel/params.c --- linux-2.6.22-570/kernel/params.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/kernel/params.c 2007-12-21 15:36:12.000000000 -0500 @@ -491,7 +491,6 @@ pattr->mattr.show = param_attr_show; pattr->mattr.store = param_attr_store; pattr->mattr.attr.name = (char *)&kp->name[name_skip]; - pattr->mattr.attr.owner = mk->mod; pattr->mattr.attr.mode = kp->perm; *(gattr++) = &(pattr++)->mattr.attr; } diff -Nurb linux-2.6.22-570/kernel/pid.c linux-2.6.22-591/kernel/pid.c --- linux-2.6.22-570/kernel/pid.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/pid.c 2007-12-21 15:36:12.000000000 -0500 @@ -379,7 +379,7 @@ } EXPORT_SYMBOL_GPL(find_get_pid); -struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) +struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) { BUG_ON(!old_ns); get_pid_ns(old_ns); diff -Nurb linux-2.6.22-570/kernel/ptrace.c linux-2.6.22-591/kernel/ptrace.c --- linux-2.6.22-570/kernel/ptrace.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/ptrace.c 2007-12-21 15:36:12.000000000 -0500 @@ -143,7 +143,7 @@ return -EPERM; smp_rmb(); if (task->mm) - dumpable = task->mm->dumpable; + dumpable = get_dumpable(task->mm); if (!dumpable && !capable(CAP_SYS_PTRACE)) return -EPERM; if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT)) diff -Nurb linux-2.6.22-570/kernel/rcutorture.c linux-2.6.22-591/kernel/rcutorture.c --- linux-2.6.22-570/kernel/rcutorture.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/kernel/rcutorture.c 2007-12-21 15:36:12.000000000 -0500 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -518,7 +519,6 @@ VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; do { schedule_timeout_uninterruptible(1); @@ -558,7 +558,6 @@ VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; do { schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); @@ -589,7 +588,6 @@ VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; do { idx = cur_ops->readlock(); diff -Nurb linux-2.6.22-570/kernel/rtmutex-tester.c linux-2.6.22-591/kernel/rtmutex-tester.c --- linux-2.6.22-570/kernel/rtmutex-tester.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/rtmutex-tester.c 2007-12-21 15:36:12.000000000 -0500 @@ -260,6 +260,7 @@ int ret; current->flags |= PF_MUTEX_TESTER; + set_freezable(); allow_signal(SIGHUP); for(;;) { diff -Nurb linux-2.6.22-570/kernel/sched.c linux-2.6.22-591/kernel/sched.c --- linux-2.6.22-570/kernel/sched.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/sched.c 2007-12-21 15:36:12.000000000 -0500 @@ -51,8 +51,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -3399,9 +3401,16 @@ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; struct vx_info *vxi = p->vx_info; /* p is _always_ current */ cputime64_t tmp; + struct rq *rq = this_rq(); int nice = (TASK_NICE(p) > 0); p->utime = cputime_add(p->utime, cputime); + + + if (p != rq->idle) + cpuacct_charge(p, cputime); + + vx_account_user(vxi, cputime, nice); /* Add user time to cpustat. 
*/ @@ -3435,9 +3444,10 @@ cpustat->irq = cputime64_add(cpustat->irq, tmp); else if (softirq_count()) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); - else if (p != rq->idle) + else if (p != rq->idle) { cpustat->system = cputime64_add(cpustat->system, tmp); - else if (atomic_read(&rq->nr_iowait) > 0) + cpuacct_charge(p, cputime); + } else if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else cpustat->idle = cputime64_add(cpustat->idle, tmp); @@ -3462,8 +3472,10 @@ cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else cpustat->idle = cputime64_add(cpustat->idle, tmp); - } else + } else { cpustat->steal = cputime64_add(cpustat->steal, tmp); + cpuacct_charge(p, -tmp); + } } static void task_running_tick(struct rq *rq, struct task_struct *p, int cpu) @@ -5287,8 +5299,6 @@ struct migration_req *req; struct list_head *head; - try_to_freeze(); - spin_lock_irq(&rq->lock); if (cpu_is_offline(cpu)) { @@ -5522,7 +5532,6 @@ p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); if (IS_ERR(p)) return NOTIFY_BAD; - p->flags |= PF_NOFREEZE; kthread_bind(p, cpu); /* Must be high prio: stop_machine expects to yield to it. */ rq = task_rq_lock(p, &flags); @@ -6926,33 +6935,6 @@ arch_destroy_sched_domains(cpu_map); } -/* - * Partition sched domains as specified by the cpumasks below. 
- * This attaches all cpus from the cpumasks to the NULL domain, - * waits for a RCU quiescent period, recalculates sched - * domain information and then attaches them back to the - * correct sched domains - * Call with hotplug lock held - */ -int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) -{ - cpumask_t change_map; - int err = 0; - - cpus_and(*partition1, *partition1, cpu_online_map); - cpus_and(*partition2, *partition2, cpu_online_map); - cpus_or(change_map, *partition1, *partition2); - - /* Detach sched domains from all of the affected cpus */ - detach_destroy_domains(&change_map); - if (!cpus_empty(*partition1)) - err = build_sched_domains(partition1); - if (!err && !cpus_empty(*partition2)) - err = build_sched_domains(partition2); - - return err; -} - #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) int arch_reinit_sched_domains(void) { @@ -7177,6 +7159,9 @@ #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ + if (atomic_read(&debugger_active)) + return; + if ((in_atomic() || irqs_disabled()) && system_state == SYSTEM_RUNNING && !oops_in_progress) { if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) diff -Nurb linux-2.6.22-570/kernel/seccomp.c linux-2.6.22-591/kernel/seccomp.c --- linux-2.6.22-570/kernel/seccomp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/seccomp.c 2007-12-21 15:36:12.000000000 -0500 @@ -10,6 +10,7 @@ #include /* #define SECCOMP_DEBUG 1 */ +#define NR_SECCOMP_MODES 1 /* * Secure computing mode 1 allows only read/write/exit/sigreturn. 
@@ -54,3 +55,28 @@ #endif do_exit(SIGKILL); } + +long prctl_get_seccomp(void) +{ + return current->seccomp.mode; +} + +long prctl_set_seccomp(unsigned long seccomp_mode) +{ + long ret; + + /* can set it only once to be even more secure */ + ret = -EPERM; + if (unlikely(current->seccomp.mode)) + goto out; + + ret = -EINVAL; + if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { + current->seccomp.mode = seccomp_mode; + set_thread_flag(TIF_SECCOMP); + ret = 0; + } + + out: + return ret; +} diff -Nurb linux-2.6.22-570/kernel/signal.c linux-2.6.22-591/kernel/signal.c --- linux-2.6.22-570/kernel/signal.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/signal.c 2007-12-21 15:36:12.000000000 -0500 @@ -257,6 +257,16 @@ } } +int unhandled_signal(struct task_struct *tsk, int sig) +{ + if (is_init(tsk)) + return 1; + if (tsk->ptrace & PT_PTRACED) + return 0; + return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || + (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); +} + /* Notify the system that a driver wants to block all signals for this * process, and wants to be notified if any signals at all were to be diff -Nurb linux-2.6.22-570/kernel/softirq.c linux-2.6.22-591/kernel/softirq.c --- linux-2.6.22-570/kernel/softirq.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/softirq.c 2007-12-21 15:36:12.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -304,11 +305,6 @@ if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ - /* Make sure that timer wheel updates are propagated */ - if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) - tick_nohz_stop_sched_tick(); -#endif preempt_enable_no_resched(); } @@ -490,7 +486,6 @@ static int ksoftirqd(void * __bind_cpu) { set_user_nice(current, 19); - current->flags |= PF_NOFREEZE; set_current_state(TASK_INTERRUPTIBLE); diff -Nurb linux-2.6.22-570/kernel/softlockup.c 
linux-2.6.22-591/kernel/softlockup.c --- linux-2.6.22-570/kernel/softlockup.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/softlockup.c 2007-12-21 15:36:12.000000000 -0500 @@ -10,9 +10,11 @@ #include #include #include +#include #include #include #include +#include static DEFINE_SPINLOCK(print_lock); @@ -47,6 +49,9 @@ void touch_softlockup_watchdog(void) { __raw_get_cpu_var(touch_timestamp) = get_timestamp(); +#ifdef CONFIG_KGDB + atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 0); +#endif } EXPORT_SYMBOL(touch_softlockup_watchdog); @@ -116,7 +121,6 @@ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; sched_setscheduler(current, SCHED_FIFO, &param); - current->flags |= PF_NOFREEZE; /* initialize timestamp */ touch_softlockup_watchdog(); diff -Nurb linux-2.6.22-570/kernel/sys.c linux-2.6.22-591/kernel/sys.c --- linux-2.6.22-570/kernel/sys.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/sys.c 2007-12-21 15:36:12.000000000 -0500 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1043,7 +1044,7 @@ return -EPERM; } if (new_egid != old_egid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } if (rgid != (gid_t) -1 || @@ -1073,13 +1074,13 @@ if (capable(CAP_SETGID)) { if (old_egid != gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->gid = current->egid = current->sgid = current->fsgid = gid; } else if ((gid == current->gid) || (gid == current->sgid)) { if (old_egid != gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->egid = current->fsgid = gid; @@ -1110,7 +1111,7 @@ switch_uid(new_user); if (dumpclear) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->uid = new_ruid; @@ -1166,7 +1167,7 @@ return -EAGAIN; if (new_euid != old_euid) { - current->mm->dumpable =
suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = current->euid = new_euid; @@ -1216,7 +1217,7 @@ return -EPERM; if (old_euid != uid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = current->euid = uid; @@ -1261,7 +1262,7 @@ } if (euid != (uid_t) -1) { if (euid != current->euid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->euid = euid; @@ -1311,7 +1312,7 @@ } if (egid != (gid_t) -1) { if (egid != current->egid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->egid = egid; @@ -1357,7 +1358,7 @@ uid == current->suid || uid == current->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = uid; @@ -1386,7 +1387,7 @@ gid == current->sgid || gid == current->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsgid = gid; @@ -2185,14 +2186,14 @@ error = put_user(current->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = current->mm->dumpable; + error = get_dumpable(current->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - current->mm->dumpable = arg2; + set_dumpable(current->mm, arg2); break; case PR_SET_UNALIGN: @@ -2261,6 +2262,13 @@ error = SET_ENDIAN(current, arg2); break; + case PR_GET_SECCOMP: + error = prctl_get_seccomp(); + break; + case PR_SET_SECCOMP: + error = prctl_set_seccomp(arg2); + break; + default: error = -EINVAL; break; @@ -2297,3 +2305,61 @@ } return err ? 
-EFAULT : 0; } + +char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; + +static void argv_cleanup(char **argv, char **envp) +{ + argv_free(argv); +} + +/** + * Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + int argc; + char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); + static char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL + }; + int ret = -ENOMEM; + struct subprocess_info *info; + + if (argv == NULL) { + printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", + __func__, poweroff_cmd); + goto out; + } + + info = call_usermodehelper_setup(argv[0], argv, envp); + if (info == NULL) { + argv_free(argv); + goto out; + } + + call_usermodehelper_setcleanup(info, argv_cleanup); + + ret = call_usermodehelper_exec(info, UMH_NO_WAIT); + + out: + if (ret && force) { + printk(KERN_WARNING "Failed to start orderly shutdown: " + "forcing the issue\n"); + + /* I guess this should try to kick off some daemon to + sync and poweroff asap. Or not even bother syncing + if we're doing an emergency shutdown? 
*/ + emergency_sync(); + kernel_power_off(); + } + + return ret; +} +EXPORT_SYMBOL_GPL(orderly_poweroff); diff -Nurb linux-2.6.22-570/kernel/sysctl.c linux-2.6.22-591/kernel/sysctl.c --- linux-2.6.22-570/kernel/sysctl.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/sysctl.c 2007-12-21 15:36:15.000000000 -0500 @@ -45,13 +45,13 @@ #include #include #include +#include +#include +#include #include #include -extern int proc_nr_files(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); - #ifdef CONFIG_X86 #include #include @@ -140,6 +140,10 @@ void __user *buffer, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_NET +static void sysctl_net_init(struct net *net); +#endif + static ctl_table root_table[]; static struct ctl_table_header root_table_header = { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; @@ -203,7 +207,10 @@ .mode = 0555, .child = dev_table, }, - +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; @@ -217,6 +224,15 @@ .proc_handler = &proc_dointvec, }, { + .ctl_name = KERN_POWEROFF_CMD, + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, + { .ctl_name = KERN_CORE_USES_PID, .procname = "core_uses_pid", .data = &core_uses_pid, @@ -625,7 +641,20 @@ .proc_handler = &proc_dointvec, }, #endif - +#ifdef CONFIG_SECURITY + { + .ctl_name = CTL_UNNUMBERED, + .procname = "mmap_min_addr", + .data = &mmap_min_addr, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, +#endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; @@ -744,6 +773,14 @@ .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = VM_HUGETLB_TREAT_MOVABLE, + .procname = 
"hugepages_treat_as_movable", + .data = &hugepages_treat_as_movable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &hugetlb_treat_movable_handler, + }, #endif { .ctl_name = VM_LOWMEM_RESERVE_RATIO, @@ -892,6 +929,10 @@ .extra1 = &zero, }, #endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; @@ -1032,10 +1073,28 @@ .child = binfmt_misc_table, }, #endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; static ctl_table debug_table[] = { +#ifdef CONFIG_X86 + { + .ctl_name = DEBUG_UNHANDLED_SIGNALS, + .procname = "show-unhandled-signals", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; @@ -1097,6 +1156,11 @@ { struct ctl_table_header *head; struct list_head *tmp; + struct net *net = current->nsproxy->net_ns; + + if (!net->net_table_header.ctl_table) + sysctl_net_init(net); + spin_lock(&sysctl_lock); if (prev) { tmp = &prev->ctl_entry; @@ -1114,6 +1178,10 @@ next: tmp = tmp->next; if (tmp == &root_table_header.ctl_entry) +#ifdef CONFIG_NET + tmp = &net->net_table_header.ctl_entry; + else if (tmp == &net->net_table_header.ctl_entry) +#endif break; } spin_unlock(&sysctl_lock); @@ -1229,7 +1297,6 @@ void __user *newval, size_t newlen) { int op = 0, rc; - size_t len; if (oldval) op |= 004; @@ -1250,25 +1317,10 @@ /* If there is no strategy routine, or if the strategy returns * zero, proceed with automatic r/w */ if (table->data && table->maxlen) { - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(len, 
oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > table->maxlen) - len = table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - } + rc = sysctl_data(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; } return 0; } @@ -1359,7 +1411,8 @@ * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ -struct ctl_table_header *register_sysctl_table(ctl_table * table) +static struct ctl_table_header *__register_sysctl_table( + struct ctl_table_header *root, ctl_table * table) { struct ctl_table_header *tmp; tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); @@ -1371,11 +1424,16 @@ tmp->unregistering = NULL; sysctl_set_parent(NULL, table); spin_lock(&sysctl_lock); - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + list_add_tail(&tmp->ctl_entry, &root->ctl_entry); spin_unlock(&sysctl_lock); return tmp; } +struct ctl_table_header *register_sysctl_table(ctl_table *table) +{ + return __register_sysctl_table(&root_table_header, table); +} + /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl_table @@ -1392,6 +1450,92 @@ kfree(header); } +#ifdef CONFIG_NET + +static void *fixup_table_addr(void *addr, + const char *start, size_t size, const char *new) +{ + char *ptr = addr; + if ((ptr >= start) && (ptr < (start + size))) + ptr += new - start; + return ptr; +} + +static void table_fixup(struct ctl_table *table, + const void *start, size_t size, const void *new) +{ + for (; table->ctl_name || table->procname; table++) { + table->data = fixup_table_addr(table->data, start, size, new); + table->extra1 = fixup_table_addr(table->extra1, start, size, new); + table->extra2 = fixup_table_addr(table->extra2, start, size, new); + + /* Whee recursive functions on the kernel stack */ + if (table->child) + table_fixup(table->child, start, size, new); + } +} + 
+static unsigned count_table_entries(struct ctl_table *table) +{ + unsigned entries = 0; + for (; table->ctl_name || table->procname; table++) { + entries += 1; + + if (table->child) + entries += count_table_entries(table->child); + } + entries += 1; /* Null terminating entry */ + return entries; +} + +static struct ctl_table *copy_table_entries( + struct ctl_table *dest, struct ctl_table *src) +{ + struct ctl_table *table = dest; + for (; src->ctl_name || src->procname; src++) { + *dest++ = *table; + } + dest++; /* Null terminating entry */ + for (; table->ctl_name || table->procname; table++) { + if (table->child) + dest = copy_table_entries(dest, table->child); + } + return dest; +} + +static void sysctl_net_init(struct net *net) +{ + unsigned entries; + struct ctl_table *table; + + entries = count_table_entries(net_root_table); + table = kzalloc(GFP_KERNEL, sizeof(*table)*entries); + /* FIXME free table... */ + + copy_table_entries(table, net_root_table); + table_fixup(table, &init_net, sizeof(init_net), net); + + net->net_table_header.ctl_table = table; + INIT_LIST_HEAD(&net->net_table_header.ctl_entry); +} + +struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table) +{ + if (!net->net_table_header.ctl_table) + sysctl_net_init(net); + table_fixup(table, &init_net, sizeof(init_net), net); + return __register_sysctl_table(&net->net_table_header, table); +} +EXPORT_SYMBOL_GPL(register_net_sysctl_table); + +void unregister_net_sysctl_table(struct ctl_table_header *header) +{ + return unregister_sysctl_table(header); +} +EXPORT_SYMBOL_GPL(unregister_net_sysctl_table); +#endif + + #else /* !CONFIG_SYSCTL */ struct ctl_table_header *register_sysctl_table(ctl_table * table) { @@ -2167,6 +2311,40 @@ * General sysctl support routines */ +/* The generic sysctl data routine (used if no strategy routine supplied) */ +int sysctl_data(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user 
*newval, size_t newlen) +{ + size_t len; + + /* Get out of I don't have a variable */ + if (!table->data || !table->maxlen) + return -ENOTDIR; + + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, table->data, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } + + if (newval && newlen) { + if (newlen > table->maxlen) + newlen = table->maxlen; + + if (copy_from_user(table->data, newval, newlen)) + return -EFAULT; + } + return 1; +} + /* The generic string strategy routine: */ int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, @@ -2355,6 +2533,13 @@ return -ENOSYS; } +int sysctl_data(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + return -ENOSYS; +} + int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -2402,4 +2587,5 @@ EXPORT_SYMBOL(sysctl_jiffies); EXPORT_SYMBOL(sysctl_ms_jiffies); EXPORT_SYMBOL(sysctl_string); +EXPORT_SYMBOL(sysctl_data); EXPORT_SYMBOL(unregister_sysctl_table); diff -Nurb linux-2.6.22-570/kernel/taskstats.c linux-2.6.22-591/kernel/taskstats.c --- linux-2.6.22-570/kernel/taskstats.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/taskstats.c 2007-12-21 15:36:12.000000000 -0500 @@ -196,6 +196,8 @@ /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; + stats->nvcsw = tsk->nvcsw; + stats->nivcsw = tsk->nivcsw; bacct_add_tsk(stats, tsk); /* fill in extended acct fields */ @@ -242,6 +244,8 @@ */ delayacct_add_tsk(stats, tsk); + stats->nvcsw += tsk->nvcsw; + stats->nivcsw += tsk->nivcsw; } while_each_thread(first, tsk); unlock_task_sighand(first, &flags); diff -Nurb linux-2.6.22-570/kernel/time/tick-sched.c 
linux-2.6.22-591/kernel/time/tick-sched.c --- linux-2.6.22-570/kernel/time/tick-sched.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/time/tick-sched.c 2007-12-21 15:36:12.000000000 -0500 @@ -153,6 +153,7 @@ unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; struct tick_sched *ts; ktime_t last_update, expires, now, delta; + struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; int cpu; local_irq_save(flags); @@ -290,11 +291,34 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; + ts->sleep_length = ktime_sub(dev->next_event, now); end: local_irq_restore(flags); } /** + * tick_nohz_get_sleep_length - return the length of the current sleep + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_sleep_length(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + return ts->sleep_length; +} + +/** + * tick_nohz_get_idle_jiffies - returns the current idle jiffie count + */ +unsigned long tick_nohz_get_idle_jiffies(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + return ts->idle_jiffies; +} + +/** * nohz_restart_sched_tick - restart the idle tick from the idle task * * Restart the idle tick when the CPU is woken up from idle diff -Nurb linux-2.6.22-570/kernel/time/timekeeping.c linux-2.6.22-591/kernel/time/timekeeping.c --- linux-2.6.22-570/kernel/time/timekeeping.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/time/timekeeping.c 2007-12-21 15:36:15.000000000 -0500 @@ -39,7 +39,7 @@ */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); - +static unsigned long total_sleep_time; EXPORT_SYMBOL(xtime); @@ -251,6 +251,7 @@ xtime.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); + total_sleep_time = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } @@ -282,6 +283,7 @@ xtime.tv_sec += sleep_length; 
wall_to_monotonic.tv_sec -= sleep_length; + total_sleep_time += sleep_length; } /* re-base the last cycle value */ clock->cycle_last = clocksource_read(clock); @@ -476,3 +478,34 @@ change_clocksource(); update_vsyscall(&xtime, clock); } + +/** + * getboottime - Return the real time of system boot. + * @ts: pointer to the timespec to be set + * + * Returns the time of day in a timespec. + * + * This is based on the wall_to_monotonic offset and the total suspend + * time. Calls to settimeofday will affect the value returned (which + * basically means that however wrong your real time clock is at boot time, + * you get the right time here). + */ +void getboottime(struct timespec *ts) +{ + set_normalized_timespec(ts, + - (wall_to_monotonic.tv_sec + total_sleep_time), + - wall_to_monotonic.tv_nsec); +} + +EXPORT_SYMBOL(getboottime); + +/** + * monotonic_to_bootbased - Convert the monotonic time to boot based. + * @ts: pointer to the timespec to be converted + */ +void monotonic_to_bootbased(struct timespec *ts) +{ + ts->tv_sec += total_sleep_time; +} + +EXPORT_SYMBOL(monotonic_to_bootbased); diff -Nurb linux-2.6.22-570/kernel/timer.c linux-2.6.22-591/kernel/timer.c --- linux-2.6.22-570/kernel/timer.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/timer.c 2007-12-21 15:36:15.000000000 -0500 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -886,7 +887,11 @@ */ void run_local_timers(void) { + int this_cpu = smp_processor_id(); raise_softirq(TIMER_SOFTIRQ); +#ifdef CONFIG_KGDB + if(!atomic_read(&kgdb_sync_softlockup[this_cpu])) +#endif softlockup_tick(); } @@ -1125,6 +1130,7 @@ getnstimeofday(&tp); tp.tv_sec += wall_to_monotonic.tv_sec; tp.tv_nsec += wall_to_monotonic.tv_nsec; + monotonic_to_bootbased(&tp); if (tp.tv_nsec - NSEC_PER_SEC >= 0) { tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; diff -Nurb linux-2.6.22-570/kernel/unwind.c linux-2.6.22-591/kernel/unwind.c --- linux-2.6.22-570/kernel/unwind.c 1969-12-31 
19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/unwind.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,1288 @@ +/* + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * This code is released under version 2 of the GNU GPL. + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char __start_unwind[], __end_unwind[]; +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; + +#define MAX_STACK_DEPTH 8 + +#define EXTRA_INFO(f) { \ + BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ + % FIELD_SIZEOF(struct unwind_frame_info, f)) \ + + offsetof(struct unwind_frame_info, f) \ + / FIELD_SIZEOF(struct unwind_frame_info, f), \ + FIELD_SIZEOF(struct unwind_frame_info, f) \ + } +#define PTREGS_INFO(f) EXTRA_INFO(regs.f) + +static const struct { + unsigned offs:BITS_PER_LONG / 2; + unsigned width:BITS_PER_LONG / 2; +} reg_info[] = { + UNW_REGISTER_INFO +}; + +#undef PTREGS_INFO +#undef EXTRA_INFO + +#ifndef REG_INVALID +#define REG_INVALID(r) (reg_info[r].width == 0) +#endif + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define 
DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + +#define DW_EH_PE_FORM 0x07 +#define DW_EH_PE_native 0x00 +#define DW_EH_PE_leb128 0x01 +#define DW_EH_PE_data2 0x02 +#define DW_EH_PE_data4 0x03 +#define DW_EH_PE_data8 0x04 +#define DW_EH_PE_signed 0x08 +#define DW_EH_PE_ADJUST 0x70 +#define DW_EH_PE_abs 0x00 +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 +#define DW_EH_PE_omit 0xff + +typedef unsigned long uleb128_t; +typedef signed long sleb128_t; +#define sleb128abs __builtin_labs + +static struct unwind_table { + struct { + unsigned long pc; + unsigned long range; + } core, init; + const void *address; + unsigned long size; + const unsigned char *header; + unsigned long hdrsz; + struct unwind_table *link; + const char *name; +} root_table; + +struct unwind_item { + enum item_location { + Nowhere, + Memory, + Register, + Value + } where; + uleb128_t value; +}; + +struct unwind_state { + uleb128_t loc, org; + const u8 *cieStart, *cieEnd; + uleb128_t codeAlign; + sleb128_t dataAlign; + struct cfa { + uleb128_t reg, offs; + } cfa; + struct unwind_item regs[ARRAY_SIZE(reg_info)]; + unsigned stackDepth:8; + unsigned version:8; + const u8 *label; + const u8 *stack[MAX_STACK_DEPTH]; +}; + +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; + +static unsigned unwind_debug; +static int __init unwind_debug_setup(char *s) +{ + unwind_debug = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("unwind_debug=", unwind_debug_setup); +#define dprintk(lvl, fmt, args...) 
\ + ((void)(lvl > unwind_debug \ + || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) + +static struct unwind_table *find_table(unsigned long pc) +{ + struct unwind_table *table; + + for (table = &root_table; table; table = table->link) + if ((pc >= table->core.pc + && pc < table->core.pc + table->core.range) + || (pc >= table->init.pc + && pc < table->init.pc + table->init.range)) + break; + + return table; +} + +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType, + unsigned long text_base, + unsigned long data_base); + +static void init_unwind_table(struct unwind_table *table, + const char *name, + const void *core_start, + unsigned long core_size, + const void *init_start, + unsigned long init_size, + const void *table_start, + unsigned long table_size, + const u8 *header_start, + unsigned long header_size) +{ + const u8 *ptr = header_start + 4; + const u8 *end = header_start + header_size; + + table->core.pc = (unsigned long)core_start; + table->core.range = core_size; + table->init.pc = (unsigned long)init_start; + table->init.range = init_size; + table->address = table_start; + table->size = table_size; + /* See if the linker provided table looks valid. 
*/ + if (header_size <= 4 + || header_start[0] != 1 + || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) + != table_start + || !read_pointer(&ptr, end, header_start[2], 0, 0) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start) + || !read_pointer(&ptr, end, header_start[3], 0, + (unsigned long)header_start)) + header_start = NULL; + table->hdrsz = header_size; + smp_wmb(); + table->header = header_start; + table->link = NULL; + table->name = name; +} + +void __init unwind_init(void) +{ + init_unwind_table(&root_table, "kernel", + _text, _end - _text, + NULL, 0, + __start_unwind, __end_unwind - __start_unwind, + __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); +} + +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static signed fde_pointer_type(const u32 *cie); + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + unsigned long v; + + v = e1->start; + e1->start = e2->start; + e2->start = v; + v = e1->fde; + e1->fde = e2->fde; + e2->fde = v; +} + +static void __init setup_unwind_table(struct unwind_table *table, + void *(*alloc)(unsigned long)) +{ + const u8 *ptr; + unsigned long tableSize = table->size, hdrSize; + unsigned n; + const u32 *fde; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__((__packed__)) *header; + + if (table->header) + return; + + if (table->hdrsz) + 
printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", + table->name); + + if (tableSize & (sizeof(*fde) - 1)) + return; + + for (fde = table->address, n = 0; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, table); + signed ptrType; + + if (cie == &not_fde) + continue; + if (cie == NULL + || cie == &bad_cie + || (ptrType = fde_pointer_type(cie)) < 0) + return; + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0)) + return; + ++n; + } + + if (tableSize || !n) + return; + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + + 2 * n * sizeof(unsigned long); + dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); + header = alloc(hdrSize); + if (!header) + return; + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; + header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; + header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); + header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) + % __alignof(typeof(*header->table))); + for (fde = table->address, tableSize = table->size, n = 0; + tableSize; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); + + if (!fde[1]) + continue; /* this is a CIE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + fde_pointer_type(cie), 0, 0); + header->table[n].fde = (unsigned long)fde; + ++n; + } + WARN_ON(n != header->fde_count); + + sort(header->table, + n, + sizeof(*header->table), + cmp_eh_frame_hdr_table_entries, + swap_eh_frame_hdr_table_entries); + + table->hdrsz = hdrSize; +
smp_wmb(); + table->header = (const void *)header; +} + +static void *__init balloc(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, + sizeof(unsigned int), + __pa(MAX_DMA_ADDRESS)); +} + +void __init unwind_setup(void) +{ + setup_unwind_table(&root_table, balloc); +} + +#ifdef CONFIG_MODULES + +static struct unwind_table *last_table; + +/* Must be called with module_mutex held. */ +void *unwind_add_table(struct module *module, + const void *table_start, + unsigned long table_size) +{ + struct unwind_table *table; + + if (table_size <= 0) + return NULL; + + table = kmalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return NULL; + + init_unwind_table(table, module->name, + module->module_core, module->core_size, + module->module_init, module->init_size, + table_start, table_size, + NULL, 0); + + if (last_table) + last_table->link = table; + else + root_table.link = table; + last_table = table; + + return table; +} + +struct unlink_table_info +{ + struct unwind_table *table; + int init_only; +}; + +static int unlink_table(void *arg) +{ + struct unlink_table_info *info = arg; + struct unwind_table *table = info->table, *prev; + + for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) + ; + + if (prev->link) { + if (info->init_only) { + table->init.pc = 0; + table->init.range = 0; + info->table = NULL; + } else { + prev->link = table->link; + if (!prev->link) + last_table = prev; + } + } else + info->table = NULL; + + return 0; +} + +/* Must be called with module_mutex held. 
*/ +void unwind_remove_table(void *handle, int init_only) +{ + struct unwind_table *table = handle; + struct unlink_table_info info; + + if (!table || table == &root_table) + return; + + if (init_only && table == last_table) { + table->init.pc = 0; + table->init.range = 0; + return; + } + + info.table = table; + info.init_only = init_only; + stop_machine_run(unlink_table, &info, NR_CPUS); + + if (info.table) + kfree(table); +} + +#endif /* CONFIG_MODULES */ + +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + uleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (uleb128_t)(*cur & 0x7f) << shift; + if (!(*cur++ & 0x80)) + break; + } + *pcur = cur; + + return value; +} + +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + sleb128_t value; + unsigned shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (sleb128_t)(*cur & 0x7f) << shift; + if (!(*cur & 0x80)) { + value |= -(*cur++ & 0x40) << shift; + break; + } + } + *pcur = cur; + + return value; +} + +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) +{ + const u32 *cie; + + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + if (!fde[1]) + return &not_fde; /* this is a CIE */ + if ((fde[1] & (sizeof(*fde) - 1)) + || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) + return NULL; /* this is not a valid FDE */ + cie = fde + 1 - fde[1] / sizeof(*fde); + if (*cie <= sizeof(*cie) + 4 + || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || cie[1]) + return NULL; /* this is not a (valid) CIE */ + return cie; +} + +static unsigned long
read_pointer(const u8 **pLoc, + const void *end, + signed ptrType, + unsigned long text_base, + unsigned long data_base) +{ + unsigned long value = 0; + union { + const u8 *p8; + const u16 *p16u; + const s16 *p16s; + const u32 *p32u; + const s32 *p32s; + const unsigned long *pul; + } ptr; + + if (ptrType < 0 || ptrType == DW_EH_PE_omit) { + dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); + return 0; + } + ptr.p8 = *pLoc; + switch(ptrType & DW_EH_PE_FORM) { + case DW_EH_PE_data2: + if (end < (const void *)(ptr.p16u + 1)) { + dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); + return 0; + } + if(ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p16s++); + else + value = get_unaligned(ptr.p16u++); + break; + case DW_EH_PE_data4: +#ifdef CONFIG_64BIT + if (end < (const void *)(ptr.p32u + 1)) { + dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); + return 0; + } + if(ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p32s++); + else + value = get_unaligned(ptr.p32u++); + break; + case DW_EH_PE_data8: + BUILD_BUG_ON(sizeof(u64) != sizeof(value)); +#else + BUILD_BUG_ON(sizeof(u32) != sizeof(value)); +#endif + case DW_EH_PE_native: + if (end < (const void *)(ptr.pul + 1)) { + dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); + return 0; + } + value = get_unaligned(ptr.pul++); + break; + case DW_EH_PE_leb128: + BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); + value = ptrType & DW_EH_PE_signed + ? 
get_sleb128(&ptr.p8, end) + : get_uleb128(&ptr.p8, end); + if ((const void *)ptr.p8 > end) { + dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); + return 0; + } + break; + default: + dprintk(2, "Cannot decode pointer type %02X (%p,%p).", + ptrType, ptr.p8, end); + return 0; + } + switch(ptrType & DW_EH_PE_ADJUST) { + case DW_EH_PE_abs: + break; + case DW_EH_PE_pcrel: + value += (unsigned long)*pLoc; + break; + case DW_EH_PE_textrel: + if (likely(text_base)) { + value += text_base; + break; + } + dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", + ptrType, *pLoc, end); + return 0; + case DW_EH_PE_datarel: + if (likely(data_base)) { + value += data_base; + break; + } + dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", + ptrType, *pLoc, end); + return 0; + default: + dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", + ptrType, *pLoc, end); + return 0; + } + if ((ptrType & DW_EH_PE_indirect) + && probe_kernel_address((unsigned long *)value, value)) { + dprintk(1, "Cannot read indirect value %lx (%p,%p).", + value, *pLoc, end); + return 0; + } + *pLoc = ptr.p8; + + return value; +} + +static signed fde_pointer_type(const u32 *cie) +{ + const u8 *ptr = (const u8 *)(cie + 2); + unsigned version = *ptr; + + if (version != 1) + return -1; /* unsupported */ + if (*++ptr) { + const char *aug; + const u8 *end = (const u8 *)(cie + 1) + *cie; + uleb128_t len; + + /* check if augmentation size is first (and thus present) */ + if (*ptr != 'z') + return -1; + /* check if augmentation string is nul-terminated */ + if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) + return -1; + ++ptr; /* skip terminator */ + get_uleb128(&ptr, end); /* skip code alignment */ + get_sleb128(&ptr, end); /* skip data alignment */ + /* skip return address column */ + version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); + len = get_uleb128(&ptr, end); /* augmentation length */ + if (ptr + len < ptr || ptr + len > end) + return -1; + end = ptr + len; + while (*++aug) { + if (ptr >= end) + return -1; + switch(*aug) { + case 'L': + ++ptr; + break; + case 'P': { + signed ptrType = *ptr++; + + if (!read_pointer(&ptr, end, ptrType, 0, 0) + || ptr > end) + return -1; + } + break; + case 'R': + return *ptr; + default: + return -1; + } + } + } + return DW_EH_PE_native|DW_EH_PE_abs; +} + +static int advance_loc(unsigned long delta, struct unwind_state *state) +{ + state->loc += delta * state->codeAlign; + + return delta > 0; +} + +static void set_rule(uleb128_t reg, + enum item_location where, + uleb128_t value, + struct unwind_state *state) +{ + if (reg < ARRAY_SIZE(state->regs)) { + state->regs[reg].where = where; + state->regs[reg].value = value; + } +} + +static int processCFI(const u8 *start, + const u8 *end, + unsigned long targetLoc, + signed ptrType, + struct unwind_state *state) +{ + union { + const u8 *p8; + const u16 *p16; + const u32 *p32; + } ptr; + int result = 1; + + if (start != state->cieStart) { + state->loc = state->org; + result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); + if (targetLoc == 0 && state->label == NULL) + return result; + } + for (ptr.p8 = start; result && ptr.p8 < end; ) { + switch(*ptr.p8 >> 6) { + uleb128_t value; + + case 0: + switch(*ptr.p8++) { + case DW_CFA_nop: + break; + case DW_CFA_set_loc: + state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); + if (state->loc == 0) + result = 0; + break; + case DW_CFA_advance_loc1: + result = ptr.p8 < end && advance_loc(*ptr.p8++, state); + break; + case DW_CFA_advance_loc2: + result = ptr.p8 <= end + 2 + && advance_loc(*ptr.p16++, state); + break; + case DW_CFA_advance_loc4: + result = ptr.p8 <= end + 4 + && advance_loc(*ptr.p32++, state); + break; + case DW_CFA_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, 
get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_offset_extended_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); + break; + case DW_CFA_register: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Register, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_remember_state: + if (ptr.p8 == state->label) { + state->label = NULL; + return 1; + } + if (state->stackDepth >= MAX_STACK_DEPTH) { + dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); + return 0; + } + state->stack[state->stackDepth++] = ptr.p8; + break; + case DW_CFA_restore_state: + if (state->stackDepth) { + const uleb128_t loc = state->loc; + const u8 *label = state->label; + + state->label = state->stack[state->stackDepth - 1]; + memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); + memset(state->regs, 0, sizeof(state->regs)); + state->stackDepth = 0; + result = processCFI(start, end, 0, ptrType, state); + state->loc = loc; + state->label = label; + } else { + dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); + return 0; + } + break; + case DW_CFA_def_cfa: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset: + state->cfa.offs = get_uleb128(&ptr.p8, end); + break; + case DW_CFA_def_cfa_sf: + state->cfa.reg = get_uleb128(&ptr.p8, end); + /*nobreak*/ + case DW_CFA_def_cfa_offset_sf: + state->cfa.offs = get_sleb128(&ptr.p8, end) + * state->dataAlign; + break; + case DW_CFA_def_cfa_register: + state->cfa.reg = get_uleb128(&ptr.p8, end); + break; + /*todo case 
DW_CFA_def_cfa_expression: */ + /*todo case DW_CFA_expression: */ + /*todo case DW_CFA_val_expression: */ + case DW_CFA_GNU_args_size: + get_uleb128(&ptr.p8, end); + break; + case DW_CFA_GNU_negative_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Memory, + (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_GNU_window_save: + default: + dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); + result = 0; + break; + } + break; + case 1: + result = advance_loc(*ptr.p8++ & 0x3f, state); + break; + case 2: + value = *ptr.p8++ & 0x3f; + set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); + break; + case 3: + set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); + break; + } + if (ptr.p8 > end) { + dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); + result = 0; + } + if (result && targetLoc != 0 && targetLoc < state->loc) + return 1; + } + + if (result && ptr.p8 < end) + dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); + + return result + && ptr.p8 == end + && (targetLoc == 0 + || (/*todo While in theory this should apply, gcc in practice omits + everything past the function prolog, and hence the location + never reaches the end of the function. + targetLoc < state->loc &&*/ state->label == NULL)); +} + +/* Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. 
*/ +int unwind(struct unwind_frame_info *frame) +{ +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) + const u32 *fde = NULL, *cie = NULL; + const u8 *ptr = NULL, *end = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; + unsigned long startLoc = 0, endLoc = 0, cfa; + unsigned i; + signed ptrType = -1; + uleb128_t retAddrReg = 0; + const struct unwind_table *table; + struct unwind_state state; + + if (UNW_PC(frame) == 0) + return -EINVAL; + if ((table = find_table(pc)) != NULL + && !(table->size & (sizeof(*fde) - 1))) { + const u8 *hdr = table->header; + unsigned long tableSize; + + smp_rmb(); + if (hdr && hdr[0] == 1) { + switch(hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; + case DW_EH_PE_data2: tableSize = 2; break; + case DW_EH_PE_data4: tableSize = 4; break; + case DW_EH_PE_data8: tableSize = 8; break; + default: tableSize = 0; break; + } + ptr = hdr + 4; + end = hdr + table->hdrsz; + if (tableSize + && read_pointer(&ptr, end, hdr[1], 0, 0) + == (unsigned long)table->address + && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 + && i == (end - ptr) / (2 * tableSize) + && !((end - ptr) % (2 * tableSize))) { + do { + const u8 *cur = ptr + (i / 2) * (2 * tableSize); + + startLoc = read_pointer(&cur, + cur + tableSize, + hdr[3], 0, + (unsigned long)hdr); + if (pc < startLoc) + i /= 2; + else { + ptr = cur - tableSize; + i = (i + 1) / 2; + } + } while (startLoc && i > 1); + if (i == 1 + && (startLoc = read_pointer(&ptr, + ptr + tableSize, + hdr[3], 0, + (unsigned long)hdr)) != 0 + && pc >= startLoc) + fde = (void *)read_pointer(&ptr, + ptr + tableSize, + hdr[3], 0, + (unsigned long)hdr); + } + } + if(hdr && !fde) + dprintk(3, "Binary lookup for %lx failed.", pc); + + if (fde != NULL) { + cie = cie_for_fde(fde, table); + ptr = (const u8 *)(fde + 2); + if(cie != NULL + && cie != &bad_cie + && cie != &not_fde + && (ptrType = fde_pointer_type(cie)) >= 0 + && read_pointer(&ptr, + (const u8 *)(fde + 1) +
*fde, + ptrType, 0, 0) == startLoc) { + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if(pc >= endLoc) + fde = NULL; + } else + fde = NULL; + if(!fde) + dprintk(1, "Binary lookup result for %lx discarded.", pc); + } + if (fde == NULL) { + for (fde = table->address, tableSize = table->size; + cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, + fde += 1 + *fde / sizeof(*fde)) { + cie = cie_for_fde(fde, table); + if (cie == &bad_cie) { + cie = NULL; + break; + } + if (cie == NULL + || cie == &not_fde + || (ptrType = fde_pointer_type(cie)) < 0) + continue; + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (!startLoc) + continue; + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType, 0, 0); + if (pc >= startLoc && pc < endLoc) + break; + } + if(!fde) + dprintk(3, "Linear lookup for %lx failed.", pc); + } + } + if (cie != NULL) { + memset(&state, 0, sizeof(state)); + state.cieEnd = ptr; /* keep here temporarily */ + ptr = (const u8 *)(cie + 2); + end = (const u8 *)(cie + 1) + *cie; + frame->call_frame = 1; + if ((state.version = *ptr) != 1) + cie = NULL; /* unsupported version */ + else if (*++ptr) { + /* check if augmentation size is first (and thus present) */ + if (*ptr == 'z') { + while (++ptr < end && *ptr) { + switch(*ptr) { + /* check for ignorable (or already handled) + * nul-terminated augmentation string */ + case 'L': + case 'P': + case 'R': + continue; + case 'S': + frame->call_frame = 0; + continue; + default: + break; + } + break; + } + } + if (ptr >= end || *ptr) + cie = NULL; + } + if(!cie) + dprintk(1, "CIE unusable (%p,%p).", ptr, end); + ++ptr; + } + if (cie != NULL) { + /* get code
alignment factor */ + state.codeAlign = get_uleb128(&ptr, end); + /* get data alignment factor */ + state.dataAlign = get_sleb128(&ptr, end); + if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) + cie = NULL; + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EPERM; + } else { + retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + ptr += augSize; + } + if (ptr > end + || retAddrReg >= ARRAY_SIZE(reg_info) + || REG_INVALID(retAddrReg) + || reg_info[retAddrReg].width != sizeof(unsigned long)) + cie = NULL; + } + if(!cie) + dprintk(1, "CIE validation failed (%p,%p).", ptr, end); + } + if (cie != NULL) { + state.cieStart = ptr; + ptr = state.cieEnd; + state.cieEnd = end; + end = (const u8 *)(fde + 1) + *fde; + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + if ((ptr += augSize) > end) + fde = NULL; + } + if(!fde) + dprintk(1, "FDE validation failed (%p,%p).", ptr, end); + } + if (cie == NULL || fde == NULL) { +#ifdef CONFIG_FRAME_POINTER + unsigned long top, bottom; + + if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) + return -EPERM; + top = STACK_TOP(frame->task); + bottom = STACK_BOTTOM(frame->task); +# if FRAME_RETADDR_OFFSET < 0 + if (UNW_SP(frame) < top + && UNW_FP(frame) <= UNW_SP(frame) + && bottom < UNW_FP(frame) +# else + if (UNW_SP(frame) > top + && UNW_FP(frame) >= UNW_SP(frame) + && bottom > UNW_FP(frame) +# endif + && !((UNW_SP(frame) | UNW_FP(frame)) + & (sizeof(unsigned long) - 1))) { + unsigned long link; + + if (!probe_kernel_address( + (unsigned long *)(UNW_FP(frame) + + FRAME_LINK_OFFSET), + link) +# if FRAME_RETADDR_OFFSET < 0 + && link > bottom && link < UNW_FP(frame) +# else + &&
link > UNW_FP(frame) && link < bottom +# endif + && !(link & (sizeof(link) - 1)) + && !probe_kernel_address( + (unsigned long *)(UNW_FP(frame) + + FRAME_RETADDR_OFFSET), UNW_PC(frame))) { + UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET +# if FRAME_RETADDR_OFFSET < 0 + - +# else + + +# endif + sizeof(UNW_PC(frame)); + UNW_FP(frame) = link; + return 0; + } + } +#endif + return -ENXIO; + } + state.org = startLoc; + memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); + /* process instructions */ + if (!processCFI(ptr, end, pc, ptrType, &state) + || state.loc > endLoc + || state.regs[retAddrReg].where == Nowhere + || state.cfa.reg >= ARRAY_SIZE(reg_info) + || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) { + dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); + return -EIO; + } + /* update frame */ +#ifndef CONFIG_AS_CFI_SIGNAL_FRAME + if(frame->call_frame + && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) + frame->call_frame = 0; +#endif + cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; + startLoc = min((unsigned long)UNW_SP(frame), cfa); + endLoc = max((unsigned long)UNW_SP(frame), cfa); + if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { + startLoc = min(STACK_LIMIT(cfa), cfa); + endLoc = max(STACK_LIMIT(cfa), cfa); + } +#ifndef CONFIG_64BIT +# define CASES CASE(8); CASE(16); CASE(32) +#else +# define CASES CASE(8); CASE(16); CASE(32); CASE(64) +#endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) { + if (state.regs[i].where == Nowhere) + continue; + dprintk(1, "Cannot restore register %u (%d).", + i, state.regs[i].where); + return -EIO; + } + switch(state.regs[i].where) { + default: + break; + case Register: + if (state.regs[i].value >= ARRAY_SIZE(reg_info) + || REG_INVALID(state.regs[i].value) + || reg_info[i].width > 
reg_info[state.regs[i].value].width) { + dprintk(1, "Cannot restore register %u from register %lu.", + i, state.regs[i].value); + return -EIO; + } + switch(reg_info[state.regs[i].value].width) { +#define CASE(n) \ + case sizeof(u##n): \ + state.regs[i].value = FRAME_REG(state.regs[i].value, \ + const u##n); \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported register size %u (%lu).", + reg_info[state.regs[i].value].width, + state.regs[i].value); + return -EIO; + } + break; + } + } + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) + continue; + switch(state.regs[i].where) { + case Nowhere: + if (reg_info[i].width != sizeof(UNW_SP(frame)) + || &FRAME_REG(i, __typeof__(UNW_SP(frame))) + != &UNW_SP(frame)) + continue; + UNW_SP(frame) = cfa; + break; + case Register: + switch(reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + FRAME_REG(i, u##n) = state.regs[i].value; \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported register size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + break; + case Value: + if (reg_info[i].width != sizeof(unsigned long)) { + dprintk(1, "Unsupported value size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + FRAME_REG(i, unsigned long) = cfa + state.regs[i].value + * state.dataAlign; + break; + case Memory: { + unsigned long addr = cfa + state.regs[i].value + * state.dataAlign; + + if ((state.regs[i].value * state.dataAlign) + % sizeof(unsigned long) + || addr < startLoc + || addr + sizeof(unsigned long) < addr + || addr + sizeof(unsigned long) > endLoc) { + dprintk(1, "Bad memory location %lx (%lx).", + addr, state.regs[i].value); + return -EIO; + } + switch(reg_info[i].width) { +#define CASE(n) case sizeof(u##n): \ + probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ + break + CASES; +#undef CASE + default: + dprintk(1, "Unsupported memory size %u (%u).", + reg_info[i].width, i); + return -EIO; + } + } + break; + } + } + + if (UNW_PC(frame) % 
state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); + return -EIO; + } + if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { + dprintk(1, "No progress (%lx,%lx).", pc, sp); + return -EIO; + } + + return 0; +#undef CASES +#undef FRAME_REG +} +EXPORT_SYMBOL(unwind); + +int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + /*const*/ struct pt_regs *regs) +{ + info->task = tsk; + info->call_frame = 0; + arch_unw_init_frame_info(info, regs); + + return 0; +} +EXPORT_SYMBOL(unwind_init_frame_info); + +/* + * Prepare to unwind a blocked task. + */ +int unwind_init_blocked(struct unwind_frame_info *info, + struct task_struct *tsk) +{ + info->task = tsk; + info->call_frame = 0; + arch_unw_init_blocked(info); + + return 0; +} +EXPORT_SYMBOL(unwind_init_blocked); + +/* + * Prepare to unwind the currently running thread. + */ +int unwind_init_running(struct unwind_frame_info *info, + asmlinkage int (*callback)(struct unwind_frame_info *, + void *arg), + void *arg) +{ + info->task = current; + info->call_frame = 0; + + return arch_unwind_init_running(info, callback, arg); +} +EXPORT_SYMBOL(unwind_init_running); + diff -Nurb linux-2.6.22-570/kernel/user.c linux-2.6.22-591/kernel/user.c --- linux-2.6.22-570/kernel/user.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/user.c 2007-12-21 15:36:15.000000000 -0500 @@ -14,17 +14,17 @@ #include #include #include +#include +#include /* * UID task count cache, to get fast user lookup in "alloc_uid" * when changing user ID's (ie setuid() and friends). */ -#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 
3 : 8) -#define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) #define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) -#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) +#define uidhashentry(ns, xid, uid) ((ns)->uidhash_table + __uidhashfn(xid, uid)) static struct kmem_cache *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -94,9 +94,10 @@ { struct user_struct *ret; unsigned long flags; + struct user_namespace *ns = current->nsproxy->user_ns; spin_lock_irqsave(&uidhash_lock, flags); - ret = uid_hash_find(xid, uid, uidhashentry(xid, uid)); + ret = uid_hash_find(xid, uid, uidhashentry(ns, xid, uid)); spin_unlock_irqrestore(&uidhash_lock, flags); return ret; } @@ -122,7 +123,8 @@ struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(xid, uid); + struct user_namespace *ns = current->nsproxy->user_ns; + struct list_head *hashent = uidhashentry(ns,xid, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); @@ -212,11 +214,11 @@ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); for(n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(uidhash_table + n); + INIT_LIST_HEAD(init_user_ns.uidhash_table + n); /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0,0)); + uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0, 0)); spin_unlock_irq(&uidhash_lock); return 0; diff -Nurb linux-2.6.22-570/kernel/user_namespace.c linux-2.6.22-591/kernel/user_namespace.c --- linux-2.6.22-570/kernel/user_namespace.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/kernel/user_namespace.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,87 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ */ + +#include +#include +#include +#include + +struct user_namespace init_user_ns = { + .kref = { + .refcount = ATOMIC_INIT(2), + }, + .root_user = &root_user, +}; + +EXPORT_SYMBOL_GPL(init_user_ns); + +#ifdef CONFIG_USER_NS + +/* + * Clone a new ns copying an original user ns, setting refcount to 1 + * @old_ns: namespace to clone + * Return ERR_PTR(-ENOMEM) on error (allocation failure), new ns otherwise + */ +static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) +{ + struct user_namespace *ns; + struct user_struct *new_user; + int n; + + ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + kref_init(&ns->kref); + + for (n = 0; n < UIDHASH_SZ; ++n) + INIT_LIST_HEAD(ns->uidhash_table + n); + + /* Insert new root user. */ + ns->root_user = alloc_uid(ns, 0); + if (!ns->root_user) { + kfree(ns); + return ERR_PTR(-ENOMEM); + } + + /* Reset current->user with a new one */ + new_user = alloc_uid(ns, current->uid); + if (!new_user) { + free_uid(ns->root_user); + kfree(ns); + return ERR_PTR(-ENOMEM); + } + + switch_uid(new_user); + return ns; +} + +struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) +{ + struct user_namespace *new_ns; + + BUG_ON(!old_ns); + get_user_ns(old_ns); + + if (!(flags & CLONE_NEWUSER)) + return old_ns; + + new_ns = clone_user_ns(old_ns); + + put_user_ns(old_ns); + return new_ns; +} + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns; + + ns = container_of(kref, struct user_namespace, kref); + kfree(ns); +} + +#endif /* CONFIG_USER_NS */ diff -Nurb linux-2.6.22-570/kernel/utsname.c linux-2.6.22-591/kernel/utsname.c --- linux-2.6.22-570/kernel/utsname.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/kernel/utsname.c 2007-12-21 15:36:15.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include /* * Clone a new ns copying an original utsname, setting refcount to 1 @@ -25,11 +26,12 @@ struct uts_namespace *ns; ns = 
kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); - if (ns) { + if (!ns) + return ERR_PTR(-ENOMEM); + memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); kref_init(&ns->kref); - atomic_inc(&vs_global_uts_ns); - } + return ns; } @@ -39,7 +41,7 @@ * utsname of this process won't be seen by parent, and vice * versa. */ -struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) +struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) { struct uts_namespace *new_ns; diff -Nurb linux-2.6.22-570/kernel/utsname_sysctl.c linux-2.6.22-591/kernel/utsname_sysctl.c --- linux-2.6.22-570/kernel/utsname_sysctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/kernel/utsname_sysctl.c 2007-12-21 15:36:12.000000000 -0500 @@ -18,10 +18,7 @@ static void *get_uts(ctl_table *table, int write) { char *which = table->data; -#ifdef CONFIG_UTS_NS - struct uts_namespace *uts_ns = current->nsproxy->uts_ns; - which = (which - (char *)&init_uts_ns) + (char *)uts_ns; -#endif + if (!write) down_read(&uts_sem); else diff -Nurb linux-2.6.22-570/kernel/workqueue.c linux-2.6.22-591/kernel/workqueue.c --- linux-2.6.22-570/kernel/workqueue.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/kernel/workqueue.c 2007-12-21 15:36:12.000000000 -0500 @@ -282,8 +282,8 @@ struct cpu_workqueue_struct *cwq = __cwq; DEFINE_WAIT(wait); - if (!cwq->wq->freezeable) - current->flags |= PF_NOFREEZE; + if (cwq->wq->freezeable) + set_freezable(); set_user_nice(current, -5); diff -Nurb linux-2.6.22-570/lib/Kconfig.debug linux-2.6.22-591/lib/Kconfig.debug --- linux-2.6.22-570/lib/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/lib/Kconfig.debug 2007-12-21 15:36:12.000000000 -0500 @@ -364,6 +364,24 @@ some architectures or if you use external debuggers. If you don't debug the kernel, you can say N. 
+config UNWIND_INFO + bool "Compile the kernel with frame unwind information" + depends on !IA64 && !PARISC && !ARM + depends on !MODULES || !(MIPS || PPC || SUPERH || V850) + help + If you say Y here the resulting kernel image will be slightly larger + but not slower, and it will give very useful debugging information. + If you don't debug the kernel, you can say N, but we may not be able + to solve problems without frame unwind information or frame pointers. + +config STACK_UNWIND + bool "Stack unwind support" + depends on UNWIND_INFO + depends on X86 + help + This enables more precise stack traces, omitting all unrelated + occurrences of pointers into kernel code from the dump. + config FORCED_INLINING bool "Force gcc to inline functions marked 'inline'" depends on DEBUG_KERNEL @@ -409,6 +427,9 @@ config FAULT_INJECTION bool "Fault-injection framework" depends on DEBUG_KERNEL + # could support fp on X86_32 here too, but let's not + select UNWIND_INFO if X86 + select STACK_UNWIND if X86 help Provide fault-injection framework. For more details, see Documentation/fault-injection/. @@ -445,3 +466,5 @@ select FRAME_POINTER help Provide stacktrace filter for fault-injection capabilities + +source "lib/Kconfig.kgdb" diff -Nurb linux-2.6.22-570/lib/Kconfig.kgdb linux-2.6.22-591/lib/Kconfig.kgdb --- linux-2.6.22-570/lib/Kconfig.kgdb 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/lib/Kconfig.kgdb 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,255 @@ + +config WANT_EXTRA_DEBUG_INFORMATION + bool + select DEBUG_INFO + select UNWIND_INFO + select FRAME_POINTER if X86 || SUPERH + default n + +config UNWIND_INFO + bool + default n + +config KGDB + bool "KGDB: kernel debugging with remote gdb" + select WANT_EXTRA_DEBUG_INFORMATION + select KGDB_ARCH_HAS_SHADOW_INFO if X86_64 + depends on DEBUG_KERNEL && (ARM || X86 || MIPS || (SUPERH && !SUPERH64) || IA64 || PPC) + help + If you say Y here, it will be possible to remotely debug the + kernel using gdb. 
Documentation of kernel debugger is available + at http://kgdb.sourceforge.net as well as in DocBook form + in Documentation/DocBook/. If unsure, say N. + +config KGDB_ARCH_HAS_SHADOW_INFO + bool + +config KGDB_CONSOLE + bool "KGDB: Console messages through gdb" + depends on KGDB + help + If you say Y here, console messages will appear through gdb. + Other consoles such as tty or ttyS will continue to work as usual. + Note, that if you use this in conjunction with KGDB_ETH, if the + ethernet driver runs into an error condition during use with KGDB + it is possible to hit an infinite recursion, causing the kernel + to crash, and typically reboot. For this reason, it is preferable + to use NETCONSOLE in conjunction with KGDB_ETH instead of + KGDB_CONSOLE. + +choice + prompt "Method for KGDB communication" + depends on KGDB + default KGDB_MPSC if SERIAL_MPSC + default KGDB_CPM_UART if (CPM2 || 8xx) + default KGDB_SIBYTE if SIBYTE_SB1xxx_SOC + default KGDB_TXX9 if CPU_TX49XX + default KGDB_SH_SCI if SERIAL_SH_SCI + default KGDB_PXA_SERIAL if ARCH_PXA + default KGDB_AMBA_PL011 if ARM_AMBA + default KGDB_8250_NOMODULE + help + There are a number of different ways in which you can communicate + with KGDB. The most common is via serial, with the 8250 driver + (should your hardware have an 8250, or ns1655x style uart). + Another option is to use the NETPOLL framework and UDP, should + your ethernet card support this. Other options may exist. + You can elect to have one core I/O driver that is built into the + kernel for debugging as the kernel is booting, or using only + kernel modules. + +config KGDB_ONLY_MODULES + bool "KGDB: Use only kernel modules for I/O" + depends on MODULES + help + Use only kernel modules to configure KGDB I/O after the + kernel is booted. + +config KGDB_8250_NOMODULE + bool "KGDB: On generic serial port (8250)" + select KGDB_8250 + help + Uses generic serial port (8250) to communicate with the host + GDB. 
This is independent of the normal (SERIAL_8250) driver + for this chipset. + +config KGDBOE_NOMODULE + bool "KGDB: On ethernet - in kernel" + select KGDBOE + help + Uses the NETPOLL API to communicate with the host GDB via UDP. + In order for this to work, the ethernet interface specified must + support the NETPOLL API, and this must be initialized at boot. + See the documentation for syntax. + +config KGDB_MPSC + bool "KGDB: On MV64x60 MPSC" + depends on SERIAL_MPSC + help + Uses a Marvell GT64260B or MV64x60 Multi-Purpose Serial + Controller (MPSC) channel. Note that the GT64260A is not + supported. + +config KGDB_CPM_UART + bool "KGDB: On CPM UART" + depends on PPC && (CPM2 || 8xx) + help + Uses CPM UART to communicate with the host GDB. + +config KGDB_SIBYTE + bool "KGDB: On Broadcom SB1xxx serial port" + depends on MIPS && SIBYTE_SB1xxx_SOC + +config KGDB_TXX9 + bool "KGDB: On TX49xx serial port" + depends on MIPS && CPU_TX49XX + help + Uses TX49xx serial port to communicate with the host KGDB. + +config KGDB_SH_SCI + bool "KGDB: On SH SCI(F) serial port" + depends on SUPERH && SERIAL_SH_SCI + help + Uses the SH SCI(F) serial port to communicate with the host GDB. + +config KGDB_AMBA_PL011 + bool "KGDB: On ARM AMBA PL011 Serial Port" + depends on ARM && ARCH_VERSATILE + help + Enables the KGDB serial driver for the AMBA bus PL011 serial + devices from ARM. 
+ +config KGDB_PXA_SERIAL + bool "KGDB: On the PXA2xx serial port" + depends on ARCH_PXA + help + Enables the KGDB serial driver for Intel PXA SOC +endchoice + +choice + prompt "PXA UART to use for KGDB" + depends on KGDB_PXA_SERIAL + default KGDB_PXA_FFUART + +config KGDB_PXA_FFUART + bool "FFUART" + +config KGDB_PXA_BTUART + bool "BTUART" + +config KGDB_PXA_STUART + bool "STUART" +endchoice + +choice + prompt "SCC/SMC to use for KGDB" + depends on KGDB_CPM_UART + default KGDB_CPM_UART_SCC4 if ADS8272 + +config KGDB_CPM_UART_SCC1 + bool "SCC1" + depends on SERIAL_CPM_SCC1 + +config KGDB_CPM_UART_SCC2 + bool "SCC2" + depends on SERIAL_CPM_SCC2 + +config KGDB_CPM_UART_SCC3 + bool "SCC3" + depends on SERIAL_CPM_SCC3 + +config KGDB_CPM_UART_SCC4 + bool "SCC4" + depends on SERIAL_CPM_SCC4 + +config KGDB_CPM_UART_SMC1 + bool "SMC1" + depends on SERIAL_CPM_SMC1 + +config KGDB_CPM_UART_SMC2 + bool "SMC2" + depends on SERIAL_CPM_SMC2 +endchoice + +config KGDBOE + tristate "KGDB: On ethernet" if !KGDBOE_NOMODULE + depends on m && KGDB + select NETPOLL + select NETPOLL_TRAP + help + Uses the NETPOLL API to communicate with the host GDB via UDP. + In order for this to work, the ethernet interface specified must + support the NETPOLL API, and this must be initialized at boot. + See the documentation for syntax. + +config KGDB_8250 + tristate "KGDB: On generic serial port (8250)" if !KGDB_8250_NOMODULE + depends on m && KGDB_ONLY_MODULES + help + Uses generic serial port (8250) to communicate with the host + GDB. This is independent of the normal (SERIAL_8250) driver + for this chipset. + +config KGDB_SIMPLE_SERIAL + bool "Simple selection of KGDB serial port" + depends on KGDB_8250_NOMODULE + default y + help + If you say Y here, you will only have to pick the baud rate + and port number that you wish to use for KGDB. Note that this + only works on architectures that register known serial ports + early on. 
If you say N, you will have to provide, either here + or on the command line, the type (I/O or MMIO), IRQ and + address to use. If in doubt, say Y. + +config KGDB_BAUDRATE + int "Debug serial port baud rate" + depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || \ + KGDB_MPSC || KGDB_CPM_UART || \ + KGDB_TXX9 || KGDB_PXA_SERIAL || KGDB_AMBA_PL011 + default "115200" + help + gdb and the kernel stub need to agree on the baud rate to be + used. Standard rates from 9600 to 115200 are allowed, and this + may be overridden via the commandline. + +config KGDB_PORT_NUM + int "Serial port number for KGDB" + range 0 1 if KGDB_MPSC + range 0 3 + depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || KGDB_MPSC || KGDB_TXX9 + default "1" + help + Pick the port number (0 based) for KGDB to use. + +config KGDB_AMBA_BASE + hex "AMBA PL011 Serial Port Base Address" + default 0x101f2000 if ARCH_VERSATILE + depends on KGDB_AMBA_PL011 + help + Base address of the AMBA port that KGDB will use. + +config KGDB_AMBA_UARTCLK + int "AMBAPL011 Serial UART Clock Frequency" + default 24000000 if ARCH_VERSATILE + depends on KGDB_AMBA_PL011 + help + Frequency (in HZ) of the ARM AMBA UART clock + +config KGDB_AMBA_IRQ + int "AMBA PL011 Serial Port IRQ" + default 13 if ARCH_VERSATILE + depends on KGDB_AMBA_PL011 + help + Pick the IRQ of the AMBA port that KGDB will use. + +config KGDB_8250_CONF_STRING + string "Configuration string for KGDB" + depends on KGDB_8250_NOMODULE && !KGDB_SIMPLE_SERIAL + default "io,2f8,115200,3" if X86 + help + The format of this string should be <io or mmio>,
<port address>,<baud rate>,<irq>. For example, to use the + serial port on an i386 box located at 0x2f8 and 115200 baud + on IRQ 3, use: + io,2f8,115200,3 diff -Nurb linux-2.6.22-570/lib/Makefile linux-2.6.22-591/lib/Makefile --- linux-2.6.22-570/lib/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/lib/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -5,9 +5,10 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o reciprocal_div.o + sha1.o irq_regs.o reciprocal_div.o argv_split.o \ + check_signature.o -lib-$(CONFIG_MMU) += ioremap.o +lib-$(CONFIG_MMU) += ioremap.o pagewalk.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o kref.o kobject_uevent.o klist.o diff -Nurb linux-2.6.22-570/lib/argv_split.c linux-2.6.22-591/lib/argv_split.c --- linux-2.6.22-570/lib/argv_split.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/lib/argv_split.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,159 @@ +/* + * Helper function for splitting a string into an argv-like array. + */ + +#ifndef TEST +#include +#include +#include +#endif + +static const char *skip_sep(const char *cp) +{ + while (*cp && isspace(*cp)) + cp++; + + return cp; +} + +static const char *skip_arg(const char *cp) +{ + while (*cp && !isspace(*cp)) + cp++; + + return cp; +} + +static int count_argc(const char *str) +{ + int count = 0; + + while (*str) { + str = skip_sep(str); + if (*str) { + count++; + str = skip_arg(str); + } + } + + return count; +} + +/** + * argv_free - free an argv + * + * @argv - the argument vector to be freed + * + * Frees an argv and the strings it points to. 
+ */ +void argv_free(char **argv) +{ + char **p; + for (p = argv; *p; p++) + kfree(*p); + + kfree(argv); +} +EXPORT_SYMBOL(argv_free); + +/** + * argv_split - split a string at whitespace, returning an argv + * @gfp: the GFP mask used to allocate memory + * @str: the string to be split + * @argcp: returned argument count + * + * Returns an array of pointers to strings which are split out from + * @str. This is performed by strictly splitting on white-space; no + * quote processing is performed. Multiple whitespace characters are + * considered to be a single argument separator. The returned array + * is always NULL-terminated. Returns NULL on memory allocation + * failure. + */ +char **argv_split(gfp_t gfp, const char *str, int *argcp) +{ + int argc = count_argc(str); + char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp); + char **argvp; + + if (argv == NULL) + goto out; + + *argcp = argc; + argvp = argv; + + while (*str) { + str = skip_sep(str); + + if (*str) { + const char *p = str; + char *t; + + str = skip_arg(str); + + t = kstrndup(p, str-p, gfp); + if (t == NULL) + goto fail; + *argvp++ = t; + } + } + *argvp = NULL; + + out: + return argv; + + fail: + argv_free(argv); + return NULL; +} +EXPORT_SYMBOL(argv_split); + +#ifdef TEST +#define _GNU_SOURCE +#include +#include +#include +#include + +typedef enum { + GFP_KERNEL, +} gfp_t; +#define kzalloc(size, x) malloc(size) +#define kfree(x) free(x) +#define kstrndup(s, n, gfp) strndup(s, n) +#define BUG() abort() + +int main() { + const char *testvec[] = { + "", + "x", + "\"", + "\\\0", + "\"", + "test one two three", + "arg\"foo\"bar biff", + "one two\\ three four", + "one \"two three\" four", + NULL, + }; + const char **t; + + for (t = testvec; *t; t++) { + char **argv; + int argc; + char **a; + + printf("%d: test [%s]\n", t-testvec, *t); + + argv = argv_split(GFP_KERNEL, *t, &argc); + + printf("argc=%d vec=", argc); + for (a = argv; *a; a++) + printf("[%s] ", *a); + printf("\n"); + + argv_free(argv); + } + + 
return 0; +} +#endif diff -Nurb linux-2.6.22-570/lib/check_signature.c linux-2.6.22-591/lib/check_signature.c --- linux-2.6.22-570/lib/check_signature.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/lib/check_signature.c 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,26 @@ +#include +#include + +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +int check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + while (length--) { + if (readb(io_addr) != *signature) + return 0; + io_addr++; + signature++; + } + return 1; +} +EXPORT_SYMBOL(check_signature); diff -Nurb linux-2.6.22-570/lib/idr.c linux-2.6.22-591/lib/idr.c --- linux-2.6.22-570/lib/idr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/lib/idr.c 2007-12-21 15:36:15.000000000 -0500 @@ -70,6 +70,26 @@ spin_unlock_irqrestore(&idp->lock, flags); } +static void idr_mark_full(struct idr_layer **pa, int id) +{ + struct idr_layer *p = pa[0]; + int l = 0; + + __set_bit(id & IDR_MASK, &p->bitmap); + /* + * If this layer is full mark the bit in the layer above to + * show that this part of the radix tree is full. This may + * complete the layer above and require walking up the radix + * tree. 
+ */ + while (p->bitmap == IDR_FULL) { + if (!(p = pa[++l])) + break; + id = id >> IDR_BITS; + __set_bit((id & IDR_MASK), &p->bitmap); + } +} + /** * idr_pre_get - reserver resources for idr allocation * @idp: idr handle @@ -95,11 +115,10 @@ } EXPORT_SYMBOL(idr_pre_get); -static int sub_alloc(struct idr *idp, void *ptr, int *starting_id) +static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) { int n, m, sh; struct idr_layer *p, *new; - struct idr_layer *pa[MAX_LEVEL]; int l, id; long bm; @@ -144,30 +163,13 @@ pa[l--] = p; p = p->ary[m]; } - /* - * We have reached the leaf node, plant the - * users pointer and return the raw id. - */ - p->ary[m] = (struct idr_layer *)ptr; - __set_bit(m, &p->bitmap); - p->count++; - /* - * If this layer is full mark the bit in the layer above - * to show that this part of the radix tree is full. - * This may complete the layer above and require walking - * up the radix tree. - */ - n = id; - while (p->bitmap == IDR_FULL) { - if (!(p = pa[++l])) - break; - n = n >> IDR_BITS; - __set_bit((n & IDR_MASK), &p->bitmap); - } - return(id); + + pa[l] = p; + return id; } -static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) +static int idr_get_empty_slot(struct idr *idp, int starting_id, + struct idr_layer **pa) { struct idr_layer *p, *new; int layers, v, id; @@ -213,12 +215,31 @@ } idp->top = p; idp->layers = layers; - v = sub_alloc(idp, ptr, &id); + v = sub_alloc(idp, &id, pa); if (v == -2) goto build_up; return(v); } +static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) +{ + struct idr_layer *pa[MAX_LEVEL]; + int id; + + id = idr_get_empty_slot(idp, starting_id, pa); + if (id >= 0) { + /* + * Successfully found an empty slot. Install the user + * pointer and mark the slot full. 
+ */ + pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr; + pa[0]->count++; + idr_mark_full(pa, id); + } + + return id; +} + /** * idr_get_new_above - allocate new idr entry above or equal to a start id * @idp: idr handle @@ -473,3 +494,248 @@ spin_lock_init(&idp->lock); } EXPORT_SYMBOL(idr_init); + + +/* + * IDA - IDR based ID allocator + * + * This is an id allocator without id -> pointer translation. Memory + * usage is much lower than full blown idr because each id only + * occupies a bit. ida uses a custom leaf node which contains + * IDA_BITMAP_BITS slots. + * + * 2007-04-25 written by Tejun Heo + */ + +static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) +{ + unsigned long flags; + + if (!ida->free_bitmap) { + spin_lock_irqsave(&ida->idr.lock, flags); + if (!ida->free_bitmap) { + ida->free_bitmap = bitmap; + bitmap = NULL; + } + spin_unlock_irqrestore(&ida->idr.lock, flags); + } + + kfree(bitmap); +} + +/** + * ida_pre_get - reserve resources for ida allocation + * @ida: ida handle + * @gfp_mask: memory allocation flag + * + * This function should be called prior to locking and calling the + * following function. It preallocates enough memory to satisfy the + * worst possible allocation. + * + * If the system is REALLY out of memory this function returns 0, + * otherwise 1. + */ +int ida_pre_get(struct ida *ida, gfp_t gfp_mask) +{ + /* allocate idr_layers */ + if (!idr_pre_get(&ida->idr, gfp_mask)) + return 0; + + /* allocate free_bitmap */ + if (!ida->free_bitmap) { + struct ida_bitmap *bitmap; + + bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask); + if (!bitmap) + return 0; + + free_bitmap(ida, bitmap); + } + + return 1; +} +EXPORT_SYMBOL(ida_pre_get); + +/** + * ida_get_new_above - allocate new ID above or equal to a start id + * @ida: ida handle + * @starting_id: id to start search at + * @p_id: pointer to the allocated handle + * + * Allocate new ID above or equal to @starting_id. It should be called with + * any required locks. 
+ * + * If memory is required, it will return -EAGAIN, you should unlock + * and go back to the ida_pre_get() call. If the ida is full, it will + * return -ENOSPC. + * + * @p_id returns a value in the range 0 ... 0x7fffffff. + */ +int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) +{ + struct idr_layer *pa[MAX_LEVEL]; + struct ida_bitmap *bitmap; + unsigned long flags; + int idr_id = starting_id / IDA_BITMAP_BITS; + int offset = starting_id % IDA_BITMAP_BITS; + int t, id; + + restart: + /* get vacant slot */ + t = idr_get_empty_slot(&ida->idr, idr_id, pa); + if (t < 0) { + if (t == -1) + return -EAGAIN; + else /* will be -3 */ + return -ENOSPC; + } + + if (t * IDA_BITMAP_BITS >= MAX_ID_BIT) + return -ENOSPC; + + if (t != idr_id) + offset = 0; + idr_id = t; + + /* if bitmap isn't there, create a new one */ + bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK]; + if (!bitmap) { + spin_lock_irqsave(&ida->idr.lock, flags); + bitmap = ida->free_bitmap; + ida->free_bitmap = NULL; + spin_unlock_irqrestore(&ida->idr.lock, flags); + + if (!bitmap) + return -EAGAIN; + + memset(bitmap, 0, sizeof(struct ida_bitmap)); + pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap; + pa[0]->count++; + } + + /* lookup for empty slot */ + t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset); + if (t == IDA_BITMAP_BITS) { + /* no empty slot after offset, continue to the next chunk */ + idr_id++; + offset = 0; + goto restart; + } + + id = idr_id * IDA_BITMAP_BITS + t; + if (id >= MAX_ID_BIT) + return -ENOSPC; + + __set_bit(t, bitmap->bitmap); + if (++bitmap->nr_busy == IDA_BITMAP_BITS) + idr_mark_full(pa, idr_id); + + *p_id = id; + + /* Each leaf node can handle nearly a thousand slots and the + * whole idea of ida is to have small memory foot print. + * Throw away extra resources one by one after each successful + * allocation. 
+ */ + if (ida->idr.id_free_cnt || ida->free_bitmap) { + struct idr_layer *p = alloc_layer(&ida->idr); + if (p) + kmem_cache_free(idr_layer_cache, p); + } + + return 0; +} +EXPORT_SYMBOL(ida_get_new_above); + +/** + * ida_get_new - allocate new ID + * @ida: ida handle + * @p_id: pointer to the allocated handle + * + * Allocate new ID. It should be called with any required locks. + * + * If memory is required, it will return -EAGAIN, you should unlock + * and go back to the ida_pre_get() call. If the ida is full, it will + * return -ENOSPC. + * + * @p_id returns a value in the range 0 ... 0x7fffffff. + */ +int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} +EXPORT_SYMBOL(ida_get_new); + +/** + * ida_remove - remove the given ID + * @ida: ida handle + * @id: ID to free + */ +void ida_remove(struct ida *ida, int id) +{ + struct idr_layer *p = ida->idr.top; + int shift = (ida->idr.layers - 1) * IDR_BITS; + int idr_id = id / IDA_BITMAP_BITS; + int offset = id % IDA_BITMAP_BITS; + int n; + struct ida_bitmap *bitmap; + + /* clear full bits while looking up the leaf idr_layer */ + while ((shift > 0) && p) { + n = (idr_id >> shift) & IDR_MASK; + __clear_bit(n, &p->bitmap); + p = p->ary[n]; + shift -= IDR_BITS; + } + + if (p == NULL) + goto err; + + n = idr_id & IDR_MASK; + __clear_bit(n, &p->bitmap); + + bitmap = (void *)p->ary[n]; + if (!test_bit(offset, bitmap->bitmap)) + goto err; + + /* update bitmap and remove it if empty */ + __clear_bit(offset, bitmap->bitmap); + if (--bitmap->nr_busy == 0) { + __set_bit(n, &p->bitmap); /* to please idr_remove() */ + idr_remove(&ida->idr, idr_id); + free_bitmap(ida, bitmap); + } + + return; + + err: + printk(KERN_WARNING + "ida_remove called for id=%d which is not allocated.\n", id); +} +EXPORT_SYMBOL(ida_remove); + +/** + * ida_destroy - release all cached layers within an ida tree + * @ida: ida handle + */ +void ida_destroy(struct ida *ida) +{ + idr_destroy(&ida->idr); + 
kfree(ida->free_bitmap); +} +EXPORT_SYMBOL(ida_destroy); + +/** + * ida_init - initialize ida handle + * @ida: ida handle + * + * This function is used to set up the handle (@ida) that you will pass + * to the rest of the functions. + */ +void ida_init(struct ida *ida) +{ + memset(ida, 0, sizeof(struct ida)); + idr_init(&ida->idr); + +} +EXPORT_SYMBOL(ida_init); diff -Nurb linux-2.6.22-570/lib/kobject.c linux-2.6.22-591/lib/kobject.c --- linux-2.6.22-570/lib/kobject.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/lib/kobject.c 2007-12-21 15:36:15.000000000 -0500 @@ -44,11 +44,11 @@ return error; } -static int create_dir(struct kobject * kobj, struct dentry *shadow_parent) +static int create_dir(struct kobject * kobj) { int error = 0; if (kobject_name(kobj)) { - error = sysfs_create_dir(kobj, shadow_parent); + error = sysfs_create_dir(kobj); if (!error) { if ((error = populate_dir(kobj))) sysfs_remove_dir(kobj); @@ -157,12 +157,11 @@ } /** - * kobject_shadow_add - add an object to the hierarchy. + * kobject_add - add an object to the hierarchy. * @kobj: object. - * @shadow_parent: sysfs directory to add to. */ -int kobject_shadow_add(struct kobject * kobj, struct dentry *shadow_parent) +int kobject_add(struct kobject * kobj) { int error = 0; struct kobject * parent; @@ -194,7 +193,7 @@ kobj->parent = parent; } - error = create_dir(kobj, shadow_parent); + error = create_dir(kobj); if (error) { /* unlink does the kobject_put() for us */ unlink(kobj); @@ -216,16 +215,6 @@ } /** - * kobject_add - add an object to the hierarchy. - * @kobj: object. - */ -int kobject_add(struct kobject * kobj) -{ - return kobject_shadow_add(kobj, NULL); -} - - -/** * kobject_register - initialize and add an object. * @kobj: object in question. 
*/ @@ -338,7 +327,7 @@ /* Note : if we want to send the new name alone, not the full path, * we could probably use kobject_name(kobj); */ - error = sysfs_rename_dir(kobj, kobj->parent->dentry, new_name); + error = sysfs_rename_dir(kobj, new_name); /* This function is mostly/only used for network interface. * Some hotplug package track interfaces by their name and @@ -355,27 +344,6 @@ } /** - * kobject_rename - change the name of an object - * @kobj: object in question. - * @new_parent: object's new parent - * @new_name: object's new name - */ - -int kobject_shadow_rename(struct kobject * kobj, struct dentry *new_parent, - const char *new_name) -{ - int error = 0; - - kobj = kobject_get(kobj); - if (!kobj) - return -EINVAL; - error = sysfs_rename_dir(kobj, new_parent, new_name); - kobject_put(kobj); - - return error; -} - -/** * kobject_move - move object to another parent * @kobj: object in question. * @new_parent: object's new parent (can be NULL) diff -Nurb linux-2.6.22-570/lib/kobject_uevent.c linux-2.6.22-591/lib/kobject_uevent.c --- linux-2.6.22-570/lib/kobject_uevent.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/lib/kobject_uevent.c 2007-12-21 15:36:15.000000000 -0500 @@ -208,7 +208,7 @@ argv [0] = uevent_helper; argv [1] = (char *)subsystem; argv [2] = NULL; - call_usermodehelper (argv[0], argv, envp, 0); + call_usermodehelper (argv[0], argv, envp, UMH_WAIT_EXEC); } exit: @@ -290,9 +290,8 @@ #if defined(CONFIG_NET) static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, - NULL, THIS_MODULE); - + uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); diff -Nurb linux-2.6.22-570/lib/pagewalk.c linux-2.6.22-591/lib/pagewalk.c --- linux-2.6.22-570/lib/pagewalk.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/lib/pagewalk.c 2007-12-21 
15:36:12.000000000 -0500 @@ -0,0 +1,112 @@ +#include +#include + +static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + struct mm_walk *walk, void *private) +{ + pte_t *pte; + int err; + + for (pte = pte_offset_map(pmd, addr); addr != end; + addr += PAGE_SIZE, pte++) { + if (pte_none(*pte)) + continue; + err = walk->pte_entry(pte, addr, addr, private); + if (err) { + pte_unmap(pte); + return err; + } + } + pte_unmap(pte); + return 0; +} + +static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, + struct mm_walk *walk, void *private) +{ + pmd_t *pmd; + unsigned long next; + int err; + + for (pmd = pmd_offset(pud, addr); addr != end; + pmd++, addr = next) { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + if (walk->pmd_entry) { + err = walk->pmd_entry(pmd, addr, next, private); + if (err) + return err; + } + if (walk->pte_entry) { + err = walk_pte_range(pmd, addr, next, walk, private); + if (err) + return err; + } + } + return 0; +} + +static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, + struct mm_walk *walk, void *private) +{ + pud_t *pud; + unsigned long next; + int err; + + for (pud = pud_offset(pgd, addr); addr != end; + pud++, addr = next) { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + if (walk->pud_entry) { + err = walk->pud_entry(pud, addr, next, private); + if (err) + return err; + } + if (walk->pmd_entry || walk->pte_entry) { + err = walk_pmd_range(pud, addr, next, walk, private); + if (err) + return err; + } + } + return 0; +} + +/* + * walk_page_range - walk a memory map's page tables with a callback + * @mm - memory map to walk + * @addr - starting address + * @end - ending address + * @walk - set of callbacks to invoke for each level of the tree + * @private - private data passed to the callback function + * + * Recursively walk the page table for the memory area in a VMA, calling + * a callback for every 
bottom-level (PTE) page table. + */ +int walk_page_range(struct mm_struct *mm, + unsigned long addr, unsigned long end, + struct mm_walk *walk, void *private) +{ + pgd_t *pgd; + unsigned long next; + int err; + + for (pgd = pgd_offset(mm, addr); addr != end; + pgd++, addr = next) { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + if (walk->pgd_entry) { + err = walk->pgd_entry(pgd, addr, next, private); + if (err) + return err; + } + if (walk->pud_entry || walk->pmd_entry || walk->pte_entry) { + err = walk_pud_range(pgd, addr, next, walk, private); + if (err) + return err; + } + } + return 0; +} diff -Nurb linux-2.6.22-570/lib/radix-tree.c linux-2.6.22-591/lib/radix-tree.c --- linux-2.6.22-570/lib/radix-tree.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/lib/radix-tree.c 2007-12-21 15:36:12.000000000 -0500 @@ -93,7 +93,8 @@ struct radix_tree_node *ret; gfp_t gfp_mask = root_gfp_mask(root); - ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); + ret = kmem_cache_alloc(radix_tree_node_cachep, + set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); if (ret == NULL && !(gfp_mask & __GFP_WAIT)) { struct radix_tree_preload *rtp; @@ -137,7 +138,8 @@ rtp = &__get_cpu_var(radix_tree_preloads); while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { preempt_enable(); - node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); + node = kmem_cache_alloc(radix_tree_node_cachep, + set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); if (node == NULL) goto out; preempt_disable(); diff -Nurb linux-2.6.22-570/mm/filemap.c linux-2.6.22-591/mm/filemap.c --- linux-2.6.22-570/mm/filemap.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/filemap.c 2007-12-21 15:36:12.000000000 -0500 @@ -1334,39 +1334,38 @@ #define MMAP_LOTSAMISS (100) /** - * filemap_nopage - read in file data for page fault handling - * @area: the applicable vm_area - * @address: target address to read in - * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL + * 
filemap_fault - read in file data for page fault handling + * @vma: user vma (not used) + * @fdata: the applicable fault_data * - * filemap_nopage() is invoked via the vma operations vector for a + * filemap_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. */ -struct page *filemap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) { int error; - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; struct file_ra_state *ra = &file->f_ra; struct inode *inode = mapping->host; struct page *page; - unsigned long size, pgoff; - int did_readaround = 0, majmin = VM_FAULT_MINOR; + unsigned long size; + int did_readaround = 0; - pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + fdata->type = VM_FAULT_MINOR; + + BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); -retry_all: size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) + if (fdata->pgoff >= size) goto outside_data_content; /* If we don't want any read-ahead, don't bother */ - if (VM_RandomReadHint(area)) + if (VM_RandomReadHint(vma)) goto no_cached_page; /* @@ -1375,19 +1374,19 @@ * * For sequential accesses, we use the generic readahead logic. */ - if (VM_SequentialReadHint(area)) - page_cache_readahead(mapping, ra, file, pgoff, 1); + if (VM_SequentialReadHint(vma)) + page_cache_readahead(mapping, ra, file, fdata->pgoff, 1); /* * Do we have something in the page cache already? 
*/ retry_find: - page = find_get_page(mapping, pgoff); + page = find_lock_page(mapping, fdata->pgoff); if (!page) { unsigned long ra_pages; - if (VM_SequentialReadHint(area)) { - handle_ra_miss(mapping, ra, pgoff); + if (VM_SequentialReadHint(vma)) { + handle_ra_miss(mapping, ra, fdata->pgoff); goto no_cached_page; } ra->mmap_miss++; @@ -1404,7 +1403,7 @@ * check did_readaround, as this is an inner loop. */ if (!did_readaround) { - majmin = VM_FAULT_MAJOR; + fdata->type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } did_readaround = 1; @@ -1412,11 +1411,11 @@ if (ra_pages) { pgoff_t start = 0; - if (pgoff > ra_pages / 2) - start = pgoff - ra_pages / 2; + if (fdata->pgoff > ra_pages / 2) + start = fdata->pgoff - ra_pages / 2; do_page_cache_readahead(mapping, file, start, ra_pages); } - page = find_get_page(mapping, pgoff); + page = find_lock_page(mapping, fdata->pgoff); if (!page) goto no_cached_page; } @@ -1425,19 +1424,23 @@ ra->mmap_hit++; /* - * Ok, found a page in the page cache, now we need to check - * that it's up-to-date. + * We have a locked page in the page cache, now we need to check + * that it's up-to-date. If not, it is going to be due to an error. */ - if (!PageUptodate(page)) + if (unlikely(!PageUptodate(page))) goto page_not_uptodate; -success: + /* Must recheck i_size under page lock */ + size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(fdata->pgoff >= size)) { + unlock_page(page); + goto outside_data_content; + } + /* * Found the page and have a reference on it. */ mark_page_accessed(page); - if (type) - *type = majmin; return page; outside_data_content: @@ -1445,15 +1448,17 @@ * An external ptracer can access pages that normally aren't * accessible.. 
*/ - if (area->vm_mm == current->mm) - return NOPAGE_SIGBUS; + if (vma->vm_mm == current->mm) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; + } /* Fall through to the non-read-ahead case */ no_cached_page: /* * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, pgoff); + error = page_cache_read(file, fdata->pgoff); /* * The page we want has now been added to the page cache. @@ -1469,12 +1474,15 @@ * to schedule I/O. */ if (error == -ENOMEM) - return NOPAGE_OOM; - return NOPAGE_SIGBUS; + fdata->type = VM_FAULT_OOM; + else + fdata->type = VM_FAULT_SIGBUS; + return NULL; page_not_uptodate: + /* IO error path */ if (!did_readaround) { - majmin = VM_FAULT_MAJOR; + fdata->type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } @@ -1484,38 +1492,39 @@ * because there really aren't any performance issues here * and we need to check for errors. */ - lock_page(page); - - /* Somebody truncated the page on us? */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto retry_all; - } - - /* Somebody else successfully read it in? */ - if (PageUptodate(page)) { - unlock_page(page); - goto success; - } ClearPageError(page); error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (PageUptodate(page)) - goto success; - } else if (error == AOP_TRUNCATED_PAGE) { page_cache_release(page); + + if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; - } - /* - * Things didn't work out. Return zero to tell the - * mm layer so, possibly freeing the page cache page first. - */ + /* Things didn't work out. Return zero to tell the mm layer so. */ shrink_readahead_size_eio(file, ra); - page_cache_release(page); - return NOPAGE_SIGBUS; + fdata->type = VM_FAULT_SIGBUS; + return NULL; +} +EXPORT_SYMBOL(filemap_fault); + +/* + * filemap_nopage and filemap_populate are legacy exports that are not used + * in tree. Scheduled for removal. 
+ */ +struct page *filemap_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct page *page; + struct fault_data fdata; + fdata.address = address; + fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + + area->vm_pgoff; + fdata.flags = 0; + + page = filemap_fault(area, &fdata); + if (type) + *type = fdata.type; + + return page; } EXPORT_SYMBOL(filemap_nopage); @@ -1693,8 +1702,7 @@ EXPORT_SYMBOL(filemap_populate); struct vm_operations_struct generic_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, + .fault = filemap_fault, }; /* This is used for a general mmap of a disk file */ @@ -1707,6 +1715,7 @@ return -ENOEXEC; file_accessed(file); vma->vm_ops = &generic_file_vm_ops; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; return 0; } diff -Nurb linux-2.6.22-570/mm/filemap_xip.c linux-2.6.22-591/mm/filemap_xip.c --- linux-2.6.22-570/mm/filemap_xip.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/filemap_xip.c 2007-12-21 15:36:12.000000000 -0500 @@ -228,62 +228,67 @@ } /* - * xip_nopage() is invoked via the vma operations vector for a + * xip_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. 
* - * This function is derived from filemap_nopage, but used for execute in place + * This function is derived from filemap_fault, but used for execute in place */ -static struct page * -xip_file_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static struct page *xip_file_fault(struct vm_area_struct *area, + struct fault_data *fdata) { struct file *file = area->vm_file; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct page *page; - unsigned long size, pgoff, endoff; + pgoff_t size; - pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; - endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; + /* XXX: are VM_FAULT_ codes OK? */ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) - return NOPAGE_SIGBUS; + if (fdata->pgoff >= size) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; + } - page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); + page = mapping->a_ops->get_xip_page(mapping, + fdata->pgoff*(PAGE_SIZE/512), 0); if (!IS_ERR(page)) goto out; - if (PTR_ERR(page) != -ENODATA) - return NOPAGE_SIGBUS; + if (PTR_ERR(page) != -ENODATA) { + fdata->type = VM_FAULT_OOM; + return NULL; + } /* sparse block */ if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { /* maybe shared writable, allocate new block */ - page = mapping->a_ops->get_xip_page (mapping, - pgoff*(PAGE_SIZE/512), 1); - if (IS_ERR(page)) - return NOPAGE_SIGBUS; + page = mapping->a_ops->get_xip_page(mapping, + fdata->pgoff*(PAGE_SIZE/512), 1); + if (IS_ERR(page)) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; + } /* unmap page at pgoff from all other vmas */ - __xip_unmap(mapping, pgoff); + __xip_unmap(mapping, fdata->pgoff); } else { /* not shared and writable, use xip_sparse_page() */ page = xip_sparse_page(); - if (!page) - 
return NOPAGE_OOM; + if (!page) { + fdata->type = VM_FAULT_OOM; + return NULL; + } } out: + fdata->type = VM_FAULT_MINOR; page_cache_get(page); return page; } static struct vm_operations_struct xip_file_vm_ops = { - .nopage = xip_file_nopage, + .fault = xip_file_fault, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -292,6 +297,7 @@ file_accessed(file); vma->vm_ops = &xip_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } EXPORT_SYMBOL_GPL(xip_file_mmap); diff -Nurb linux-2.6.22-570/mm/fremap.c linux-2.6.22-591/mm/fremap.c --- linux-2.6.22-570/mm/fremap.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/fremap.c 2007-12-21 15:36:12.000000000 -0500 @@ -129,6 +129,25 @@ return err; } +static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long size, pgoff_t pgoff) +{ + int err; + + do { + err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); + if (err) + return err; + + size -= PAGE_SIZE; + addr += PAGE_SIZE; + pgoff++; + } while (size); + + return 0; + +} + /*** * sys_remap_file_pages - remap arbitrary pages of a shared backing store * file within an existing vma. @@ -186,15 +205,27 @@ * the single existing vma. vm_private_data is used as a * swapout cursor in a VM_NONLINEAR vma. */ - if (vma && (vma->vm_flags & VM_SHARED) && - (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && - vma->vm_ops && vma->vm_ops->populate && - end > start && start >= vma->vm_start && - end <= vma->vm_end) { + if (!vma || !(vma->vm_flags & VM_SHARED)) + goto out; + + if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) + goto out; + + if ((!vma->vm_ops || !vma->vm_ops->populate) && + !(vma->vm_flags & VM_CAN_NONLINEAR)) + goto out; + + if (end <= start || start < vma->vm_start || end > vma->vm_end) + goto out; /* Must set VM_NONLINEAR before any pages are populated. 
*/ - if (pgoff != linear_page_index(vma, start) && - !(vma->vm_flags & VM_NONLINEAR)) { + if (!(vma->vm_flags & VM_NONLINEAR)) { + /* Don't need a nonlinear mapping, exit success */ + if (pgoff == linear_page_index(vma, start)) { + err = 0; + goto out; + } + if (!has_write_lock) { up_read(&mm->mmap_sem); down_write(&mm->mmap_sem); @@ -211,8 +242,17 @@ spin_unlock(&mapping->i_mmap_lock); } - err = vma->vm_ops->populate(vma, start, size, - vma->vm_page_prot, + if (vma->vm_flags & VM_CAN_NONLINEAR) { + err = populate_range(mm, vma, start, size, pgoff); + if (!err && !(flags & MAP_NONBLOCK)) { + if (unlikely(has_write_lock)) { + downgrade_write(&mm->mmap_sem); + has_write_lock = 0; + } + make_pages_present(start, start+size); + } + } else + err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, pgoff, flags & MAP_NONBLOCK); /* @@ -220,7 +260,8 @@ * it after ->populate completes, and that would prevent * downgrading the lock. (Locks can't be upgraded). */ - } + +out: if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else diff -Nurb linux-2.6.22-570/mm/hugetlb.c linux-2.6.22-591/mm/hugetlb.c --- linux-2.6.22-570/mm/hugetlb.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/hugetlb.c 2007-12-21 15:36:12.000000000 -0500 @@ -28,6 +28,9 @@ static struct list_head hugepage_freelists[MAX_NUMNODES]; static unsigned int nr_huge_pages_node[MAX_NUMNODES]; static unsigned int free_huge_pages_node[MAX_NUMNODES]; +gfp_t htlb_alloc_mask = GFP_HIGHUSER; +unsigned long hugepages_treat_as_movable; + /* * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages */ @@ -67,14 +70,15 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, unsigned long address) { - int nid = numa_node_id(); + int nid; struct page *page = NULL; - struct zonelist *zonelist = huge_zonelist(vma, address); + struct zonelist *zonelist = huge_zonelist(vma, address, + htlb_alloc_mask); struct zone **z; for (z = zonelist->zones; *z; z++) { nid = zone_to_nid(*z); 
- if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) && + if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) && !list_empty(&hugepage_freelists[nid])) break; } @@ -114,7 +118,7 @@ prev_nid = nid; spin_unlock(&nid_lock); - page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, + page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN, HUGETLB_PAGE_ORDER); if (page) { set_compound_page_dtor(page, free_huge_page); @@ -264,6 +268,19 @@ max_huge_pages = set_max_huge_pages(max_huge_pages); return 0; } + +int hugetlb_treat_movable_handler(struct ctl_table *table, int write, + struct file *file, void __user *buffer, + size_t *length, loff_t *ppos) +{ + proc_dointvec(table, write, file, buffer, length, ppos); + if (hugepages_treat_as_movable) + htlb_alloc_mask = GFP_HIGH_MOVABLE; + else + htlb_alloc_mask = GFP_HIGHUSER; + return 0; +} + #endif /* CONFIG_SYSCTL */ int hugetlb_report_meminfo(char *buf) diff -Nurb linux-2.6.22-570/mm/memory.c linux-2.6.22-591/mm/memory.c --- linux-2.6.22-570/mm/memory.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/memory.c 2007-12-21 15:36:12.000000000 -0500 @@ -1052,7 +1052,8 @@ if (pages) foll_flags |= FOLL_GET; if (!write && !(vma->vm_flags & VM_LOCKED) && - (!vma->vm_ops || !vma->vm_ops->nopage)) + (!vma->vm_ops || (!vma->vm_ops->nopage && + !vma->vm_ops->fault))) foll_flags |= FOLL_ANON; do { @@ -1712,11 +1713,11 @@ if (unlikely(anon_vma_prepare(vma))) goto oom; if (old_page == ZERO_PAGE(address)) { - new_page = alloc_zeroed_user_highpage(vma, address); + new_page = alloc_zeroed_user_highpage_movable(vma, address); if (!new_page) goto oom; } else { - new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (!new_page) goto oom; cow_user_page(new_page, old_page, address, vma); @@ -1828,6 +1829,13 @@ unsigned long restart_addr; int need_break; + /* + * files that support invalidating or truncating portions of the + * file from under 
mmaped areas must set the VM_CAN_INVALIDATE flag, and + * have their .nopage function return the page locked. + */ + BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); + again: restart_addr = vma->vm_truncate_count; if (is_restart_addr(restart_addr) && start_addr < restart_addr) { @@ -1956,17 +1964,8 @@ spin_lock(&mapping->i_mmap_lock); - /* serialize i_size write against truncate_count write */ - smp_wmb(); - /* Protect against page faults, and endless unmapping loops */ + /* Protect against endless unmapping loops */ mapping->truncate_count++; - /* - * For archs where spin_lock has inclusive semantics like ia64 - * this smp_mb() will prevent to read pagetable contents - * before the truncate_count increment is visible to - * other cpus. - */ - smp_mb(); if (unlikely(is_restart_addr(mapping->truncate_count))) { if (mapping->truncate_count == 0) reset_vma_truncate_counts(mapping); @@ -2005,8 +2004,18 @@ if (IS_SWAPFILE(inode)) goto out_busy; i_size_write(inode, offset); + + /* + * unmap_mapping_range is called twice, first simply for efficiency + * so that truncate_inode_pages does fewer single-page unmaps. However + * after this first call, and before truncate_inode_pages finishes, + * it is possible for private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second unmap_mapping_range + * call must be made for correctness. 
+ */ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); truncate_inode_pages(mapping, offset); + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); goto out_truncate; do_expand: @@ -2046,6 +2055,7 @@ down_write(&inode->i_alloc_sem); unmap_mapping_range(mapping, offset, (end - offset), 1); truncate_inode_pages_range(mapping, offset, end); + unmap_mapping_range(mapping, offset, (end - offset), 1); inode->i_op->truncate_range(inode, offset, end); up_write(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); @@ -2208,7 +2218,6 @@ /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); - lazy_mmu_prot_update(pte); unlock: pte_unmap_unlock(page_table, ptl); out: @@ -2241,7 +2250,7 @@ goto oom; if (unlikely(anon_vma_prepare(vma))) goto oom; - page = alloc_zeroed_user_highpage(vma, address); + page = alloc_zeroed_user_highpage_movable(vma, address); if (!page) goto oom; @@ -2284,10 +2293,10 @@ } /* - * do_no_page() tries to create a new page mapping. It aggressively + * __do_fault() tries to create a new page mapping. It aggressively * tries to share with existing pages, but makes a separate copy if - * the "write_access" parameter is true in order to avoid the next - * page fault. + * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid + * the next page fault. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. @@ -2296,92 +2305,85 @@ * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. 
*/ -static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, +static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, - int write_access) -{ + pgoff_t pgoff, unsigned int flags, pte_t orig_pte) + { spinlock_t *ptl; - struct page *new_page; - struct address_space *mapping = NULL; + struct page *page, *faulted_page; pte_t entry; - unsigned int sequence = 0; - int ret = VM_FAULT_MINOR; int anon = 0; struct page *dirty_page = NULL; + struct fault_data fdata; + + fdata.address = address & PAGE_MASK; + fdata.pgoff = pgoff; + fdata.flags = flags; pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); - if (!vx_rss_avail(mm, 1)) + if (likely(vma->vm_ops->fault)) { + fdata.type = -1; + faulted_page = vma->vm_ops->fault(vma, &fdata); + WARN_ON(fdata.type == -1); + if (unlikely(!faulted_page)) + return fdata.type; + } else { + /* Legacy ->nopage path */ + fdata.type = VM_FAULT_MINOR; + faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, + &fdata.type); + /* no page was available -- either SIGBUS or OOM */ + if (unlikely(faulted_page == NOPAGE_SIGBUS)) + return VM_FAULT_SIGBUS; + else if (unlikely(faulted_page == NOPAGE_OOM)) return VM_FAULT_OOM; + } - if (vma->vm_file) { - mapping = vma->vm_file->f_mapping; - sequence = mapping->truncate_count; - smp_rmb(); /* serializes i_size against truncate_count */ - } -retry: - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); - /* - * No smp_rmb is needed here as long as there's a full - * spin_lock/unlock sequence inside the ->nopage callback - * (for the pagecache lookup) that acts as an implicit - * smp_mb() and prevents the i_size read to happen - * after the next truncate_count read. + /* + * For consistency in subsequent calls, make the faulted_page always + * locked. 
*/ - - /* no page was available -- either SIGBUS, OOM or REFAULT */ - if (unlikely(new_page == NOPAGE_SIGBUS)) - return VM_FAULT_SIGBUS; - else if (unlikely(new_page == NOPAGE_OOM)) - return VM_FAULT_OOM; - else if (unlikely(new_page == NOPAGE_REFAULT)) - return VM_FAULT_MINOR; + if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) + lock_page(faulted_page); + else + BUG_ON(!PageLocked(faulted_page)); /* * Should we do an early C-O-W break? */ - if (write_access) { + page = faulted_page; + if (flags & FAULT_FLAG_WRITE) { if (!(vma->vm_flags & VM_SHARED)) { - struct page *page; - - if (unlikely(anon_vma_prepare(vma))) - goto oom; - page = alloc_page_vma(GFP_HIGHUSER, vma, address); - if (!page) - goto oom; - copy_user_highpage(page, new_page, address, vma); - page_cache_release(new_page); - new_page = page; anon = 1; - + if (unlikely(anon_vma_prepare(vma))) { + fdata.type = VM_FAULT_OOM; + goto out; + } + page = alloc_page_vma(GFP_HIGHUSER, vma, address); + if (!page) { + fdata.type = VM_FAULT_OOM; + goto out; + } + copy_user_highpage(page, faulted_page, address, vma); } else { - /* if the page will be shareable, see if the backing + /* + * If the page will be shareable, see if the backing * address space wants to know that the page is about - * to become writable */ + * to become writable + */ if (vma->vm_ops->page_mkwrite && - vma->vm_ops->page_mkwrite(vma, new_page) < 0 - ) { - page_cache_release(new_page); - return VM_FAULT_SIGBUS; + vma->vm_ops->page_mkwrite(vma, page) < 0) { + fdata.type = VM_FAULT_SIGBUS; + anon = 1; /* no anon but release faulted_page */ + goto out; } } + } page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - /* - * For a file-backed vma, someone could have truncated or otherwise - * invalidated this page. If unmap_mapping_range got called, - * retry getting the page. 
- */ - if (mapping && unlikely(sequence != mapping->truncate_count)) { - pte_unmap_unlock(page_table, ptl); - page_cache_release(new_page); - cond_resched(); - sequence = mapping->truncate_count; - smp_rmb(); - goto retry; - } /* * This silly early PAGE_DIRTY setting removes a race @@ -2394,43 +2396,68 @@ * handle that later. */ /* Only go through if we didn't race with anybody else... */ - if (pte_none(*page_table)) { - flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); - if (write_access) + if (likely(pte_same(*page_table, orig_pte))) { + flush_icache_page(vma, page); + entry = mk_pte(page, vma->vm_page_prot); + if (flags & FAULT_FLAG_WRITE) entry = maybe_mkwrite(pte_mkdirty(entry), vma); set_pte_at(mm, address, page_table, entry); if (anon) { inc_mm_counter(mm, anon_rss); - lru_cache_add_active(new_page); - page_add_new_anon_rmap(new_page, vma, address); + lru_cache_add_active(page); + page_add_new_anon_rmap(page, vma, address); } else { inc_mm_counter(mm, file_rss); - page_add_file_rmap(new_page); - if (write_access) { - dirty_page = new_page; + page_add_file_rmap(page); + if (flags & FAULT_FLAG_WRITE) { + dirty_page = page; get_page(dirty_page); } } - } else { - /* One of our sibling threads was faster, back out. 
*/ - page_cache_release(new_page); - goto unlock; - } - /* no need to invalidate: a not-present page shouldn't be cached */ + /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, address, entry); lazy_mmu_prot_update(entry); -unlock: + } else { + if (anon) + page_cache_release(page); + else + anon = 1; /* no anon but release faulted_page */ + } + pte_unmap_unlock(page_table, ptl); - if (dirty_page) { + +out: + unlock_page(faulted_page); + if (anon) + page_cache_release(faulted_page); + else if (dirty_page) { set_page_dirty_balance(dirty_page); put_page(dirty_page); } - return ret; -oom: - page_cache_release(new_page); - return VM_FAULT_OOM; + + return fdata.type; +} + +static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + int write_access, pte_t orig_pte) +{ + pgoff_t pgoff = (((address & PAGE_MASK) + - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; + unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); + + return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); +} + +static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + int write_access, pgoff_t pgoff, pte_t orig_pte) +{ + unsigned int flags = FAULT_FLAG_NONLINEAR | + (write_access ? 
FAULT_FLAG_WRITE : 0); + + return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); } /* @@ -2509,9 +2536,14 @@ print_bad_pte(vma, orig_pte, address); return VM_FAULT_OOM; } - /* We can then assume vm->vm_ops && vma->vm_ops->populate */ pgoff = pte_to_pgoff(orig_pte); + + if (vma->vm_ops && vma->vm_ops->fault) + return do_nonlinear_fault(mm, vma, address, page_table, pmd, + write_access, pgoff, orig_pte); + + /* We can then assume vm->vm_ops && vma->vm_ops->populate */ err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0); if (err == -ENOMEM) @@ -2546,10 +2578,9 @@ if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { - if (vma->vm_ops->nopage) - return do_no_page(mm, vma, address, - pte, pmd, - write_access); + if (vma->vm_ops->fault || vma->vm_ops->nopage) + return do_linear_fault(mm, vma, address, + pte, pmd, write_access, entry); if (unlikely(vma->vm_ops->nopfn)) return do_no_pfn(mm, vma, address, pte, pmd, write_access); diff -Nurb linux-2.6.22-570/mm/mempolicy.c linux-2.6.22-591/mm/mempolicy.c --- linux-2.6.22-570/mm/mempolicy.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/mempolicy.c 2007-12-21 15:36:12.000000000 -0500 @@ -594,7 +594,7 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x) { - return alloc_pages_node(node, GFP_HIGHUSER, 0); + return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0); } /* @@ -710,7 +710,8 @@ { struct vm_area_struct *vma = (struct vm_area_struct *)private; - return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma)); + return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, + page_address_in_vma(page, vma)); } #else @@ -1202,7 +1203,8 @@ #ifdef CONFIG_HUGETLBFS /* Return a zonelist suitable for a huge page allocation. 
*/ -struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) +struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, + gfp_t gfp_flags) { struct mempolicy *pol = get_vma_policy(current, vma, addr); @@ -1210,7 +1212,7 @@ unsigned nid; nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); - return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER); + return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); } return zonelist_policy(GFP_HIGHUSER, pol); } @@ -1309,7 +1311,6 @@ * keeps mempolicies cpuset relative after its cpuset moves. See * further kernel/cpuset.c update_nodemask(). */ -void *cpuset_being_rebound; /* Slow path of a mempolicy copy */ struct mempolicy *__mpol_copy(struct mempolicy *old) @@ -1908,4 +1909,3 @@ m->version = (vma != priv->tail_vma) ? vma->vm_start : 0; return 0; } - diff -Nurb linux-2.6.22-570/mm/migrate.c linux-2.6.22-591/mm/migrate.c --- linux-2.6.22-570/mm/migrate.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/migrate.c 2007-12-21 15:36:12.000000000 -0500 @@ -761,7 +761,8 @@ *result = &pm->status; - return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0); + return alloc_pages_node(pm->node, + GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); } /* diff -Nurb linux-2.6.22-570/mm/mmap.c linux-2.6.22-591/mm/mmap.c --- linux-2.6.22-570/mm/mmap.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/mmap.c 2007-12-21 15:36:12.000000000 -0500 @@ -202,6 +202,17 @@ } /* + * Requires inode->i_mapping->i_mmap_lock + */ +void __unlink_file_vma(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct address_space *mapping = file->f_mapping; + + __remove_shared_vm_struct(vma, file, mapping); +} + +/* * Unlink a file-based vm structure from its prio_tree, to hide * vma from rmap and vmtruncate before freeing its page tables. 
*/ @@ -1023,7 +1034,7 @@ } } - error = security_file_mmap(file, reqprot, prot, flags); + error = security_file_mmap(file, reqprot, prot, flags, addr, 0); if (error) return error; @@ -1150,12 +1161,8 @@ vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } - if (flags & MAP_POPULATE) { - up_write(&mm->mmap_sem); - sys_remap_file_pages(addr, len, 0, - pgoff, flags & MAP_NONBLOCK); - down_write(&mm->mmap_sem); - } + if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) + make_pages_present(addr, addr + len); return addr; unmap_and_free_vma: diff -Nurb linux-2.6.22-570/mm/mremap.c linux-2.6.22-591/mm/mremap.c --- linux-2.6.22-570/mm/mremap.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/mremap.c 2007-12-21 15:36:12.000000000 -0500 @@ -292,6 +292,10 @@ if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; + ret = security_file_mmap(0, 0, 0, 0, new_addr, 1); + if (ret) + goto out; + ret = do_munmap(mm, new_addr, new_len); if (ret) goto out; @@ -394,8 +398,13 @@ new_addr = get_unmapped_area(vma->vm_file, 0, new_len, vma->vm_pgoff, map_flags); + if (new_addr & ~PAGE_MASK) { ret = new_addr; - if (new_addr & ~PAGE_MASK) + goto out; + } + + ret = security_file_mmap(0, 0, 0, 0, new_addr, 1); + if (ret) goto out; } ret = move_vma(vma, addr, old_len, new_len, new_addr); diff -Nurb linux-2.6.22-570/mm/nommu.c linux-2.6.22-591/mm/nommu.c --- linux-2.6.22-570/mm/nommu.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/nommu.c 2007-12-21 15:36:12.000000000 -0500 @@ -639,7 +639,7 @@ } /* allow the security API to have its say */ - ret = security_file_mmap(file, reqprot, prot, flags); + ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); if (ret < 0) return ret; @@ -1336,8 +1336,7 @@ return 0; } -struct page *filemap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) { BUG(); return NULL; diff -Nurb 
linux-2.6.22-570/mm/page_alloc.c linux-2.6.22-591/mm/page_alloc.c --- linux-2.6.22-570/mm/page_alloc.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/page_alloc.c 2007-12-21 15:36:12.000000000 -0500 @@ -143,6 +143,42 @@ EXPORT_SYMBOL(nr_node_ids); #endif +#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY +static inline int get_pageblock_migratetype(struct page *page) +{ + return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); +} + +static void set_pageblock_migratetype(struct page *page, int migratetype) +{ + set_pageblock_flags_group(page, (unsigned long)migratetype, + PB_migrate, PB_migrate_end); +} + +static inline int gfpflags_to_migratetype(gfp_t gfp_flags) +{ + WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + + return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | + ((gfp_flags & __GFP_RECLAIMABLE) != 0); +} + +#else +static inline int get_pageblock_migratetype(struct page *page) +{ + return MIGRATE_UNMOVABLE; +} + +static void set_pageblock_migratetype(struct page *page, int migratetype) +{ +} + +static inline int gfpflags_to_migratetype(gfp_t gfp_flags) +{ + return MIGRATE_UNMOVABLE; +} +#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ + #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { @@ -397,6 +433,7 @@ { unsigned long page_idx; int order_size = 1 << order; + int migratetype = get_pageblock_migratetype(page); if (unlikely(PageCompound(page))) destroy_compound_page(page, order); @@ -409,7 +446,6 @@ __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); while (order < MAX_ORDER-1) { unsigned long combined_idx; - struct free_area *area; struct page *buddy; buddy = __page_find_buddy(page, page_idx, order); @@ -417,8 +453,7 @@ break; /* Move the buddy up one level. 
*/ list_del(&buddy->lru); - area = zone->free_area + order; - area->nr_free--; + zone->free_area[order].nr_free--; rmv_page_order(buddy); combined_idx = __find_combined_index(page_idx, order); page = page + (combined_idx - page_idx); @@ -426,7 +461,8 @@ order++; } set_page_order(page, order); - list_add(&page->lru, &zone->free_area[order].free_list); + list_add(&page->lru, + &zone->free_area[order].free_list[migratetype]); zone->free_area[order].nr_free++; } @@ -566,7 +602,8 @@ * -- wli */ static inline void expand(struct zone *zone, struct page *page, - int low, int high, struct free_area *area) + int low, int high, struct free_area *area, + int migratetype) { unsigned long size = 1 << high; @@ -575,7 +612,7 @@ high--; size >>= 1; VM_BUG_ON(bad_range(zone, &page[size])); - list_add(&page[size].lru, &area->free_list); + list_add(&page[size].lru, &area->free_list[migratetype]); area->nr_free++; set_page_order(&page[size], high); } @@ -628,31 +665,172 @@ return 0; } +#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY +/* + * This array describes the order lists are fallen back to when + * the free lists for the desirable migrate type are depleted + */ +static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE }, + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE }, +}; + +/* + * Move the free pages in a range to the free lists of the requested type. + * Note that start_page and end_pages are not aligned in a MAX_ORDER_NR_PAGES + * boundary. If alignment is required, use move_freepages_block() + */ +int move_freepages(struct zone *zone, + struct page *start_page, struct page *end_page, + int migratetype) +{ + struct page *page; + unsigned long order; + int blocks_moved = 0; + +#ifndef CONFIG_HOLES_IN_ZONE + /* + * page_zone is not safe to call in this context when + * CONFIG_HOLES_IN_ZONE is set. 
This bug check is probably redundant + * anyway as we check zone boundaries in move_freepages_block(). + * Remove at a later date when no bug reports exist related to + * CONFIG_PAGE_GROUP_BY_MOBILITY + */ + BUG_ON(page_zone(start_page) != page_zone(end_page)); +#endif + + for (page = start_page; page <= end_page;) { + if (!pfn_valid_within(page_to_pfn(page))) { + page++; + continue; + } + + if (!PageBuddy(page)) { + page++; + continue; + } + + order = page_order(page); + list_del(&page->lru); + list_add(&page->lru, + &zone->free_area[order].free_list[migratetype]); + page += 1 << order; + blocks_moved++; + } + + return blocks_moved; +} + +int move_freepages_block(struct zone *zone, struct page *page, int migratetype) +{ + unsigned long start_pfn, end_pfn; + struct page *start_page, *end_page; + + start_pfn = page_to_pfn(page); + start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1); + start_page = pfn_to_page(start_pfn); + end_page = start_page + MAX_ORDER_NR_PAGES - 1; + end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1; + + /* Do not cross zone boundaries */ + if (start_pfn < zone->zone_start_pfn) + start_page = page; + if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages) + return 0; + + return move_freepages(zone, start_page, end_page, migratetype); +} + +/* Remove an element from the buddy allocator from the fallback list */ +static struct page *__rmqueue_fallback(struct zone *zone, int order, + int start_migratetype) +{ + struct free_area * area; + int current_order; + struct page *page; + int migratetype, i; + + /* Find the largest possible block of pages in the other list */ + for (current_order = MAX_ORDER-1; current_order >= order; + --current_order) { + for (i = 0; i < MIGRATE_TYPES - 1; i++) { + migratetype = fallbacks[start_migratetype][i]; + + area = &(zone->free_area[current_order]); + if (list_empty(&area->free_list[migratetype])) + continue; + + page = list_entry(area->free_list[migratetype].next, + struct page, lru); + area->nr_free--; + + /* + * If 
breaking a large block of pages, move all free + * pages to the preferred allocation list + */ + if (unlikely(current_order >= MAX_ORDER / 2)) { + migratetype = start_migratetype; + move_freepages_block(zone, page, migratetype); + } + + /* Remove the page from the freelists */ + list_del(&page->lru); + rmv_page_order(page); + __mod_zone_page_state(zone, NR_FREE_PAGES, + -(1UL << order)); + + if (current_order == MAX_ORDER - 1) + set_pageblock_migratetype(page, + start_migratetype); + + expand(zone, page, order, current_order, area, migratetype); + return page; + } + } + + return NULL; +} +#else +static struct page *__rmqueue_fallback(struct zone *zone, int order, + int start_migratetype) +{ + return NULL; +} +#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ + /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. */ -static struct page *__rmqueue(struct zone *zone, unsigned int order) +static struct page *__rmqueue(struct zone *zone, unsigned int order, + int migratetype) { struct free_area * area; unsigned int current_order; struct page *page; + /* Find a page of the appropriate size in the preferred list */ for (current_order = order; current_order < MAX_ORDER; ++current_order) { - area = zone->free_area + current_order; - if (list_empty(&area->free_list)) + area = &(zone->free_area[current_order]); + if (list_empty(&area->free_list[migratetype])) continue; - page = list_entry(area->free_list.next, struct page, lru); + page = list_entry(area->free_list[migratetype].next, + struct page, lru); list_del(&page->lru); rmv_page_order(page); area->nr_free--; __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order)); - expand(zone, page, order, current_order, area); - return page; + expand(zone, page, order, current_order, area, migratetype); + goto got_page; } - return NULL; + page = __rmqueue_fallback(zone, order, migratetype); + +got_page: + + return page; } /* @@ -661,16 +839,18 @@ * Returns the number of new 
pages which were placed at *list. */ static int rmqueue_bulk(struct zone *zone, unsigned int order, - unsigned long count, struct list_head *list) + unsigned long count, struct list_head *list, + int migratetype) { int i; spin_lock(&zone->lock); for (i = 0; i < count; ++i) { - struct page *page = __rmqueue(zone, order); + struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) break; - list_add_tail(&page->lru, list); + list_add(&page->lru, list); + set_page_private(page, migratetype); } spin_unlock(&zone->lock); return i; @@ -732,7 +912,7 @@ { unsigned long pfn, max_zone_pfn; unsigned long flags; - int order; + int order, t; struct list_head *curr; if (!zone->spanned_pages) @@ -749,15 +929,15 @@ swsusp_unset_page_free(page); } - for (order = MAX_ORDER - 1; order >= 0; --order) - list_for_each(curr, &zone->free_area[order].free_list) { + for_each_migratetype_order(order, t) { + list_for_each(curr, &zone->free_area[order].free_list[t]) { unsigned long i; pfn = page_to_pfn(list_entry(curr, struct page, lru)); for (i = 0; i < (1UL << order); i++) swsusp_set_page_free(pfn_to_page(pfn + i)); } - + } spin_unlock_irqrestore(&zone->lock, flags); } @@ -797,6 +977,7 @@ local_irq_save(flags); __count_vm_event(PGFREE); list_add(&page->lru, &pcp->list); + set_page_private(page, get_pageblock_migratetype(page)); pcp->count++; if (pcp->count >= pcp->high) { free_pages_bulk(zone, pcp->batch, &pcp->list, 0); @@ -846,6 +1027,7 @@ struct page *page; int cold = !!(gfp_flags & __GFP_COLD); int cpu; + int migratetype = gfpflags_to_migratetype(gfp_flags); again: cpu = get_cpu(); @@ -856,16 +1038,32 @@ local_irq_save(flags); if (!pcp->count) { pcp->count = rmqueue_bulk(zone, 0, - pcp->batch, &pcp->list); + pcp->batch, &pcp->list, migratetype); if (unlikely(!pcp->count)) goto failed; } + +#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY + /* Find a page of the appropriate migrate type */ + list_for_each_entry(page, &pcp->list, lru) + if (page_private(page) == migratetype) 
+ break; + + /* Allocate more to the pcp list if necessary */ + if (unlikely(&page->lru == &pcp->list)) { + pcp->count += rmqueue_bulk(zone, 0, + pcp->batch, &pcp->list, migratetype); + page = list_entry(pcp->list.next, struct page, lru); + } +#else page = list_entry(pcp->list.next, struct page, lru); +#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ + list_del(&page->lru); pcp->count--; } else { spin_lock_irqsave(&zone->lock, flags); - page = __rmqueue(zone, order); + page = __rmqueue(zone, order, migratetype); spin_unlock(&zone->lock); if (!page) goto failed; @@ -1952,6 +2150,16 @@ init_page_count(page); reset_page_mapcount(page); SetPageReserved(page); + + /* + * Mark the block movable so that blocks are reserved for + * movable at startup. This will force kernel allocations + * to reserve their blocks rather than leaking throughout + * the address space during boot when many long-lived + * kernel allocations are made + */ + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + INIT_LIST_HEAD(&page->lru); #ifdef WANT_PAGE_VIRTUAL /* The shift won't overflow because ZONE_NORMAL is below 4G. */ @@ -1964,9 +2172,9 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size) { - int order; - for (order = 0; order < MAX_ORDER ; order++) { - INIT_LIST_HEAD(&zone->free_area[order].free_list); + int order, t; + for_each_migratetype_order(order, t) { + INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); zone->free_area[order].nr_free = 0; } } @@ -2584,6 +2792,41 @@ realtotalpages); } +#ifndef CONFIG_SPARSEMEM +/* + * Calculate the size of the zone->blockflags rounded to an unsigned long + * Start by making sure zonesize is a multiple of MAX_ORDER-1 by rounding up + * Then figure 1 NR_PAGEBLOCK_BITS worth of bits per MAX_ORDER-1, finally + * round what is now in bits to nearest long in bits, then return it in + * bytes. 
+ */ +static unsigned long __init usemap_size(unsigned long zonesize) +{ + unsigned long usemapsize; + + usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES); + usemapsize = usemapsize >> (MAX_ORDER-1); + usemapsize *= NR_PAGEBLOCK_BITS; + usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long)); + + return usemapsize / 8; +} + +static void __init setup_usemap(struct pglist_data *pgdat, + struct zone *zone, unsigned long zonesize) +{ + unsigned long usemapsize = usemap_size(zonesize); + zone->pageblock_flags = NULL; + if (usemapsize) { + zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); + memset(zone->pageblock_flags, 0, usemapsize); + } +} +#else +static void inline setup_usemap(struct pglist_data *pgdat, + struct zone *zone, unsigned long zonesize) {} +#endif /* CONFIG_SPARSEMEM */ + /* * Set up the zone data structures: * - mark all pages reserved @@ -2664,6 +2907,7 @@ if (!size) continue; + setup_usemap(pgdat, zone, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size, MEMMAP_EARLY); BUG_ON(ret); @@ -3363,6 +3607,21 @@ for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++) ; table = (void*) __get_free_pages(GFP_ATOMIC, order); + /* + * If bucketsize is not a power-of-two, we may free + * some pages at the end of hash table. 
+ */ + if (table) { + unsigned long alloc_end = (unsigned long)table + + (PAGE_SIZE << order); + unsigned long used = (unsigned long)table + + PAGE_ALIGN(size); + split_page(virt_to_page(table), order); + while (used < alloc_end) { + free_page(used); + used += PAGE_SIZE; + } + } } } while (!table && size > PAGE_SIZE && --log2qty); @@ -3396,4 +3655,79 @@ EXPORT_SYMBOL(page_to_pfn); #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ +/* Return a pointer to the bitmap storing bits affecting a block of pages */ +static inline unsigned long *get_pageblock_bitmap(struct zone *zone, + unsigned long pfn) +{ +#ifdef CONFIG_SPARSEMEM + return __pfn_to_section(pfn)->pageblock_flags; +#else + return zone->pageblock_flags; +#endif /* CONFIG_SPARSEMEM */ +} +static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) +{ +#ifdef CONFIG_SPARSEMEM + pfn &= (PAGES_PER_SECTION-1); + return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS; +#else + pfn = pfn - zone->zone_start_pfn; + return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS; +#endif /* CONFIG_SPARSEMEM */ +} + +/** + * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages + * @page: The page within the block of interest + * @start_bitidx: The first bit of interest to retrieve + * @end_bitidx: The last bit of interest + * returns pageblock_bits flags + */ +unsigned long get_pageblock_flags_group(struct page *page, + int start_bitidx, int end_bitidx) +{ + struct zone *zone; + unsigned long *bitmap; + unsigned long pfn, bitidx; + unsigned long flags = 0; + unsigned long value = 1; + + zone = page_zone(page); + pfn = page_to_pfn(page); + bitmap = get_pageblock_bitmap(zone, pfn); + bitidx = pfn_to_bitidx(zone, pfn); + + for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) + if (test_bit(bitidx + start_bitidx, bitmap)) + flags |= value; + + return flags; +} + +/** + * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages + * 
@page: The page within the block of interest + * @start_bitidx: The first bit of interest + * @end_bitidx: The last bit of interest + * @flags: The flags to set + */ +void set_pageblock_flags_group(struct page *page, unsigned long flags, + int start_bitidx, int end_bitidx) +{ + struct zone *zone; + unsigned long *bitmap; + unsigned long pfn, bitidx; + unsigned long value = 1; + + zone = page_zone(page); + pfn = page_to_pfn(page); + bitmap = get_pageblock_bitmap(zone, pfn); + bitidx = pfn_to_bitidx(zone, pfn); + + for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) + if (flags & value) + __set_bit(bitidx + start_bitidx, bitmap); + else + __clear_bit(bitidx + start_bitidx, bitmap); +} diff -Nurb linux-2.6.22-570/mm/pdflush.c linux-2.6.22-591/mm/pdflush.c --- linux-2.6.22-570/mm/pdflush.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/pdflush.c 2007-12-21 15:36:12.000000000 -0500 @@ -92,6 +92,7 @@ static int __pdflush(struct pdflush_work *my_work) { current->flags |= PF_FLUSHER | PF_SWAPWRITE; + set_freezable(); my_work->fn = NULL; my_work->who = current; INIT_LIST_HEAD(&my_work->list); diff -Nurb linux-2.6.22-570/mm/rmap.c linux-2.6.22-591/mm/rmap.c --- linux-2.6.22-570/mm/rmap.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/rmap.c 2007-12-21 15:36:12.000000000 -0500 @@ -622,8 +622,10 @@ printk (KERN_EMERG " page->count = %x\n", page_count(page)); printk (KERN_EMERG " page->mapping = %p\n", page->mapping); print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); - if (vma->vm_ops) + if (vma->vm_ops) { print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); + print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); + } if (vma->vm_file && vma->vm_file->f_op) print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); BUG(); diff -Nurb linux-2.6.22-570/mm/shmem.c linux-2.6.22-591/mm/shmem.c --- 
linux-2.6.22-570/mm/shmem.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/shmem.c 2007-12-21 15:36:12.000000000 -0500 @@ -81,6 +81,7 @@ SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ SGP_WRITE, /* may exceed i_size, may allocate page */ + SGP_FAULT, /* same as SGP_CACHE, return with page locked */ }; static int shmem_getpage(struct inode *inode, unsigned long idx, @@ -92,8 +93,11 @@ * The above definition of ENTRIES_PER_PAGE, and the use of * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: * might be reconsidered if it ever diverges from PAGE_SIZE. + * + * Mobility flags are masked out as swap vectors cannot move */ - return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); + return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, + PAGE_CACHE_SHIFT-PAGE_SHIFT); } static inline void shmem_dir_free(struct page *page) @@ -371,7 +375,7 @@ } spin_unlock(&info->lock); - page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); + page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping)); if (page) set_page_private(page, 0); spin_lock(&info->lock); @@ -1110,6 +1114,10 @@ if (idx >= SHMEM_MAX_INDEX) return -EFBIG; + + if (type) + *type = VM_FAULT_MINOR; + /* * Normally, filepage is NULL on entry, and either found * uptodate immediately, or allocated and zeroed, or read @@ -1299,8 +1307,10 @@ } done: if (*pagep != filepage) { - unlock_page(filepage); *pagep = filepage; + if (sgp != SGP_FAULT) + unlock_page(filepage); + } return 0; @@ -1312,72 +1322,29 @@ return error; } -static struct page *shmem_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static struct page *shmem_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct page *page = NULL; - unsigned long idx; int error; - idx = (address - vma->vm_start) >> PAGE_SHIFT; - idx += vma->vm_pgoff; - idx >>= 
PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return NOPAGE_SIGBUS; - - error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); - if (error) - return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; - - mark_page_accessed(page); - return page; -} - -static int shmem_populate(struct vm_area_struct *vma, - unsigned long addr, unsigned long len, - pgprot_t prot, unsigned long pgoff, int nonblock) -{ - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct mm_struct *mm = vma->vm_mm; - enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; - unsigned long size; - - size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) - return -EINVAL; + BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); - while ((long) len > 0) { - struct page *page = NULL; - int err; - /* - * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE - */ - err = shmem_getpage(inode, pgoff, &page, sgp, NULL); - if (err) - return err; - /* Page may still be null, but only if nonblock was set. */ - if (page) { - mark_page_accessed(page); - err = install_page(mm, vma, addr, page, prot); - if (err) { - page_cache_release(page); - return err; - } - } else if (vma->vm_flags & VM_NONLINEAR) { - /* No page was found just because we can't read it in - * now (being here implies nonblock != 0), but the page - * may exist, so set the PTE to fault it in later. 
*/ - err = install_file_pte(mm, vma, addr, pgoff, prot); - if (err) - return err; + if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; } - len -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; + error = shmem_getpage(inode, fdata->pgoff, &page, + SGP_FAULT, &fdata->type); + if (error) { + fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS); + return NULL; } - return 0; + + mark_page_accessed(page); + return page; } #ifdef CONFIG_NUMA @@ -1424,6 +1391,7 @@ { file_accessed(file); vma->vm_ops = &shmem_vm_ops; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; return 0; } @@ -2477,8 +2445,7 @@ }; static struct vm_operations_struct shmem_vm_ops = { - .nopage = shmem_nopage, - .populate = shmem_populate, + .fault = shmem_fault, #ifdef CONFIG_NUMA .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, @@ -2614,5 +2581,6 @@ fput(vma->vm_file); vma->vm_file = file; vma->vm_ops = &shmem_vm_ops; + vma->vm_flags |= VM_CAN_INVALIDATE; return 0; } diff -Nurb linux-2.6.22-570/mm/shmem.c.orig linux-2.6.22-591/mm/shmem.c.orig --- linux-2.6.22-570/mm/shmem.c.orig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/mm/shmem.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,2619 +0,0 @@ -/* - * Resizable virtual memory filesystem for Linux. - * - * Copyright (C) 2000 Linus Torvalds. - * 2000 Transmeta Corp. - * 2000-2001 Christoph Rohland - * 2000-2001 SAP AG - * 2002 Red Hat Inc. - * Copyright (C) 2002-2005 Hugh Dickins. - * Copyright (C) 2002-2005 VERITAS Software Corporation. - * Copyright (C) 2004 Andi Kleen, SuSE Labs - * - * Extended attribute support for tmpfs: - * Copyright (c) 2004, Luke Kenneth Casson Leighton - * Copyright (c) 2004 Red Hat, Inc., James Morris - * - * This file is released under the GPL. - */ - -/* - * This virtual memory filesystem is heavily based on the ramfs. 
It - * extends ramfs by the ability to use swap and honor resource limits - * which makes it a completely usable filesystem. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* This magic number is used in glibc for posix shared memory */ -#define TMPFS_MAGIC 0x01021994 - -#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) -#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) - -#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) - -#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) - -/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ -#define SHMEM_PAGEIN VM_READ -#define SHMEM_TRUNCATE VM_WRITE - -/* Definition to limit shmem_truncate's steps between cond_rescheds */ -#define LATENCY_LIMIT 64 - -/* Pretend that each entry is of this size in directory's i_size */ -#define BOGO_DIRENT_SIZE 20 - -/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ -enum sgp_type { - SGP_QUICK, /* don't try more than file page cache lookup */ - SGP_READ, /* don't exceed i_size, don't allocate page */ - SGP_CACHE, /* don't exceed i_size, may allocate page */ - SGP_WRITE, /* may exceed i_size, may allocate page */ -}; - -static int shmem_getpage(struct inode *inode, unsigned long idx, - struct page **pagep, enum sgp_type sgp, int *type); - -static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) -{ - /* - * The above definition of ENTRIES_PER_PAGE, and the use of - * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. 
- */ - return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static inline void shmem_dir_free(struct page *page) -{ - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static struct page **shmem_dir_map(struct page *page) -{ - return (struct page **)kmap_atomic(page, KM_USER0); -} - -static inline void shmem_dir_unmap(struct page **dir) -{ - kunmap_atomic(dir, KM_USER0); -} - -static swp_entry_t *shmem_swp_map(struct page *page) -{ - return (swp_entry_t *)kmap_atomic(page, KM_USER1); -} - -static inline void shmem_swp_balance_unmap(void) -{ - /* - * When passing a pointer to an i_direct entry, to code which - * also handles indirect entries and so will shmem_swp_unmap, - * we must arrange for the preempt count to remain in balance. - * What kmap_atomic of a lowmem page does depends on config - * and architecture, so pretend to kmap_atomic some lowmem page. - */ - (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); -} - -static inline void shmem_swp_unmap(swp_entry_t *entry) -{ - kunmap_atomic(entry, KM_USER1); -} - -static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -/* - * shmem_file_setup pre-accounts the whole fixed size of a VM object, - * for shared memory and for shared anonymous (/dev/zero) mappings - * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), - * consistent with the pre-accounting of private mappings ... - */ -static inline int shmem_acct_size(unsigned long flags, loff_t size) -{ - return (flags & VM_ACCOUNT)? - security_vm_enough_memory(VM_ACCT(size)): 0; -} - -static inline void shmem_unacct_size(unsigned long flags, loff_t size) -{ - if (flags & VM_ACCOUNT) - vm_unacct_memory(VM_ACCT(size)); -} - -/* - * ... whereas tmpfs objects are accounted incrementally as - * pages are allocated, in order to allow huge sparse files. - * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM, - * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. 
- */ -static inline int shmem_acct_block(unsigned long flags) -{ - return (flags & VM_ACCOUNT)? - 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); -} - -static inline void shmem_unacct_blocks(unsigned long flags, long pages) -{ - if (!(flags & VM_ACCOUNT)) - vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); -} - -static const struct super_operations shmem_ops; -static const struct address_space_operations shmem_aops; -static const struct file_operations shmem_file_operations; -static const struct inode_operations shmem_inode_operations; -static const struct inode_operations shmem_dir_inode_operations; -static const struct inode_operations shmem_special_inode_operations; -static struct vm_operations_struct shmem_vm_ops; - -static struct backing_dev_info shmem_backing_dev_info __read_mostly = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, - .unplug_io_fn = default_unplug_io_fn, -}; - -static LIST_HEAD(shmem_swaplist); -static DEFINE_SPINLOCK(shmem_swaplist_lock); - -static void shmem_free_blocks(struct inode *inode, long pages) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks += pages; - inode->i_blocks -= pages*BLOCKS_PER_PAGE; - spin_unlock(&sbinfo->stat_lock); - } -} - -/* - * shmem_recalc_inode - recalculate the size of an inode - * - * @inode: inode to recalc - * - * We have to calculate the free blocks since the mm can drop - * undirtied hole pages behind our back. - * - * But normally info->alloced == inode->i_mapping->nrpages + info->swapped - * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) - * - * It has to be called with the spinlock held. 
- */ -static void shmem_recalc_inode(struct inode *inode) -{ - struct shmem_inode_info *info = SHMEM_I(inode); - long freed; - - freed = info->alloced - info->swapped - inode->i_mapping->nrpages; - if (freed > 0) { - info->alloced -= freed; - shmem_unacct_blocks(info->flags, freed); - shmem_free_blocks(inode, freed); - } -} - -/* - * shmem_swp_entry - find the swap vector position in the info structure - * - * @info: info structure for the inode - * @index: index of the page to find - * @page: optional page to add to the structure. Has to be preset to - * all zeros - * - * If there is no space allocated yet it will return NULL when - * page is NULL, else it will use the page for the needed block, - * setting it to NULL on return to indicate that it has been used. - * - * The swap vector is organized the following way: - * - * There are SHMEM_NR_DIRECT entries directly stored in the - * shmem_inode_info structure. So small files do not need an addional - * allocation. - * - * For pages with index > SHMEM_NR_DIRECT there is the pointer - * i_indirect which points to a page which holds in the first half - * doubly indirect blocks, in the second half triple indirect blocks: - * - * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the - * following layout (for SHMEM_NR_DIRECT == 16): - * - * i_indirect -> dir --> 16-19 - * | +-> 20-23 - * | - * +-->dir2 --> 24-27 - * | +-> 28-31 - * | +-> 32-35 - * | +-> 36-39 - * | - * +-->dir3 --> 40-43 - * +-> 44-47 - * +-> 48-51 - * +-> 52-55 - */ -static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) -{ - unsigned long offset; - struct page **dir; - struct page *subdir; - - if (index < SHMEM_NR_DIRECT) { - shmem_swp_balance_unmap(); - return info->i_direct+index; - } - if (!info->i_indirect) { - if (page) { - info->i_indirect = *page; - *page = NULL; - } - return NULL; /* need another page */ - } - - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index 
/= ENTRIES_PER_PAGE; - dir = shmem_dir_map(info->i_indirect); - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; - dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; - index %= ENTRIES_PER_PAGE; - subdir = *dir; - if (!subdir) { - if (page) { - *dir = *page; - *page = NULL; - } - shmem_dir_unmap(dir); - return NULL; /* need another page */ - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - - dir += index; - subdir = *dir; - if (!subdir) { - if (!page || !(subdir = *page)) { - shmem_dir_unmap(dir); - return NULL; /* need a page */ - } - *dir = subdir; - *page = NULL; - } - shmem_dir_unmap(dir); - return shmem_swp_map(subdir) + offset; -} - -static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) -{ - long incdec = value? 1: -1; - - entry->val = value; - info->swapped += incdec; - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { - struct page *page = kmap_atomic_to_page(entry); - set_page_private(page, page_private(page) + incdec); - } -} - -/* - * shmem_swp_alloc - get the position of the swap entry for the page. - * If it does not exist allocate the entry. - * - * @info: info structure for the inode - * @index: index of the page to find - * @sgp: check and recheck i_size? skip allocation? - */ -static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp) -{ - struct inode *inode = &info->vfs_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - struct page *page = NULL; - swp_entry_t *entry; - - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return ERR_PTR(-EINVAL); - - while (!(entry = shmem_swp_entry(info, index, &page))) { - if (sgp == SGP_READ) - return shmem_swp_map(ZERO_PAGE(0)); - /* - * Test free_blocks against 1 not 0, since we have 1 data - * page (and perhaps indirect index pages) yet to allocate: - * a waste to allocate index if we cannot allocate data. 
- */ - if (sbinfo->max_blocks) { - spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks <= 1) { - spin_unlock(&sbinfo->stat_lock); - return ERR_PTR(-ENOSPC); - } - sbinfo->free_blocks--; - inode->i_blocks += BLOCKS_PER_PAGE; - spin_unlock(&sbinfo->stat_lock); - } - - spin_unlock(&info->lock); - page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); - if (page) - set_page_private(page, 0); - spin_lock(&info->lock); - - if (!page) { - shmem_free_blocks(inode, 1); - return ERR_PTR(-ENOMEM); - } - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - entry = ERR_PTR(-EINVAL); - break; - } - if (info->next_index <= index) - info->next_index = index + 1; - } - if (page) { - /* another task gave its page, or truncated the file */ - shmem_free_blocks(inode, 1); - shmem_dir_free(page); - } - if (info->next_index <= index && !IS_ERR(entry)) - info->next_index = index + 1; - return entry; -} - -/* - * shmem_free_swp - free some swap entries in a directory - * - * @dir: pointer to the directory - * @edir: pointer after last entry of the directory - * @punch_lock: pointer to spinlock when needed for the holepunch case - */ -static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, - spinlock_t *punch_lock) -{ - spinlock_t *punch_unlock = NULL; - swp_entry_t *ptr; - int freed = 0; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val) { - if (unlikely(punch_lock)) { - punch_unlock = punch_lock; - punch_lock = NULL; - spin_lock(punch_unlock); - if (!ptr->val) - continue; - } - free_swap_and_cache(*ptr); - *ptr = (swp_entry_t){0}; - freed++; - } - } - if (punch_unlock) - spin_unlock(punch_unlock); - return freed; -} - -static int shmem_map_and_free_swp(struct page *subdir, int offset, - int limit, struct page ***dir, spinlock_t *punch_lock) -{ - swp_entry_t *ptr; - int freed = 0; - - ptr = shmem_swp_map(subdir); - for (; offset < limit; offset += LATENCY_LIMIT) { - int size = limit - offset; - if (size > 
LATENCY_LIMIT) - size = LATENCY_LIMIT; - freed += shmem_free_swp(ptr+offset, ptr+offset+size, - punch_lock); - if (need_resched()) { - shmem_swp_unmap(ptr); - if (*dir) { - shmem_dir_unmap(*dir); - *dir = NULL; - } - cond_resched(); - ptr = shmem_swp_map(subdir); - } - } - shmem_swp_unmap(ptr); - return freed; -} - -static void shmem_free_pages(struct list_head *next) -{ - struct page *page; - int freed = 0; - - do { - page = container_of(next, struct page, lru); - next = next->next; - shmem_dir_free(page); - freed++; - if (freed >= LATENCY_LIMIT) { - cond_resched(); - freed = 0; - } - } while (next); -} - -static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) -{ - struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - unsigned long diroff; - struct page **dir; - struct page *topdir; - struct page *middir; - struct page *subdir; - swp_entry_t *ptr; - LIST_HEAD(pages_to_free); - long nr_pages_to_free = 0; - long nr_swaps_freed = 0; - int offset; - int freed; - int punch_hole; - spinlock_t *needs_lock; - spinlock_t *punch_lock; - unsigned long upper_limit; - - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (idx >= info->next_index) - return; - - spin_lock(&info->lock); - info->flags |= SHMEM_TRUNCATE; - if (likely(end == (loff_t) -1)) { - limit = info->next_index; - upper_limit = SHMEM_MAX_INDEX; - info->next_index = idx; - needs_lock = NULL; - punch_hole = 0; - } else { - if (end + 1 >= inode->i_size) { /* we may free a little more */ - limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - upper_limit = SHMEM_MAX_INDEX; - } else { - limit = (end + 1) >> PAGE_CACHE_SHIFT; - upper_limit = limit; - } - needs_lock = &info->lock; - punch_hole = 1; - } - - topdir = info->i_indirect; - if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { - info->i_indirect = NULL; - 
nr_pages_to_free++; - list_add(&topdir->lru, &pages_to_free); - } - spin_unlock(&info->lock); - - if (info->swapped && idx < SHMEM_NR_DIRECT) { - ptr = info->i_direct; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); - } - - /* - * If there are no indirect blocks or we are punching a hole - * below indirect blocks, nothing to be done. - */ - if (!topdir || limit <= SHMEM_NR_DIRECT) - goto done2; - - /* - * The truncation case has already dropped info->lock, and we're safe - * because i_size and next_index have already been lowered, preventing - * access beyond. But in the punch_hole case, we still need to take - * the lock when updating the swap directory, because there might be - * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or - * shmem_writepage. However, whenever we find we can remove a whole - * directory page (not at the misaligned start or end of the range), - * we first NULLify its pointer in the level above, and then have no - * need to take the lock when updating its contents: needs_lock and - * punch_lock (either pointing to info->lock or NULL) manage this. - */ - - upper_limit -= SHMEM_NR_DIRECT; - limit -= SHMEM_NR_DIRECT; - idx = (idx > SHMEM_NR_DIRECT)? 
(idx - SHMEM_NR_DIRECT): 0; - offset = idx % ENTRIES_PER_PAGE; - idx -= offset; - - dir = shmem_dir_map(topdir); - stage = ENTRIES_PER_PAGEPAGE/2; - if (idx < ENTRIES_PER_PAGEPAGE/2) { - middir = topdir; - diroff = idx/ENTRIES_PER_PAGE; - } else { - dir += ENTRIES_PER_PAGE/2; - dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; - while (stage <= idx) - stage += ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (*dir) { - diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % - ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; - if (!diroff && !offset && upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(middir); - } else { - diroff = 0; - offset = 0; - idx = stage; - } - } - - for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir); - dir = shmem_dir_map(topdir) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto done1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (punch_hole) - needs_lock = &info->lock; - if (upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); - } - shmem_dir_unmap(dir); - cond_resched(); - dir = shmem_dir_map(middir); - diroff = 0; - } - punch_lock = needs_lock; - subdir = dir[diroff]; - if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { - if (needs_lock) { - spin_lock(needs_lock); - dir[diroff] = NULL; - spin_unlock(needs_lock); - punch_lock = NULL; - } else - dir[diroff] = NULL; - nr_pages_to_free++; - list_add(&subdir->lru, &pages_to_free); - } - if (subdir && page_private(subdir) /* has swap entries */) { - 
size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - freed = shmem_map_and_free_swp(subdir, - offset, size, &dir, punch_lock); - if (!dir) - dir = shmem_dir_map(middir); - nr_swaps_freed += freed; - if (offset || punch_lock) { - spin_lock(&info->lock); - set_page_private(subdir, - page_private(subdir) - freed); - spin_unlock(&info->lock); - } else - BUG_ON(page_private(subdir) != freed); - } - offset = 0; - } -done1: - shmem_dir_unmap(dir); -done2: - if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { - /* - * Call truncate_inode_pages again: racing shmem_unuse_inode - * may have swizzled a page in from swap since vmtruncate or - * generic_delete_inode did it, before we lowered next_index. - * Also, though shmem_getpage checks i_size before adding to - * cache, no recheck after: so fix the narrow window there too. - * - * Recalling truncate_inode_pages_range and unmap_mapping_range - * every time for punch_hole (which never got a chance to clear - * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, - * yet hardly ever necessary: try to optimize them out later. - */ - truncate_inode_pages_range(inode->i_mapping, start, end); - if (punch_hole) - unmap_mapping_range(inode->i_mapping, start, - end - start, 1); - } - - spin_lock(&info->lock); - info->flags &= ~SHMEM_TRUNCATE; - info->swapped -= nr_swaps_freed; - if (nr_pages_to_free) - shmem_free_blocks(inode, nr_pages_to_free); - shmem_recalc_inode(inode); - spin_unlock(&info->lock); - - /* - * Empty swap vector directory pages to be freed? 
- */ - if (!list_empty(&pages_to_free)) { - pages_to_free.prev->next = NULL; - shmem_free_pages(pages_to_free.next); - } -} - -static void shmem_truncate(struct inode *inode) -{ - shmem_truncate_range(inode, inode->i_size, (loff_t)-1); -} - -static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - struct page *page = NULL; - int error; - - if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - if (attr->ia_size < inode->i_size) { - /* - * If truncating down to a partial page, then - * if that page is already allocated, hold it - * in memory until the truncation is over, so - * truncate_partial_page cannnot miss it were - * it assigned to swap. - */ - if (attr->ia_size & (PAGE_CACHE_SIZE-1)) { - (void) shmem_getpage(inode, - attr->ia_size>>PAGE_CACHE_SHIFT, - &page, SGP_READ, NULL); - } - /* - * Reset SHMEM_PAGEIN flag so that shmem_truncate can - * detect if any pages might have been added to cache - * after truncate_inode_pages. But we needn't bother - * if it's being fully truncated to zero-length: the - * nrpages check is efficient enough in that case. 
- */ - if (attr->ia_size) { - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - info->flags &= ~SHMEM_PAGEIN; - spin_unlock(&info->lock); - } - } - } - - error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); -#ifdef CONFIG_TMPFS_POSIX_ACL - if (!error && (attr->ia_valid & ATTR_MODE)) - error = generic_acl_chmod(inode, &shmem_acl_ops); -#endif - if (page) - page_cache_release(page); - return error; -} - -static void shmem_delete_inode(struct inode *inode) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - struct shmem_inode_info *info = SHMEM_I(inode); - - if (inode->i_op->truncate == shmem_truncate) { - truncate_inode_pages(inode->i_mapping, 0); - shmem_unacct_size(info->flags, inode->i_size); - inode->i_size = 0; - shmem_truncate(inode); - if (!list_empty(&info->swaplist)) { - spin_lock(&shmem_swaplist_lock); - list_del_init(&info->swaplist); - spin_unlock(&shmem_swaplist_lock); - } - } - BUG_ON(inode->i_blocks); - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock(&sbinfo->stat_lock); - } - clear_inode(inode); -} - -static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) -{ - swp_entry_t *ptr; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val == entry.val) - return ptr - dir; - } - return -1; -} - -static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) -{ - struct inode *inode; - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - struct page **dir; - struct page *subdir; - swp_entry_t *ptr; - int offset; - - idx = 0; - ptr = info->i_direct; - spin_lock(&info->lock); - limit = info->next_index; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_swp_balance_unmap(); - goto found; - } - if (!info->i_indirect) - goto lost2; - - dir = 
shmem_dir_map(info->i_indirect); - stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; - - for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir-1); - dir = shmem_dir_map(info->i_indirect) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto lost1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - subdir = *dir; - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - subdir = *dir; - if (subdir && page_private(subdir)) { - ptr = shmem_swp_map(subdir); - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_dir_unmap(dir); - goto found; - } - shmem_swp_unmap(ptr); - } - } -lost1: - shmem_dir_unmap(dir-1); -lost2: - spin_unlock(&info->lock); - return 0; -found: - idx += offset; - inode = &info->vfs_inode; - if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) { - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, ptr + offset, 0); - } - shmem_swp_unmap(ptr); - spin_unlock(&info->lock); - /* - * Decrement swap count even when the entry is left behind: - * try_to_unuse will skip over mms, then reincrement count. - */ - swap_free(entry); - return 1; -} - -/* - * shmem_unuse() search for an eventually swapped out shmem page. 
- */ -int shmem_unuse(swp_entry_t entry, struct page *page) -{ - struct list_head *p, *next; - struct shmem_inode_info *info; - int found = 0; - - spin_lock(&shmem_swaplist_lock); - list_for_each_safe(p, next, &shmem_swaplist) { - info = list_entry(p, struct shmem_inode_info, swaplist); - if (!info->swapped) - list_del_init(&info->swaplist); - else if (shmem_unuse_inode(info, entry, page)) { - /* move head to start search for next from here */ - list_move_tail(&shmem_swaplist, &info->swaplist); - found = 1; - break; - } - } - spin_unlock(&shmem_swaplist_lock); - return found; -} - -/* - * Move the page from the page cache to the swap cache. - */ -static int shmem_writepage(struct page *page, struct writeback_control *wbc) -{ - struct shmem_inode_info *info; - swp_entry_t *entry, swap; - struct address_space *mapping; - unsigned long index; - struct inode *inode; - - BUG_ON(!PageLocked(page)); - /* - * shmem_backing_dev_info's capabilities prevent regular writeback or - * sync from ever calling shmem_writepage; but a stacking filesystem - * may use the ->writepage of its underlying filesystem, in which case - * we want to do nothing when that underlying filesystem is tmpfs - * (writing out to swap is useful as a response to memory pressure, but - * of no use to stabilize the data) - just redirty the page, unlock it - * and claim success in this case. AOP_WRITEPAGE_ACTIVATE, and the - * page_mapped check below, must be avoided unless we're in reclaim. 
- */ - if (!wbc->for_reclaim) { - set_page_dirty(page); - unlock_page(page); - return 0; - } - BUG_ON(page_mapped(page)); - - mapping = page->mapping; - index = page->index; - inode = mapping->host; - info = SHMEM_I(inode); - if (info->flags & VM_LOCKED) - goto redirty; - swap = get_swap_page(); - if (!swap.val) - goto redirty; - - spin_lock(&info->lock); - shmem_recalc_inode(inode); - if (index >= info->next_index) { - BUG_ON(!(info->flags & SHMEM_TRUNCATE)); - goto unlock; - } - entry = shmem_swp_entry(info, index, NULL); - BUG_ON(!entry); - BUG_ON(entry->val); - - if (move_to_swap_cache(page, swap) == 0) { - shmem_swp_set(info, entry, swap.val); - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - if (list_empty(&info->swaplist)) { - spin_lock(&shmem_swaplist_lock); - /* move instead of add in case we're racing */ - list_move_tail(&info->swaplist, &shmem_swaplist); - spin_unlock(&shmem_swaplist_lock); - } - unlock_page(page); - return 0; - } - - shmem_swp_unmap(entry); -unlock: - spin_unlock(&info->lock); - swap_free(swap); -redirty: - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ -} - -#ifdef CONFIG_NUMA -static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) -{ - char *nodelist = strchr(value, ':'); - int err = 1; - - if (nodelist) { - /* NUL-terminate policy string */ - *nodelist++ = '\0'; - if (nodelist_parse(nodelist, *policy_nodes)) - goto out; - if (!nodes_subset(*policy_nodes, node_online_map)) - goto out; - } - if (!strcmp(value, "default")) { - *policy = MPOL_DEFAULT; - /* Don't allow a nodelist */ - if (!nodelist) - err = 0; - } else if (!strcmp(value, "prefer")) { - *policy = MPOL_PREFERRED; - /* Insist on a nodelist of one node only */ - if (nodelist) { - char *rest = nodelist; - while (isdigit(*rest)) - rest++; - if (!*rest) - err = 0; - } - } else if (!strcmp(value, "bind")) { - *policy = MPOL_BIND; - /* Insist on a nodelist */ - if (nodelist) - err = 0; - } else if 
(!strcmp(value, "interleave")) { - *policy = MPOL_INTERLEAVE; - /* Default to nodes online if no nodelist */ - if (!nodelist) - *policy_nodes = node_online_map; - err = 0; - } -out: - /* Restore string for error message */ - if (nodelist) - *--nodelist = ':'; - return err; -} - -static struct page *shmem_swapin_async(struct shared_policy *p, - swp_entry_t entry, unsigned long idx) -{ - struct page *page; - struct vm_area_struct pvma; - - /* Create a pseudo vma that just contains the policy */ - memset(&pvma, 0, sizeof(struct vm_area_struct)); - pvma.vm_end = PAGE_SIZE; - pvma.vm_pgoff = idx; - pvma.vm_policy = mpol_shared_policy_lookup(p, idx); - page = read_swap_cache_async(entry, &pvma, 0); - mpol_free(pvma.vm_policy); - return page; -} - -struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry, - unsigned long idx) -{ - struct shared_policy *p = &info->policy; - int i, num; - struct page *page; - unsigned long offset; - - num = valid_swaphandles(entry, &offset); - for (i = 0; i < num; offset++, i++) { - page = shmem_swapin_async(p, - swp_entry(swp_type(entry), offset), idx); - if (!page) - break; - page_cache_release(page); - } - lru_add_drain(); /* Push any new pages onto the LRU now */ - return shmem_swapin_async(p, entry, idx); -} - -static struct page * -shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, - unsigned long idx) -{ - struct vm_area_struct pvma; - struct page *page; - - memset(&pvma, 0, sizeof(struct vm_area_struct)); - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); - pvma.vm_pgoff = idx; - pvma.vm_end = PAGE_SIZE; - page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0); - mpol_free(pvma.vm_policy); - return page; -} -#else -static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) -{ - return 1; -} - -static inline struct page * -shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) -{ - swapin_readahead(entry, 0, NULL); - return 
read_swap_cache_async(entry, NULL, 0); -} - -static inline struct page * -shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) -{ - return alloc_page(gfp | __GFP_ZERO); -} -#endif - -/* - * shmem_getpage - either get the page from swap or allocate a new one - * - * If we allocate a new one we do not mark it dirty. That's up to the - * vm. If we swap it in we mark it dirty since we also free the swap - * entry since a page cannot live in both the swap and page cache - */ -static int shmem_getpage(struct inode *inode, unsigned long idx, - struct page **pagep, enum sgp_type sgp, int *type) -{ - struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo; - struct page *filepage = *pagep; - struct page *swappage; - swp_entry_t *entry; - swp_entry_t swap; - int error; - - if (idx >= SHMEM_MAX_INDEX) - return -EFBIG; - /* - * Normally, filepage is NULL on entry, and either found - * uptodate immediately, or allocated and zeroed, or read - * in under swappage, which is then assigned to filepage. - * But shmem_prepare_write passes in a locked filepage, - * which may be found not uptodate by other callers too, - * and may need to be copied from the swappage read in. - */ -repeat: - if (!filepage) - filepage = find_lock_page(mapping, idx); - if (filepage && PageUptodate(filepage)) - goto done; - error = 0; - if (sgp == SGP_QUICK) - goto failed; - - spin_lock(&info->lock); - shmem_recalc_inode(inode); - entry = shmem_swp_alloc(info, idx, sgp); - if (IS_ERR(entry)) { - spin_unlock(&info->lock); - error = PTR_ERR(entry); - goto failed; - } - swap = *entry; - - if (swap.val) { - /* Look it up and read it in.. 
*/ - swappage = lookup_swap_cache(swap); - if (!swappage) { - shmem_swp_unmap(entry); - /* here we actually do the io */ - if (type && *type == VM_FAULT_MINOR) { - __count_vm_event(PGMAJFAULT); - *type = VM_FAULT_MAJOR; - } - spin_unlock(&info->lock); - swappage = shmem_swapin(info, swap, idx); - if (!swappage) { - spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - if (entry->val == swap.val) - error = -ENOMEM; - shmem_swp_unmap(entry); - } - spin_unlock(&info->lock); - if (error) - goto failed; - goto repeat; - } - wait_on_page_locked(swappage); - page_cache_release(swappage); - goto repeat; - } - - /* We have to do this with page locked to prevent races */ - if (TestSetPageLocked(swappage)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_locked(swappage); - page_cache_release(swappage); - goto repeat; - } - if (PageWriteback(swappage)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_writeback(swappage); - unlock_page(swappage); - page_cache_release(swappage); - goto repeat; - } - if (!PageUptodate(swappage)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - unlock_page(swappage); - page_cache_release(swappage); - error = -EIO; - goto failed; - } - - if (filepage) { - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - delete_from_swap_cache(swappage); - spin_unlock(&info->lock); - copy_highpage(filepage, swappage); - unlock_page(swappage); - page_cache_release(swappage); - flush_dcache_page(filepage); - SetPageUptodate(filepage); - set_page_dirty(filepage); - swap_free(swap); - } else if (!(error = move_from_swap_cache( - swappage, idx, mapping))) { - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - filepage = swappage; - swap_free(swap); - } else { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - unlock_page(swappage); - page_cache_release(swappage); - if 
(error == -ENOMEM) { - /* let kswapd refresh zone for GFP_ATOMICs */ - congestion_wait(WRITE, HZ/50); - } - goto repeat; - } - } else if (sgp == SGP_READ && !filepage) { - shmem_swp_unmap(entry); - filepage = find_get_page(mapping, idx); - if (filepage && - (!PageUptodate(filepage) || TestSetPageLocked(filepage))) { - spin_unlock(&info->lock); - wait_on_page_locked(filepage); - page_cache_release(filepage); - filepage = NULL; - goto repeat; - } - spin_unlock(&info->lock); - } else { - shmem_swp_unmap(entry); - sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) { - spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks == 0 || - shmem_acct_block(info->flags)) { - spin_unlock(&sbinfo->stat_lock); - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } - sbinfo->free_blocks--; - inode->i_blocks += BLOCKS_PER_PAGE; - spin_unlock(&sbinfo->stat_lock); - } else if (shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } - - if (!filepage) { - spin_unlock(&info->lock); - filepage = shmem_alloc_page(mapping_gfp_mask(mapping), - info, - idx); - if (!filepage) { - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - error = -ENOMEM; - goto failed; - } - - spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - swap = *entry; - shmem_swp_unmap(entry); - } - if (error || swap.val || 0 != add_to_page_cache_lru( - filepage, mapping, idx, GFP_ATOMIC)) { - spin_unlock(&info->lock); - page_cache_release(filepage); - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - filepage = NULL; - if (error) - goto failed; - goto repeat; - } - info->flags |= SHMEM_PAGEIN; - } - - info->alloced++; - spin_unlock(&info->lock); - flush_dcache_page(filepage); - SetPageUptodate(filepage); - } -done: - if (*pagep != filepage) { - unlock_page(filepage); - *pagep = filepage; - } - return 0; - -failed: - if (*pagep != filepage) { - 
unlock_page(filepage); - page_cache_release(filepage); - } - return error; -} - -static struct page *shmem_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) -{ - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct page *page = NULL; - unsigned long idx; - int error; - - idx = (address - vma->vm_start) >> PAGE_SHIFT; - idx += vma->vm_pgoff; - idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return NOPAGE_SIGBUS; - - error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); - if (error) - return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; - - mark_page_accessed(page); - return page; -} - -static int shmem_populate(struct vm_area_struct *vma, - unsigned long addr, unsigned long len, - pgprot_t prot, unsigned long pgoff, int nonblock) -{ - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct mm_struct *mm = vma->vm_mm; - enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; - unsigned long size; - - size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) - return -EINVAL; - - while ((long) len > 0) { - struct page *page = NULL; - int err; - /* - * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE - */ - err = shmem_getpage(inode, pgoff, &page, sgp, NULL); - if (err) - return err; - /* Page may still be null, but only if nonblock was set. */ - if (page) { - mark_page_accessed(page); - err = install_page(mm, vma, addr, page, prot); - if (err) { - page_cache_release(page); - return err; - } - } else if (vma->vm_flags & VM_NONLINEAR) { - /* No page was found just because we can't read it in - * now (being here implies nonblock != 0), but the page - * may exist, so set the PTE to fault it in later. 
*/ - err = install_file_pte(mm, vma, addr, pgoff, prot); - if (err) - return err; - } - - len -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; - } - return 0; -} - -#ifdef CONFIG_NUMA -int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) -{ - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); -} - -struct mempolicy * -shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) -{ - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - unsigned long idx; - - idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); -} -#endif - -int shmem_lock(struct file *file, int lock, struct user_struct *user) -{ - struct inode *inode = file->f_path.dentry->d_inode; - struct shmem_inode_info *info = SHMEM_I(inode); - int retval = -ENOMEM; - - spin_lock(&info->lock); - if (lock && !(info->flags & VM_LOCKED)) { - if (!user_shm_lock(inode->i_size, user)) - goto out_nomem; - info->flags |= VM_LOCKED; - } - if (!lock && (info->flags & VM_LOCKED) && user) { - user_shm_unlock(inode->i_size, user); - info->flags &= ~VM_LOCKED; - } - retval = 0; -out_nomem: - spin_unlock(&info->lock); - return retval; -} - -static int shmem_mmap(struct file *file, struct vm_area_struct *vma) -{ - file_accessed(file); - vma->vm_ops = &shmem_vm_ops; - return 0; -} - -static struct inode * -shmem_get_inode(struct super_block *sb, int mode, dev_t dev) -{ - struct inode *inode; - struct shmem_inode_info *info; - struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - if (!sbinfo->free_inodes) { - spin_unlock(&sbinfo->stat_lock); - return NULL; - } - sbinfo->free_inodes--; - spin_unlock(&sbinfo->stat_lock); - } - - inode = new_inode(sb); - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blocks = 0; - inode->i_mapping->a_ops = 
&shmem_aops; - inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_generation = get_seconds(); - info = SHMEM_I(inode); - memset(info, 0, (char *)inode - (char *)info); - spin_lock_init(&info->lock); - INIT_LIST_HEAD(&info->swaplist); - - switch (mode & S_IFMT) { - default: - inode->i_op = &shmem_special_inode_operations; - init_special_inode(inode, mode, dev); - break; - case S_IFREG: - inode->i_op = &shmem_inode_operations; - inode->i_fop = &shmem_file_operations; - mpol_shared_policy_init(&info->policy, sbinfo->policy, - &sbinfo->policy_nodes); - break; - case S_IFDIR: - inc_nlink(inode); - /* Some things misbehave if size == 0 on a directory */ - inode->i_size = 2 * BOGO_DIRENT_SIZE; - inode->i_op = &shmem_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - break; - case S_IFLNK: - /* - * Must not load anything in the rbtree, - * mpol_free_shared_policy will not be called. - */ - mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, - NULL); - break; - } - } else if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock(&sbinfo->stat_lock); - } - return inode; -} - -#ifdef CONFIG_TMPFS -static const struct inode_operations shmem_symlink_inode_operations; -static const struct inode_operations shmem_symlink_inline_operations; - -/* - * Normally tmpfs makes no use of shmem_prepare_write, but it - * lets a tmpfs file be used read-write below the loop driver. 
- */ -static int -shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) -{ - struct inode *inode = page->mapping->host; - return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL); -} - -static ssize_t -shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) -{ - struct inode *inode = file->f_path.dentry->d_inode; - loff_t pos; - unsigned long written; - ssize_t err; - - if ((ssize_t) count < 0) - return -EINVAL; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - - mutex_lock(&inode->i_mutex); - - pos = *ppos; - written = 0; - - err = generic_write_checks(file, &pos, &count, 0); - if (err || !count) - goto out; - - err = remove_suid(file->f_path.dentry); - if (err) - goto out; - - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - - do { - struct page *page = NULL; - unsigned long bytes, index, offset; - char *kaddr; - int left; - - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; - bytes = PAGE_CACHE_SIZE - offset; - if (bytes > count) - bytes = count; - - /* - * We don't hold page lock across copy from user - - * what would it guard against? - so no deadlock here. - * But it still may be a good idea to prefault below. 
- */ - - err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL); - if (err) - break; - - left = bytes; - if (PageHighMem(page)) { - volatile unsigned char dummy; - __get_user(dummy, buf); - __get_user(dummy, buf + bytes - 1); - - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_from_user_inatomic(kaddr + offset, - buf, bytes); - kunmap_atomic(kaddr, KM_USER0); - } - if (left) { - kaddr = kmap(page); - left = __copy_from_user(kaddr + offset, buf, bytes); - kunmap(page); - } - - written += bytes; - count -= bytes; - pos += bytes; - buf += bytes; - if (pos > inode->i_size) - i_size_write(inode, pos); - - flush_dcache_page(page); - set_page_dirty(page); - mark_page_accessed(page); - page_cache_release(page); - - if (left) { - pos -= left; - written -= left; - err = -EFAULT; - break; - } - - /* - * Our dirty pages are not counted in nr_dirty, - * and we do not attempt to balance dirty pages. - */ - - cond_resched(); - } while (count); - - *ppos = pos; - if (written) - err = written; -out: - mutex_unlock(&inode->i_mutex); - return err; -} - -static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct address_space *mapping = inode->i_mapping; - unsigned long index, offset; - - index = *ppos >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; - - for (;;) { - struct page *page = NULL; - unsigned long end_index, nr, ret; - loff_t i_size = i_size_read(inode); - - end_index = i_size >> PAGE_CACHE_SHIFT; - if (index > end_index) - break; - if (index == end_index) { - nr = i_size & ~PAGE_CACHE_MASK; - if (nr <= offset) - break; - } - - desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL); - if (desc->error) { - if (desc->error == -EINVAL) - desc->error = 0; - break; - } - - /* - * We must evaluate after, since reads (unlike writes) - * are called without i_mutex protection against truncate - */ - nr = PAGE_CACHE_SIZE; - i_size = 
i_size_read(inode); - end_index = i_size >> PAGE_CACHE_SHIFT; - if (index == end_index) { - nr = i_size & ~PAGE_CACHE_MASK; - if (nr <= offset) { - if (page) - page_cache_release(page); - break; - } - } - nr -= offset; - - if (page) { - /* - * If users can be writing to this page using arbitrary - * virtual addresses, take care about potential aliasing - * before reading the page on the kernel side. - */ - if (mapping_writably_mapped(mapping)) - flush_dcache_page(page); - /* - * Mark the page accessed if we read the beginning. - */ - if (!offset) - mark_page_accessed(page); - } else { - page = ZERO_PAGE(0); - page_cache_get(page); - } - - /* - * Ok, we have the page, and it's up-to-date, so - * now we can copy it to user space... - * - * The actor routine returns how many bytes were actually used.. - * NOTE! This may not be the same as how much of a user buffer - * we filled up (we may be padding etc), so we can only update - * "pos" here (the actor routine has to update the user buffer - * pointers and the remaining count). 
- */ - ret = actor(desc, page, offset, nr); - offset += ret; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; - - page_cache_release(page); - if (ret != nr || !desc->count) - break; - - cond_resched(); - } - - *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; - file_accessed(filp); -} - -static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ - read_descriptor_t desc; - - if ((ssize_t) count < 0) - return -EINVAL; - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - if (!count) - return 0; - - desc.written = 0; - desc.count = count; - desc.arg.buf = buf; - desc.error = 0; - - do_shmem_file_read(filp, ppos, &desc, file_read_actor); - if (desc.written) - return desc.written; - return desc.error; -} - -static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos, - size_t count, read_actor_t actor, void *target) -{ - read_descriptor_t desc; - - if (!count) - return 0; - - desc.written = 0; - desc.count = count; - desc.arg.data = target; - desc.error = 0; - - do_shmem_file_read(in_file, ppos, &desc, actor); - if (desc.written) - return desc.written; - return desc.error; -} - -static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); - - buf->f_type = TMPFS_MAGIC; - buf->f_bsize = PAGE_CACHE_SIZE; - buf->f_namelen = NAME_MAX; - spin_lock(&sbinfo->stat_lock); - if (sbinfo->max_blocks) { - buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; - } - if (sbinfo->max_inodes) { - buf->f_files = sbinfo->max_inodes; - buf->f_ffree = sbinfo->free_inodes; - } - /* else leave those fields 0 like simple_statfs */ - spin_unlock(&sbinfo->stat_lock); - return 0; -} - -/* - * File creation. Allocate an inode, and we're done.. 
- */ -static int -shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) -{ - struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); - int error = -ENOSPC; - - if (inode) { - error = security_inode_init_security(inode, dir, NULL, NULL, - NULL); - if (error) { - if (error != -EOPNOTSUPP) { - iput(inode); - return error; - } - } - error = shmem_acl_init(inode, dir); - if (error) { - iput(inode); - return error; - } - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - inode->i_mode |= S_ISGID; - } - dir->i_size += BOGO_DIRENT_SIZE; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ - } - return error; -} - -static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - int error; - - if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) - return error; - inc_nlink(dir); - return 0; -} - -static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - return shmem_mknod(dir, dentry, mode | S_IFREG, 0); -} - -/* - * Link a file.. - */ -static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = old_dentry->d_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - - /* - * No ordinary (disk based) filesystem counts links as inodes; - * but each new link needs a new dentry, pinning lowmem, and - * tmpfs dentries cannot be pruned until they are unlinked. 
- */ - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - if (!sbinfo->free_inodes) { - spin_unlock(&sbinfo->stat_lock); - return -ENOSPC; - } - sbinfo->free_inodes--; - spin_unlock(&sbinfo->stat_lock); - } - - dir->i_size += BOGO_DIRENT_SIZE; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - inc_nlink(inode); - atomic_inc(&inode->i_count); /* New dentry reference */ - dget(dentry); /* Extra pinning count for the created dentry */ - d_instantiate(dentry, inode); - return 0; -} - -static int shmem_unlink(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - - if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) { - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_inodes) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock(&sbinfo->stat_lock); - } - } - - dir->i_size -= BOGO_DIRENT_SIZE; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - drop_nlink(inode); - dput(dentry); /* Undo the count from "create" - this does all the work */ - return 0; -} - -static int shmem_rmdir(struct inode *dir, struct dentry *dentry) -{ - if (!simple_empty(dentry)) - return -ENOTEMPTY; - - drop_nlink(dentry->d_inode); - drop_nlink(dir); - return shmem_unlink(dir, dentry); -} - -/* - * The VFS layer already does all the dentry stuff for rename, - * we just have to decrement the usage count for the target if - * it exists so that the VFS layer correctly free's it when it - * gets overwritten. 
- */ -static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) -{ - struct inode *inode = old_dentry->d_inode; - int they_are_dirs = S_ISDIR(inode->i_mode); - - if (!simple_empty(new_dentry)) - return -ENOTEMPTY; - - if (new_dentry->d_inode) { - (void) shmem_unlink(new_dir, new_dentry); - if (they_are_dirs) - drop_nlink(old_dir); - } else if (they_are_dirs) { - drop_nlink(old_dir); - inc_nlink(new_dir); - } - - old_dir->i_size -= BOGO_DIRENT_SIZE; - new_dir->i_size += BOGO_DIRENT_SIZE; - old_dir->i_ctime = old_dir->i_mtime = - new_dir->i_ctime = new_dir->i_mtime = - inode->i_ctime = CURRENT_TIME; - return 0; -} - -static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) -{ - int error; - int len; - struct inode *inode; - struct page *page = NULL; - char *kaddr; - struct shmem_inode_info *info; - - len = strlen(symname) + 1; - if (len > PAGE_CACHE_SIZE) - return -ENAMETOOLONG; - - inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); - if (!inode) - return -ENOSPC; - - error = security_inode_init_security(inode, dir, NULL, NULL, - NULL); - if (error) { - if (error != -EOPNOTSUPP) { - iput(inode); - return error; - } - error = 0; - } - - info = SHMEM_I(inode); - inode->i_size = len-1; - if (len <= (char *)inode - (char *)info) { - /* do it inline */ - memcpy(info, symname, len); - inode->i_op = &shmem_symlink_inline_operations; - } else { - error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); - if (error) { - iput(inode); - return error; - } - inode->i_op = &shmem_symlink_inode_operations; - kaddr = kmap_atomic(page, KM_USER0); - memcpy(kaddr, symname, len); - kunmap_atomic(kaddr, KM_USER0); - set_page_dirty(page); - page_cache_release(page); - } - if (dir->i_mode & S_ISGID) - inode->i_gid = dir->i_gid; - dir->i_size += BOGO_DIRENT_SIZE; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - d_instantiate(dentry, inode); - dget(dentry); - return 0; -} - -static void 
*shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) -{ - nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); - return NULL; -} - -static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct page *page = NULL; - int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); - nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); - return page; -} - -static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) -{ - if (!IS_ERR(nd_get_link(nd))) { - struct page *page = cookie; - kunmap(page); - mark_page_accessed(page); - page_cache_release(page); - } -} - -static const struct inode_operations shmem_symlink_inline_operations = { - .readlink = generic_readlink, - .follow_link = shmem_follow_link_inline, -}; - -static const struct inode_operations shmem_symlink_inode_operations = { - .truncate = shmem_truncate, - .readlink = generic_readlink, - .follow_link = shmem_follow_link, - .put_link = shmem_put_link, -}; - -#ifdef CONFIG_TMPFS_POSIX_ACL -/** - * Superblocks without xattr inode operations will get security.* xattr - * support from the VFS "for free". As soon as we have any other xattrs - * like ACLs, we also need to implement the security.* handlers at - * filesystem level, though. 
- */ - -static size_t shmem_xattr_security_list(struct inode *inode, char *list, - size_t list_len, const char *name, - size_t name_len) -{ - return security_inode_listsecurity(inode, list, list_len); -} - -static int shmem_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_getsecurity(inode, name, buffer, size, - -EOPNOTSUPP); -} - -static int shmem_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_setsecurity(inode, name, value, size, flags); -} - -static struct xattr_handler shmem_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = shmem_xattr_security_list, - .get = shmem_xattr_security_get, - .set = shmem_xattr_security_set, -}; - -static struct xattr_handler *shmem_xattr_handlers[] = { - &shmem_xattr_acl_access_handler, - &shmem_xattr_acl_default_handler, - &shmem_xattr_security_handler, - NULL -}; -#endif - -static struct dentry *shmem_get_parent(struct dentry *child) -{ - return ERR_PTR(-ESTALE); -} - -static int shmem_match(struct inode *ino, void *vfh) -{ - __u32 *fh = vfh; - __u64 inum = fh[2]; - inum = (inum << 32) | fh[1]; - return ino->i_ino == inum && fh[0] == ino->i_generation; -} - -static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) -{ - struct dentry *de = NULL; - struct inode *inode; - __u32 *fh = vfh; - __u64 inum = fh[2]; - inum = (inum << 32) | fh[1]; - - inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); - if (inode) { - de = d_find_alias(inode); - iput(inode); - } - - return de? 
de: ERR_PTR(-ESTALE); -} - -static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, - int len, int type, - int (*acceptable)(void *context, struct dentry *de), - void *context) -{ - if (len < 3) - return ERR_PTR(-ESTALE); - - return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, - context); -} - -static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, - int connectable) -{ - struct inode *inode = dentry->d_inode; - - if (*len < 3) - return 255; - - if (hlist_unhashed(&inode->i_hash)) { - /* Unfortunately insert_inode_hash is not idempotent, - * so as we hash inodes here rather than at creation - * time, we need a lock to ensure we only try - * to do it once - */ - static DEFINE_SPINLOCK(lock); - spin_lock(&lock); - if (hlist_unhashed(&inode->i_hash)) - __insert_inode_hash(inode, - inode->i_ino + inode->i_generation); - spin_unlock(&lock); - } - - fh[0] = inode->i_generation; - fh[1] = inode->i_ino; - fh[2] = ((__u64)inode->i_ino) >> 32; - - *len = 3; - return 1; -} - -static struct export_operations shmem_export_ops = { - .get_parent = shmem_get_parent, - .get_dentry = shmem_get_dentry, - .encode_fh = shmem_encode_fh, - .decode_fh = shmem_decode_fh, -}; - -static int shmem_parse_options(char *options, int *mode, uid_t *uid, - gid_t *gid, unsigned long *blocks, unsigned long *inodes, - int *policy, nodemask_t *policy_nodes) -{ - char *this_char, *value, *rest; - - while (options != NULL) { - this_char = options; - for (;;) { - /* - * NUL-terminate this option: unfortunately, - * mount options form a comma-separated list, - * but mpol's nodelist may also contain commas. 
- */ - options = strchr(options, ','); - if (options == NULL) - break; - options++; - if (!isdigit(*options)) { - options[-1] = '\0'; - break; - } - } - if (!*this_char) - continue; - if ((value = strchr(this_char,'=')) != NULL) { - *value++ = 0; - } else { - printk(KERN_ERR - "tmpfs: No value for mount option '%s'\n", - this_char); - return 1; - } - - if (!strcmp(this_char,"size")) { - unsigned long long size; - size = memparse(value,&rest); - if (*rest == '%') { - size <<= PAGE_SHIFT; - size *= totalram_pages; - do_div(size, 100); - rest++; - } - if (*rest) - goto bad_val; - *blocks = size >> PAGE_CACHE_SHIFT; - } else if (!strcmp(this_char,"nr_blocks")) { - *blocks = memparse(value,&rest); - if (*rest) - goto bad_val; - } else if (!strcmp(this_char,"nr_inodes")) { - *inodes = memparse(value,&rest); - if (*rest) - goto bad_val; - } else if (!strcmp(this_char,"mode")) { - if (!mode) - continue; - *mode = simple_strtoul(value,&rest,8); - if (*rest) - goto bad_val; - } else if (!strcmp(this_char,"uid")) { - if (!uid) - continue; - *uid = simple_strtoul(value,&rest,0); - if (*rest) - goto bad_val; - } else if (!strcmp(this_char,"gid")) { - if (!gid) - continue; - *gid = simple_strtoul(value,&rest,0); - if (*rest) - goto bad_val; - } else if (!strcmp(this_char,"mpol")) { - if (shmem_parse_mpol(value,policy,policy_nodes)) - goto bad_val; - } else { - printk(KERN_ERR "tmpfs: Bad mount option %s\n", - this_char); - return 1; - } - } - return 0; - -bad_val: - printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", - value, this_char); - return 1; - -} - -static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - unsigned long max_blocks = sbinfo->max_blocks; - unsigned long max_inodes = sbinfo->max_inodes; - int policy = sbinfo->policy; - nodemask_t policy_nodes = sbinfo->policy_nodes; - unsigned long blocks; - unsigned long inodes; - int error = -EINVAL; - - if (shmem_parse_options(data, NULL, 
NULL, NULL, &max_blocks, - &max_inodes, &policy, &policy_nodes)) - return error; - - spin_lock(&sbinfo->stat_lock); - blocks = sbinfo->max_blocks - sbinfo->free_blocks; - inodes = sbinfo->max_inodes - sbinfo->free_inodes; - if (max_blocks < blocks) - goto out; - if (max_inodes < inodes) - goto out; - /* - * Those tests also disallow limited->unlimited while any are in - * use, so i_blocks will always be zero when max_blocks is zero; - * but we must separately disallow unlimited->limited, because - * in that case we have no record of how much is already in use. - */ - if (max_blocks && !sbinfo->max_blocks) - goto out; - if (max_inodes && !sbinfo->max_inodes) - goto out; - - error = 0; - sbinfo->max_blocks = max_blocks; - sbinfo->free_blocks = max_blocks - blocks; - sbinfo->max_inodes = max_inodes; - sbinfo->free_inodes = max_inodes - inodes; - sbinfo->policy = policy; - sbinfo->policy_nodes = policy_nodes; -out: - spin_unlock(&sbinfo->stat_lock); - return error; -} -#endif - -static void shmem_put_super(struct super_block *sb) -{ - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; -} - -static int shmem_fill_super(struct super_block *sb, - void *data, int silent) -{ - struct inode *inode; - struct dentry *root; - int mode = S_IRWXUGO | S_ISVTX; - uid_t uid = current->fsuid; - gid_t gid = current->fsgid; - int err = -ENOMEM; - struct shmem_sb_info *sbinfo; - unsigned long blocks = 0; - unsigned long inodes = 0; - int policy = MPOL_DEFAULT; - nodemask_t policy_nodes = node_online_map; - -#ifdef CONFIG_TMPFS - /* - * Per default we only allow half of the physical ram per - * tmpfs instance, limiting inodes to one per page of lowmem; - * but the internal instance is left unlimited. 
- */ - if (!(sb->s_flags & MS_NOUSER)) { - blocks = totalram_pages / 2; - inodes = totalram_pages - totalhigh_pages; - if (inodes > blocks) - inodes = blocks; - if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, - &inodes, &policy, &policy_nodes)) - return -EINVAL; - } - sb->s_export_op = &shmem_export_ops; -#else - sb->s_flags |= MS_NOUSER; -#endif - - /* Round up to L1_CACHE_BYTES to resist false sharing */ - sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info), - L1_CACHE_BYTES), GFP_KERNEL); - if (!sbinfo) - return -ENOMEM; - - spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_blocks = blocks; - sbinfo->free_blocks = blocks; - sbinfo->max_inodes = inodes; - sbinfo->free_inodes = inodes; - sbinfo->policy = policy; - sbinfo->policy_nodes = policy_nodes; - - sb->s_fs_info = sbinfo; - sb->s_maxbytes = SHMEM_MAX_BYTES; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = TMPFS_MAGIC; - sb->s_op = &shmem_ops; - sb->s_time_gran = 1; -#ifdef CONFIG_TMPFS_POSIX_ACL - sb->s_xattr = shmem_xattr_handlers; - sb->s_flags |= MS_POSIXACL; -#endif - - inode = shmem_get_inode(sb, S_IFDIR | mode, 0); - if (!inode) - goto failed; - inode->i_uid = uid; - inode->i_gid = gid; - root = d_alloc_root(inode); - if (!root) - goto failed_iput; - sb->s_root = root; - return 0; - -failed_iput: - iput(inode); -failed: - shmem_put_super(sb); - return err; -} - -static struct kmem_cache *shmem_inode_cachep; - -static struct inode *shmem_alloc_inode(struct super_block *sb) -{ - struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); - if (!p) - return NULL; - return &p->vfs_inode; -} - -static void shmem_destroy_inode(struct inode *inode) -{ - if ((inode->i_mode & S_IFMT) == S_IFREG) { - /* only struct inode is valid if it's an inline symlink */ - mpol_free_shared_policy(&SHMEM_I(inode)->policy); - } - shmem_acl_destroy_inode(inode); - kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); -} 
- -static void init_once(void *foo, struct kmem_cache *cachep, - unsigned long flags) -{ - struct shmem_inode_info *p = (struct shmem_inode_info *) foo; - - inode_init_once(&p->vfs_inode); -#ifdef CONFIG_TMPFS_POSIX_ACL - p->i_acl = NULL; - p->i_default_acl = NULL; -#endif -} - -static int init_inodecache(void) -{ - shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", - sizeof(struct shmem_inode_info), - 0, 0, init_once, NULL); - if (shmem_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - kmem_cache_destroy(shmem_inode_cachep); -} - -static const struct address_space_operations shmem_aops = { - .writepage = shmem_writepage, - .set_page_dirty = __set_page_dirty_no_writeback, -#ifdef CONFIG_TMPFS - .prepare_write = shmem_prepare_write, - .commit_write = simple_commit_write, -#endif - .migratepage = migrate_page, -}; - -static const struct file_operations shmem_file_operations = { - .mmap = shmem_mmap, -#ifdef CONFIG_TMPFS - .llseek = generic_file_llseek, - .read = shmem_file_read, - .write = shmem_file_write, - .fsync = simple_sync_file, - .sendfile = shmem_file_sendfile, -#endif -}; - -static const struct inode_operations shmem_inode_operations = { - .truncate = shmem_truncate, - .setattr = shmem_notify_change, - .truncate_range = shmem_truncate_range, -#ifdef CONFIG_TMPFS_POSIX_ACL - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, - .permission = shmem_permission, -#endif - -}; - -static const struct inode_operations shmem_dir_inode_operations = { -#ifdef CONFIG_TMPFS - .create = shmem_create, - .lookup = simple_lookup, - .link = shmem_link, - .unlink = shmem_unlink, - .symlink = shmem_symlink, - .mkdir = shmem_mkdir, - .rmdir = shmem_rmdir, - .mknod = shmem_mknod, - .rename = shmem_rename, -#endif -#ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_notify_change, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, 
- .listxattr = generic_listxattr, - .removexattr = generic_removexattr, - .permission = shmem_permission, -#endif -}; - -static const struct inode_operations shmem_special_inode_operations = { -#ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_notify_change, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, - .permission = shmem_permission, -#endif -}; - -static const struct super_operations shmem_ops = { - .alloc_inode = shmem_alloc_inode, - .destroy_inode = shmem_destroy_inode, -#ifdef CONFIG_TMPFS - .statfs = shmem_statfs, - .remount_fs = shmem_remount_fs, -#endif - .delete_inode = shmem_delete_inode, - .drop_inode = generic_delete_inode, - .put_super = shmem_put_super, -}; - -static struct vm_operations_struct shmem_vm_ops = { - .nopage = shmem_nopage, - .populate = shmem_populate, -#ifdef CONFIG_NUMA - .set_policy = shmem_set_policy, - .get_policy = shmem_get_policy, -#endif -}; - - -static int shmem_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) -{ - return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); -} - -static struct file_system_type tmpfs_fs_type = { - .owner = THIS_MODULE, - .name = "tmpfs", - .get_sb = shmem_get_sb, - .kill_sb = kill_litter_super, -}; -static struct vfsmount *shm_mnt; - -static int __init init_tmpfs(void) -{ - int error; - - error = init_inodecache(); - if (error) - goto out3; - - error = register_filesystem(&tmpfs_fs_type); - if (error) { - printk(KERN_ERR "Could not register tmpfs\n"); - goto out2; - } - - shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, - tmpfs_fs_type.name, NULL); - if (IS_ERR(shm_mnt)) { - error = PTR_ERR(shm_mnt); - printk(KERN_ERR "Could not kern_mount tmpfs\n"); - goto out1; - } - return 0; - -out1: - unregister_filesystem(&tmpfs_fs_type); -out2: - destroy_inodecache(); -out3: - shm_mnt = ERR_PTR(error); - return error; -} -module_init(init_tmpfs) - 
-/* - * shmem_file_setup - get an unlinked file living in tmpfs - * - * @name: name for dentry (to be seen in /proc//maps - * @size: size to be set for the file - * - */ -struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) -{ - int error; - struct file *file; - struct inode *inode; - struct dentry *dentry, *root; - struct qstr this; - - if (IS_ERR(shm_mnt)) - return (void *)shm_mnt; - - if (size < 0 || size > SHMEM_MAX_BYTES) - return ERR_PTR(-EINVAL); - - if (shmem_acct_size(flags, size)) - return ERR_PTR(-ENOMEM); - - error = -ENOMEM; - this.name = name; - this.len = strlen(name); - this.hash = 0; /* will go */ - root = shm_mnt->mnt_root; - dentry = d_alloc(root, &this); - if (!dentry) - goto put_memory; - - error = -ENFILE; - file = get_empty_filp(); - if (!file) - goto put_dentry; - - error = -ENOSPC; - inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); - if (!inode) - goto close_file; - - SHMEM_I(inode)->flags = flags & VM_ACCOUNT; - d_instantiate(dentry, inode); - inode->i_size = size; - inode->i_nlink = 0; /* It is unlinked */ - file->f_path.mnt = mntget(shm_mnt); - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = &shmem_file_operations; - file->f_mode = FMODE_WRITE | FMODE_READ; - return file; - -close_file: - put_filp(file); -put_dentry: - dput(dentry); -put_memory: - shmem_unacct_size(flags, size); - return ERR_PTR(error); -} - -/* - * shmem_zero_setup - setup a shared anonymous mapping - * - * @vma: the vma to be mmapped is prepared by do_mmap_pgoff - */ -int shmem_zero_setup(struct vm_area_struct *vma) -{ - struct file *file; - loff_t size = vma->vm_end - vma->vm_start; - - file = shmem_file_setup("dev/zero", size, vma->vm_flags); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (vma->vm_file) - fput(vma->vm_file); - vma->vm_file = file; - vma->vm_ops = &shmem_vm_ops; - return 0; -} diff -Nurb linux-2.6.22-570/mm/slab.c linux-2.6.22-591/mm/slab.c --- linux-2.6.22-570/mm/slab.c 
2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/mm/slab.c 2007-12-21 15:36:12.000000000 -0500 @@ -1639,6 +1639,8 @@ #endif flags |= cachep->gfpflags; + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + flags |= __GFP_RECLAIMABLE; page = alloc_pages_node(nodeid, flags, cachep->gfporder); if (!page) diff -Nurb linux-2.6.22-570/mm/slub.c linux-2.6.22-591/mm/slub.c --- linux-2.6.22-570/mm/slub.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/mm/slub.c 2007-12-21 15:36:12.000000000 -0500 @@ -985,6 +985,9 @@ if (s->flags & SLAB_CACHE_DMA) flags |= SLUB_DMA; + if (s->flags & SLAB_RECLAIM_ACCOUNT) + flags |= __GFP_RECLAIMABLE; + if (node == -1) page = alloc_pages(flags, s->order); else @@ -1989,6 +1992,7 @@ #ifdef CONFIG_NUMA s->defrag_ratio = 100; #endif + raise_kswapd_order(s->order); if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) return 1; diff -Nurb linux-2.6.22-570/mm/swap_state.c linux-2.6.22-591/mm/swap_state.c --- linux-2.6.22-570/mm/swap_state.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/swap_state.c 2007-12-21 15:36:12.000000000 -0500 @@ -334,7 +334,8 @@ * Get a new page to read into from swap. */ if (!new_page) { - new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, + vma, addr); if (!new_page) break; /* Out of memory */ } diff -Nurb linux-2.6.22-570/mm/truncate.c linux-2.6.22-591/mm/truncate.c --- linux-2.6.22-570/mm/truncate.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/truncate.c 2007-12-21 15:36:12.000000000 -0500 @@ -82,7 +82,7 @@ /* * If truncate cannot remove the fs-private metadata from the page, the page * becomes anonymous. It will be left on the LRU and may even be mapped into - * user pagetables if we're racing with filemap_nopage(). + * user pagetables if we're racing with filemap_fault(). * * We need to bale out if page->mapping is no longer equal to the original * mapping. 
This happens a) when the VM reclaimed the page while we waited on @@ -192,6 +192,11 @@ unlock_page(page); continue; } + if (page_mapped(page)) { + unmap_mapping_range(mapping, + (loff_t)page_index<index<index > next) next = page->index; next++; @@ -397,7 +407,7 @@ break; } wait_on_page_writeback(page); - while (page_mapped(page)) { + if (page_mapped(page)) { if (!did_range_unmap) { /* * Zap the rest of the file in one hit. @@ -417,6 +427,7 @@ PAGE_CACHE_SIZE, 0); } } + BUG_ON(page_mapped(page)); ret = do_launder_page(mapping, page); if (ret == 0 && !invalidate_complete_page2(mapping, page)) ret = -EIO; diff -Nurb linux-2.6.22-570/mm/util.c linux-2.6.22-591/mm/util.c --- linux-2.6.22-570/mm/util.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/util.c 2007-12-21 15:36:12.000000000 -0500 @@ -18,9 +18,8 @@ } EXPORT_SYMBOL(__kzalloc); -/* +/** * kstrdup - allocate space for and copy an existing string - * * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ @@ -41,6 +40,32 @@ EXPORT_SYMBOL(kstrdup); /** + * kstrndup - allocate space for and copy an existing string + * @s: the string to duplicate + * @max: read at most @max chars from @s + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kstrndup(const char *s, size_t max, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s); + if (len > max) + len = max; + buf = kmalloc_track_caller(len+1, gfp); + if (buf) { + memcpy(buf, s, len); + buf[len] = '\0'; + } + return buf; +} +EXPORT_SYMBOL(kstrndup); + +/** * kmemdup - duplicate region of memory * * @src: memory region to duplicate @@ -60,7 +85,6 @@ /* * strndup_user - duplicate an existing string from user space - * * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. 
*/ diff -Nurb linux-2.6.22-570/mm/vmalloc.c linux-2.6.22-591/mm/vmalloc.c --- linux-2.6.22-570/mm/vmalloc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/vmalloc.c 2007-12-21 15:36:12.000000000 -0500 @@ -68,12 +68,12 @@ } while (pud++, addr = next, addr != end); } -void unmap_vm_area(struct vm_struct *area) +void unmap_kernel_range(unsigned long addr, unsigned long size) { pgd_t *pgd; unsigned long next; - unsigned long addr = (unsigned long) area->addr; - unsigned long end = addr + area->size; + unsigned long start = addr; + unsigned long end = addr + size; BUG_ON(addr >= end); pgd = pgd_offset_k(addr); @@ -84,7 +84,12 @@ continue; vunmap_pud_range(pgd, addr, next); } while (pgd++, addr = next, addr != end); - flush_tlb_kernel_range((unsigned long) area->addr, end); + flush_tlb_kernel_range(start, end); +} + +static void unmap_vm_area(struct vm_struct *area) +{ + unmap_kernel_range((unsigned long)area->addr, area->size); } static int vmap_pte_range(pmd_t *pmd, unsigned long addr, diff -Nurb linux-2.6.22-570/mm/vmscan.c linux-2.6.22-591/mm/vmscan.c --- linux-2.6.22-570/mm/vmscan.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/mm/vmscan.c 2007-12-21 15:36:12.000000000 -0500 @@ -1272,6 +1272,34 @@ return nr_reclaimed; } +static unsigned int kswapd_min_order __read_mostly; + +static inline int kswapd_order(unsigned int order) +{ + return max(kswapd_min_order, order); +} + +/** + * raise_kswapd_order - Raise the minimum order that kswapd reclaims + * @order: The minimum order kswapd should reclaim at + * + * kswapd normally reclaims at order 0 unless there is a higher-order + * allocation being serviced. This function is used to set the minimum + * order that kswapd reclaims at when it is known there will be regular + * high-order allocations at a given order. 
+ */ +void raise_kswapd_order(unsigned int order) +{ + if (order >= MAX_ORDER) + return; + + /* Update order if necessary and inform if changed */ + if (order > kswapd_min_order) { + kswapd_min_order = order; + printk(KERN_INFO "kswapd reclaim order set to %d\n", order); + } +} + /* * The background pageout daemon, started as a kernel thread * from the init process. @@ -1314,13 +1342,14 @@ * trying to free the first piece of memory in the first place). */ tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + set_freezable(); - order = 0; + order = kswapd_order(0); for ( ; ; ) { unsigned long new_order; prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); - new_order = pgdat->kswapd_max_order; + new_order = kswapd_order(pgdat->kswapd_max_order); pgdat->kswapd_max_order = 0; if (order < new_order) { /* @@ -1332,7 +1361,7 @@ if (!freezing(current)) schedule(); - order = pgdat->kswapd_max_order; + order = kswapd_order(pgdat->kswapd_max_order); } finish_wait(&pgdat->kswapd_wait, &wait); diff -Nurb linux-2.6.22-570/net/802/tr.c linux-2.6.22-591/net/802/tr.c --- linux-2.6.22-570/net/802/tr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/802/tr.c 2007-12-21 15:36:15.000000000 -0500 @@ -36,6 +36,7 @@ #include #include #include +#include static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev); static void rif_check_expire(unsigned long dummy); @@ -532,7 +533,7 @@ seq_puts(seq, "if TR address TTL rcf routing segments\n"); else { - struct net_device *dev = dev_get_by_index(entry->iface); + struct net_device *dev = dev_get_by_index(&init_net, entry->iface); long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout) - (long) jiffies; @@ -639,7 +640,7 @@ rif_timer.function = rif_check_expire; add_timer(&rif_timer); - proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops); + proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops); return 0; } diff -Nurb linux-2.6.22-570/net/8021q/Makefile linux-2.6.22-591/net/8021q/Makefile 
--- linux-2.6.22-570/net/8021q/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/8021q/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -4,7 +4,7 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o -8021q-objs := vlan.o vlan_dev.o +8021q-objs := vlan.o vlan_dev.o vlan_netlink.o ifeq ($(CONFIG_PROC_FS),y) 8021q-objs += vlanproc.o diff -Nurb linux-2.6.22-570/net/8021q/vlan.c linux-2.6.22-591/net/8021q/vlan.c --- linux-2.6.22-570/net/8021q/vlan.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/8021q/vlan.c 2007-12-21 15:36:15.000000000 -0500 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "vlan.h" @@ -50,7 +51,7 @@ static char vlan_buggyright[] = "David S. Miller "; static int vlan_device_event(struct notifier_block *, unsigned long, void *); -static int vlan_ioctl_handler(void __user *); +static int vlan_ioctl_handler(struct net *net, void __user *); static int unregister_vlan_dev(struct net_device *, unsigned short ); static struct notifier_block vlan_notifier_block = { @@ -97,15 +98,22 @@ /* Register us to receive netdevice events */ err = register_netdevice_notifier(&vlan_notifier_block); - if (err < 0) { - dev_remove_pack(&vlan_packet_type); - vlan_proc_cleanup(); - return err; - } + if (err < 0) + goto err1; - vlan_ioctl_set(vlan_ioctl_handler); + err = vlan_netlink_init(); + if (err < 0) + goto err2; + vlan_ioctl_set(vlan_ioctl_handler); return 0; + +err2: + unregister_netdevice_notifier(&vlan_notifier_block); +err1: + vlan_proc_cleanup(); + dev_remove_pack(&vlan_packet_type); + return err; } /* Cleanup all vlan devices @@ -117,7 +125,7 @@ struct net_device *dev, *nxt; rtnl_lock(); - for_each_netdev_safe(dev, nxt) { + for_each_netdev_safe(&init_net, dev, nxt) { if (dev->priv_flags & IFF_802_1Q_VLAN) { unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, VLAN_DEV_INFO(dev)->vlan_id); @@ -136,6 +144,7 @@ { int i; + vlan_netlink_fini(); vlan_ioctl_set(NULL); /* Un-register us from receiving netdevice events */ @@ -197,6 
+206,34 @@ kfree(grp); } +static struct vlan_group *vlan_group_alloc(int ifindex) +{ + struct vlan_group *grp; + unsigned int size; + unsigned int i; + + grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); + if (!grp) + return NULL; + + size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; + + for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { + grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL); + if (!grp->vlan_devices_arrays[i]) + goto err; + } + + grp->real_dev_ifindex = ifindex; + hlist_add_head_rcu(&grp->hlist, + &vlan_group_hash[vlan_grp_hashfn(ifindex)]); + return grp; + +err: + vlan_group_free(grp); + return NULL; +} + static void vlan_rcu_free(struct rcu_head *rcu) { vlan_group_free(container_of(rcu, struct vlan_group, rcu)); @@ -278,47 +315,62 @@ return ret; } -static int unregister_vlan_device(const char *vlan_IF_name) +int unregister_vlan_device(struct net_device *dev) { - struct net_device *dev = NULL; int ret; - - dev = dev_get_by_name(vlan_IF_name); - ret = -EINVAL; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - rtnl_lock(); - ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, VLAN_DEV_INFO(dev)->vlan_id); - - dev_put(dev); unregister_netdevice(dev); - rtnl_unlock(); - if (ret == 1) ret = 0; + return ret; +} + +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + +static int vlan_dev_init(struct net_device *dev) +{ + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; + + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ + dev->flags = real_dev->flags & ~IFF_UP; + dev->iflink = real_dev->ifindex; + dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | + (1<<__LINK_STATE_DORMANT))) | + (1<<__LINK_STATE_PRESENT); + + /* TODO: maybe just assign it to be ETHERNET? 
*/ + dev->type = real_dev->type; + + memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len); + memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); + dev->addr_len = real_dev->addr_len; + + if (real_dev->features & NETIF_F_HW_VLAN_TX) { + dev->hard_header = real_dev->hard_header; + dev->hard_header_len = real_dev->hard_header_len; + dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; + dev->rebuild_header = real_dev->rebuild_header; } else { - printk(VLAN_ERR - "%s: ERROR: Tried to remove a non-vlan device " - "with VLAN code, name: %s priv_flags: %hX\n", - __FUNCTION__, dev->name, dev->priv_flags); - dev_put(dev); - ret = -EPERM; - } - } else { -#ifdef VLAN_DEBUG - printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__); -#endif - ret = -EINVAL; + dev->hard_header = vlan_dev_hard_header; + dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; + dev->hard_start_xmit = vlan_dev_hard_start_xmit; + dev->rebuild_header = vlan_dev_rebuild_header; } + dev->hard_header_parse = real_dev->hard_header_parse; - return ret; + lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); + return 0; } -static void vlan_setup(struct net_device *new_dev) +void vlan_setup(struct net_device *new_dev) { SET_MODULE_OWNER(new_dev); @@ -338,6 +390,7 @@ /* set up method calls */ new_dev->change_mtu = vlan_dev_change_mtu; + new_dev->init = vlan_dev_init; new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; new_dev->set_mac_address = vlan_dev_set_mac_address; @@ -366,77 +419,110 @@ } } -/* - * vlan network devices have devices nesting below it, and are a special - * "super class" of normal network devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key vlan_netdev_xmit_lock_key; - - -/* Attach a VLAN device to a mac address (ie Ethernet Card). - * Returns the device that was created, or NULL if there was - * an error of some kind. 
- */ -static struct net_device *register_vlan_device(const char *eth_IF_name, - unsigned short VLAN_ID) +int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) { - struct vlan_group *grp; - struct net_device *new_dev; - struct net_device *real_dev; /* the ethernet device */ - char name[IFNAMSIZ]; - int i; - -#ifdef VLAN_DEBUG - printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", - __FUNCTION__, eth_IF_name, VLAN_ID); -#endif - - if (VLAN_ID >= VLAN_VID_MASK) - goto out_ret_null; - - /* find the device relating to eth_IF_name. */ - real_dev = dev_get_by_name(eth_IF_name); - if (!real_dev) - goto out_ret_null; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { printk(VLAN_DBG "%s: VLANs not supported on %s.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !real_dev->vlan_rx_register) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + return -EOPNOTSUPP; } - /* From this point on, all the data structures must remain - * consistent. - */ - rtnl_lock(); - /* The real device must be up and operating in order to * assosciate a VLAN device with it. */ if (!(real_dev->flags & IFF_UP)) - goto out_unlock; + return -ENETDOWN; - if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) { + if (__find_vlan_dev(real_dev, vlan_id) != NULL) { /* was already registered. 
*/ printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__); - goto out_unlock; + return -EEXIST; } + return 0; +} + +int register_vlan_dev(struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev = vlan->real_dev; + unsigned short vlan_id = vlan->vlan_id; + struct vlan_group *grp, *ngrp = NULL; + int err; + + grp = __vlan_find_group(real_dev->ifindex); + if (!grp) { + ngrp = grp = vlan_group_alloc(real_dev->ifindex); + if (!grp) + return -ENOBUFS; + } + + err = register_netdevice(dev); + if (err < 0) + goto out_free_group; + + /* Account for reference in struct vlan_dev_info */ + dev_hold(real_dev); + + vlan_transfer_operstate(real_dev, dev); + linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ + + /* So, got the sucker initialized, now lets place + * it into our local structure. + */ + vlan_group_set_device(grp, vlan_id, dev); + if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) + real_dev->vlan_rx_register(real_dev, ngrp); + if (real_dev->features & NETIF_F_HW_VLAN_FILTER) + real_dev->vlan_rx_add_vid(real_dev, vlan_id); + + if (vlan_proc_add_dev(dev) < 0) + printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", + dev->name); + return 0; + +out_free_group: + if (ngrp) + vlan_group_free(ngrp); + return err; +} + +/* Attach a VLAN device to a mac address (ie Ethernet Card). + * Returns 0 if the device was created or a negative error code otherwise. + */ +static int register_vlan_device(struct net_device *real_dev, + unsigned short VLAN_ID) +{ + struct net_device *new_dev; + char name[IFNAMSIZ]; + int err; + +#ifdef VLAN_DEBUG + printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", + __FUNCTION__, eth_IF_name, VLAN_ID); +#endif + + if (VLAN_ID >= VLAN_VID_MASK) + return -ERANGE; + + err = vlan_check_real_dev(real_dev, VLAN_ID); + if (err < 0) + return err; + /* Gotta set up the fields for the device. 
*/ #ifdef VLAN_DEBUG printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n", @@ -471,138 +557,40 @@ vlan_setup); if (new_dev == NULL) - goto out_unlock; - -#ifdef VLAN_DEBUG - printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); -#endif - /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - new_dev->flags = real_dev->flags; - new_dev->flags &= ~IFF_UP; - - new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | - (1<<__LINK_STATE_DORMANT))) | - (1<<__LINK_STATE_PRESENT); + return -ENOBUFS; /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. */ new_dev->mtu = real_dev->mtu; - /* TODO: maybe just assign it to be ETHERNET? */ - new_dev->type = real_dev->type; - - new_dev->hard_header_len = real_dev->hard_header_len; - if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) { - /* Regular ethernet + 4 bytes (18 total). */ - new_dev->hard_header_len += VLAN_HLEN; - } - +#ifdef VLAN_DEBUG + printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n", new_dev->priv, sizeof(struct vlan_dev_info)); - - memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len); - memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); - new_dev->addr_len = real_dev->addr_len; - - if (real_dev->features & NETIF_F_HW_VLAN_TX) { - new_dev->hard_header = real_dev->hard_header; - new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; - new_dev->rebuild_header = real_dev->rebuild_header; - } else { - new_dev->hard_header = vlan_dev_hard_header; - new_dev->hard_start_xmit = vlan_dev_hard_start_xmit; - new_dev->rebuild_header = vlan_dev_rebuild_header; - } - new_dev->hard_header_parse = real_dev->hard_header_parse; +#endif VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ VLAN_DEV_INFO(new_dev)->real_dev = real_dev; VLAN_DEV_INFO(new_dev)->dent = NULL; - VLAN_DEV_INFO(new_dev)->flags = 1; + VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR; 
-#ifdef VLAN_DEBUG - printk(VLAN_DBG "About to go find the group for idx: %i\n", - real_dev->ifindex); -#endif - - if (register_netdevice(new_dev)) + new_dev->rtnl_link_ops = &vlan_link_ops; + err = register_vlan_dev(new_dev); + if (err < 0) goto out_free_newdev; - lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); - - new_dev->iflink = real_dev->ifindex; - vlan_transfer_operstate(real_dev, new_dev); - linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ - - /* So, got the sucker initialized, now lets place - * it into our local structure. - */ - grp = __vlan_find_group(real_dev->ifindex); - - /* Note, we are running under the RTNL semaphore - * so it cannot "appear" on us. - */ - if (!grp) { /* need to add a new group */ - grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); - if (!grp) - goto out_free_unregister; - - for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { - grp->vlan_devices_arrays[i] = kzalloc( - sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN, - GFP_KERNEL); - - if (!grp->vlan_devices_arrays[i]) - goto out_free_arrays; - } - - /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */ - grp->real_dev_ifindex = real_dev->ifindex; - - hlist_add_head_rcu(&grp->hlist, - &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]); - - if (real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, grp); - } - - vlan_group_set_device(grp, VLAN_ID, new_dev); - - if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */ - printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", - new_dev->name); - - if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - real_dev->vlan_rx_add_vid(real_dev, VLAN_ID); - - rtnl_unlock(); - - + /* Account for reference in struct vlan_dev_info */ + dev_hold(real_dev); #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new device successfully, returning.\n"); #endif - return new_dev; - -out_free_arrays: - vlan_group_free(grp); - -out_free_unregister: - 
unregister_netdev(new_dev); - goto out_unlock; + return 0; out_free_newdev: free_netdev(new_dev); - -out_unlock: - rtnl_unlock(); - -out_put_dev: - dev_put(real_dev); - -out_ret_null: - return NULL; + return err; } static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -612,6 +600,9 @@ int i, flgs; struct net_device *vlandev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!grp) goto out; @@ -691,11 +682,12 @@ * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. */ -static int vlan_ioctl_handler(void __user *arg) +static int vlan_ioctl_handler(struct net *net, void __user *arg) { - int err = 0; + int err; unsigned short vid = 0; struct vlan_ioctl_args args; + struct net_device *dev = NULL; if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args))) return -EFAULT; @@ -708,35 +700,61 @@ printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd); #endif + rtnl_lock(); + switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: + case SET_VLAN_EGRESS_PRIORITY_CMD: + case SET_VLAN_FLAG_CMD: + case ADD_VLAN_CMD: + case DEL_VLAN_CMD: + case GET_VLAN_REALDEV_NAME_CMD: + case GET_VLAN_VID_CMD: + err = -ENODEV; + dev = __dev_get_by_name(&init_net, args.device1); + if (!dev) + goto out; + + err = -EINVAL; + if (args.cmd != ADD_VLAN_CMD && + !(dev->priv_flags & IFF_802_1Q_VLAN)) + goto out; + } + + switch (args.cmd) { + case SET_VLAN_INGRESS_PRIORITY_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_ingress_priority(args.device1, + break; + vlan_dev_set_ingress_priority(dev, args.u.skb_priority, args.vlan_qos); break; case SET_VLAN_EGRESS_PRIORITY_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_egress_priority(args.device1, + break; + err = vlan_dev_set_egress_priority(dev, args.u.skb_priority, args.vlan_qos); break; case SET_VLAN_FLAG_CMD: + err = -EPERM; if 
(!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_vlan_flag(args.device1, + break; + err = vlan_dev_set_vlan_flag(dev, args.u.flag, args.vlan_qos); break; case SET_VLAN_NAME_TYPE_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { + if ((args.u.name_type >= 0) && + (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { vlan_name_type = args.u.name_type; err = 0; } else { @@ -745,26 +763,17 @@ break; case ADD_VLAN_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* we have been given the name of the Ethernet Device we want to - * talk to: args.dev1 We also have the - * VLAN ID: args.u.VID - */ - if (register_vlan_device(args.device1, args.u.VID)) { - err = 0; - } else { - err = -EINVAL; - } + break; + err = register_vlan_device(dev, args.u.VID); break; case DEL_VLAN_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* Here, the args.dev1 is the actual VLAN we want - * to get rid of. - */ - err = unregister_vlan_device(args.device1); + break; + err = unregister_vlan_device(dev); break; case GET_VLAN_INGRESS_PRIORITY_CMD: @@ -788,9 +797,7 @@ err = -EINVAL; break; case GET_VLAN_REALDEV_NAME_CMD: - err = vlan_dev_get_realdev_name(args.device1, args.u.device2); - if (err) - goto out; + vlan_dev_get_realdev_name(dev, args.u.device2); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) { err = -EFAULT; @@ -798,9 +805,7 @@ break; case GET_VLAN_VID_CMD: - err = vlan_dev_get_vid(args.device1, &vid); - if (err) - goto out; + vlan_dev_get_vid(dev, &vid); args.u.VID = vid; if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) { @@ -812,9 +817,11 @@ /* pass on to underlying device instead?? 
*/ printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n", __FUNCTION__, args.cmd); - return -EINVAL; + err = -EINVAL; + break; } out: + rtnl_unlock(); return err; } diff -Nurb linux-2.6.22-570/net/8021q/vlan.h linux-2.6.22-591/net/8021q/vlan.h --- linux-2.6.22-570/net/8021q/vlan.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/8021q/vlan.h 2007-12-21 15:36:12.000000000 -0500 @@ -62,11 +62,24 @@ int vlan_dev_open(struct net_device* dev); int vlan_dev_stop(struct net_device* dev); int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd); -int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val); -int vlan_dev_get_realdev_name(const char* dev_name, char* result); -int vlan_dev_get_vid(const char* dev_name, unsigned short* result); +void vlan_dev_set_ingress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio); +int vlan_dev_set_egress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio); +int vlan_dev_set_vlan_flag(const struct net_device *dev, + u32 flag, short flag_val); +void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); +void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); void vlan_dev_set_multicast_list(struct net_device *vlan_dev); +int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); +void vlan_setup(struct net_device *dev); +int register_vlan_dev(struct net_device *dev); +int unregister_vlan_device(struct net_device *dev); + +int vlan_netlink_init(void); +void vlan_netlink_fini(void); + +extern struct rtnl_link_ops vlan_link_ops; + #endif /* !(__BEN_VLAN_802_1Q_INC__) */ diff -Nurb linux-2.6.22-570/net/8021q/vlan_dev.c linux-2.6.22-591/net/8021q/vlan_dev.c --- linux-2.6.22-570/net/8021q/vlan_dev.c 2007-12-21 15:35:57.000000000 -0500 +++ 
linux-2.6.22-591/net/8021q/vlan_dev.c 2007-12-21 15:36:15.000000000 -0500 @@ -73,7 +73,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) { - if (VLAN_DEV_INFO(skb->dev)->flags & 1) { + if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { if (skb_shared(skb) || skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); kfree_skb(skb); @@ -132,6 +132,11 @@ vhdr = (struct vlan_hdr *)(skb->data); + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); @@ -360,7 +365,8 @@ * header shuffling in the hard_start_xmit. Users can turn off this * REORDER behaviour with the vconfig tool. */ - build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0); + if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR)) + build_vlan_header = 1; if (build_vlan_header) { vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); @@ -544,136 +550,83 @@ return 0; } -int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) +void vlan_dev_set_ingress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* see if a priority mapping exists.. 
*/ - VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio; - dev_put(dev); - return 0; - } + if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) + vlan->nr_ingress_mappings--; + else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio) + vlan->nr_ingress_mappings++; - dev_put(dev); - } - return -EINVAL; + vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio; } -int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) +int vlan_dev_set_egress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; + u32 vlan_qos = (vlan_prio << 13) & 0xE000; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { /* See if a priority mapping exists.. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; + mp = vlan->egress_priority_map[skb_prio & 0xF]; while (mp) { if (mp->priority == skb_prio) { - mp->vlan_qos = ((vlan_prio << 13) & 0xE000); - dev_put(dev); + if (mp->vlan_qos && !vlan_qos) + vlan->nr_egress_mappings--; + else if (!mp->vlan_qos && vlan_qos) + vlan->nr_egress_mappings++; + mp->vlan_qos = vlan_qos; return 0; } mp = mp->next; } /* Create a new mapping then. 
*/ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; + mp = vlan->egress_priority_map[skb_prio & 0xF]; np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); - if (np) { + if (!np) + return -ENOBUFS; + np->next = mp; np->priority = skb_prio; - np->vlan_qos = ((vlan_prio << 13) & 0xE000); - VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np; - dev_put(dev); + np->vlan_qos = vlan_qos; + vlan->egress_priority_map[skb_prio & 0xF] = np; + if (vlan_qos) + vlan->nr_egress_mappings++; return 0; - } else { - dev_put(dev); - return -ENOBUFS; - } - } - dev_put(dev); - } - return -EINVAL; } -/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */ -int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val) +/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ +int vlan_dev_set_vlan_flag(const struct net_device *dev, + u32 flag, short flag_val) { - struct net_device *dev = dev_get_by_name(dev_name); - - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { /* verify flag is supported */ - if (flag == 1) { + if (flag == VLAN_FLAG_REORDER_HDR) { if (flag_val) { - VLAN_DEV_INFO(dev)->flags |= 1; + VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR; } else { - VLAN_DEV_INFO(dev)->flags &= ~1; + VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; } - dev_put(dev); return 0; - } else { - printk(KERN_ERR "%s: flag %i is not valid.\n", - __FUNCTION__, (int)(flag)); - dev_put(dev); - return -EINVAL; } - } else { - printk(KERN_ERR - "%s: %s is not a vlan device, priv_flags: %hX.\n", - __FUNCTION__, dev->name, dev->priv_flags); - dev_put(dev); - } - } else { - printk(KERN_ERR "%s: Could not find device: %s\n", - __FUNCTION__, dev_name); - } - + printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag); return -EINVAL; } - -int vlan_dev_get_realdev_name(const char *dev_name, char* result) +void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) { - struct 
net_device *dev = dev_get_by_name(dev_name); - int rv = 0; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); - rv = 0; - } else { - rv = -EINVAL; - } - dev_put(dev); - } else { - rv = -ENODEV; - } - return rv; } -int vlan_dev_get_vid(const char *dev_name, unsigned short* result) +void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) { - struct net_device *dev = dev_get_by_name(dev_name); - int rv = 0; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { *result = VLAN_DEV_INFO(dev)->vlan_id; - rv = 0; - } else { - rv = -EINVAL; - } - dev_put(dev); - } else { - rv = -ENODEV; - } - return rv; } - int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p) { struct sockaddr *addr = (struct sockaddr *)(addr_struct_p); @@ -828,7 +781,7 @@ break; case SIOCETHTOOL: - err = dev_ethtool(&ifrr); + err = dev_ethtool(real_dev->nd_net, &ifrr); } if (!err) diff -Nurb linux-2.6.22-570/net/8021q/vlan_netlink.c linux-2.6.22-591/net/8021q/vlan_netlink.c --- linux-2.6.22-570/net/8021q/vlan_netlink.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/net/8021q/vlan_netlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -0,0 +1,237 @@ +/* + * VLAN netlink control interface + * + * Copyright (c) 2007 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "vlan.h" + + +static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { + [IFLA_VLAN_ID] = { .type = NLA_U16 }, + [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, + [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, + [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { + [IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) }, +}; + + +static inline int vlan_validate_qos_map(struct nlattr *attr) +{ + if (!attr) + return 0; + return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy); +} + +static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + struct ifla_vlan_flags *flags; + u16 id; + int err; + + if (!data) + return -EINVAL; + + if (data[IFLA_VLAN_ID]) { + id = nla_get_u16(data[IFLA_VLAN_ID]); + if (id >= VLAN_VID_MASK) + return -ERANGE; + } + if (data[IFLA_VLAN_FLAGS]) { + flags = nla_data(data[IFLA_VLAN_FLAGS]); + if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR) + return -EINVAL; + } + + err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]); + if (err < 0) + return err; + err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]); + if (err < 0) + return err; + return 0; +} + +static int vlan_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct ifla_vlan_flags *flags; + struct ifla_vlan_qos_mapping *m; + struct nlattr *attr; + int rem; + + if (data[IFLA_VLAN_FLAGS]) { + flags = nla_data(data[IFLA_VLAN_FLAGS]); + vlan->flags = (vlan->flags & ~flags->mask) | + (flags->flags & flags->mask); + } + if (data[IFLA_VLAN_INGRESS_QOS]) { + nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + m = nla_data(attr); + vlan_dev_set_ingress_priority(dev, m->to, m->from); + } + } + if (data[IFLA_VLAN_EGRESS_QOS]) { + nla_for_each_nested(attr, 
data[IFLA_VLAN_EGRESS_QOS], rem) { + m = nla_data(attr); + vlan_dev_set_egress_priority(dev, m->from, m->to); + } + } + return 0; +} + +static int vlan_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev; + int err; + + if (!data[IFLA_VLAN_ID]) + return -EINVAL; + + if (!tb[IFLA_LINK]) + return -EINVAL; + real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + + vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); + vlan->real_dev = real_dev; + vlan->flags = VLAN_FLAG_REORDER_HDR; + + err = vlan_check_real_dev(real_dev, vlan->vlan_id); + if (err < 0) + return err; + + if (!tb[IFLA_MTU]) + dev->mtu = real_dev->mtu; + else if (dev->mtu > real_dev->mtu) + return -EINVAL; + + err = vlan_changelink(dev, tb, data); + if (err < 0) + return err; + + return register_vlan_dev(dev); +} + +static void vlan_dellink(struct net_device *dev) +{ + unregister_vlan_device(dev); +} + +static inline size_t vlan_qos_map_size(unsigned int n) +{ + if (n == 0) + return 0; + /* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */ + return nla_total_size(sizeof(struct nlattr)) + + nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n; +} + +static size_t vlan_get_size(const struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + + return nla_total_size(2) + /* IFLA_VLAN_ID */ + vlan_qos_map_size(vlan->nr_ingress_mappings) + + vlan_qos_map_size(vlan->nr_egress_mappings); +} + +static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct vlan_priority_tci_mapping *pm; + struct ifla_vlan_flags f; + struct ifla_vlan_qos_mapping m; + struct nlattr *nest; + unsigned int i; + + NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id); + if (vlan->flags) { + f.flags = vlan->flags; + f.mask = ~0; + NLA_PUT(skb, IFLA_VLAN_FLAGS, 
sizeof(f), &f); + } + if (vlan->nr_ingress_mappings) { + nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS); + if (nest == NULL) + goto nla_put_failure; + + for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) { + if (!vlan->ingress_priority_map[i]) + continue; + + m.from = i; + m.to = vlan->ingress_priority_map[i]; + NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING, + sizeof(m), &m); + } + nla_nest_end(skb, nest); + } + + if (vlan->nr_egress_mappings) { + nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS); + if (nest == NULL) + goto nla_put_failure; + + for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { + for (pm = vlan->egress_priority_map[i]; pm; + pm = pm->next) { + if (!pm->vlan_qos) + continue; + + m.from = pm->priority; + m.to = (pm->vlan_qos >> 13) & 0x7; + NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING, + sizeof(m), &m); + } + } + nla_nest_end(skb, nest); + } + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +struct rtnl_link_ops vlan_link_ops __read_mostly = { + .kind = "vlan", + .maxtype = IFLA_VLAN_MAX, + .policy = vlan_policy, + .priv_size = sizeof(struct vlan_dev_info), + .setup = vlan_setup, + .validate = vlan_validate, + .newlink = vlan_newlink, + .changelink = vlan_changelink, + .dellink = vlan_dellink, + .get_size = vlan_get_size, + .fill_info = vlan_fill_info, +}; + +int __init vlan_netlink_init(void) +{ + return rtnl_link_register(&vlan_link_ops); +} + +void __exit vlan_netlink_fini(void) +{ + rtnl_link_unregister(&vlan_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("vlan"); diff -Nurb linux-2.6.22-570/net/8021q/vlanproc.c linux-2.6.22-591/net/8021q/vlanproc.c --- linux-2.6.22-570/net/8021q/vlanproc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/8021q/vlanproc.c 2007-12-21 15:36:15.000000000 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #include "vlanproc.h" #include "vlan.h" @@ -143,7 +144,7 @@ remove_proc_entry(name_conf, proc_vlan_dir); if (proc_vlan_dir) - proc_net_remove(name_root); + proc_net_remove(&init_net, 
name_root); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... @@ -156,7 +157,7 @@ int __init vlan_proc_init(void) { - proc_vlan_dir = proc_mkdir(name_root, proc_net); + proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); if (proc_vlan_dir) { proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR, @@ -253,7 +254,7 @@ if (*pos == 0) return SEQ_START_TOKEN; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_vlan_dev(dev)) continue; @@ -272,9 +273,9 @@ dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_vlan_dev(dev)) continue; @@ -342,7 +343,7 @@ seq_printf(seq, "Device: %s", dev_info->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, - "\nINGRESS priority mappings: 0:%lu 1:%lu 2:%lu 3:%lu 4:%lu 5:%lu 6:%lu 7:%lu\n", + "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", dev_info->ingress_priority_map[0], dev_info->ingress_priority_map[1], dev_info->ingress_priority_map[2], @@ -357,7 +358,7 @@ const struct vlan_priority_tci_mapping *mp = dev_info->egress_priority_map[i]; while (mp) { - seq_printf(seq, "%lu:%hu ", + seq_printf(seq, "%u:%hu ", mp->priority, ((mp->vlan_qos >> 13) & 0x7)); mp = mp->next; } diff -Nurb linux-2.6.22-570/net/Kconfig linux-2.6.22-591/net/Kconfig --- linux-2.6.22-570/net/Kconfig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/Kconfig 2007-12-21 15:36:15.000000000 -0500 @@ -27,6 +27,13 @@ menu "Networking options" +config NET_NS + bool "Network namespace support" + depends on EXPERIMENTAL + help + Support what appear to user space as multiple instances of the + network stack. 
+ source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff -Nurb linux-2.6.22-570/net/Makefile linux-2.6.22-591/net/Makefile --- linux-2.6.22-570/net/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/Makefile 2007-12-23 03:20:02.000000000 -0500 @@ -14,7 +14,7 @@ # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ +obj-$(CONFIG_NET) += ethernet/ sched/ netlink/ 802/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ diff -Nurb linux-2.6.22-570/net/Makefile.orig linux-2.6.22-591/net/Makefile.orig --- linux-2.6.22-570/net/Makefile.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/net/Makefile.orig 2007-07-08 19:32:17.000000000 -0400 @@ -0,0 +1,58 @@ +# +# Makefile for the linux networking. +# +# 2 Sep 2000, Christoph Hellwig +# Rewritten to use lists instead of if-statements. +# + +obj-y := nonet.o + +obj-$(CONFIG_NET) := socket.o core/ + +tmp-$(CONFIG_COMPAT) := compat.o +obj-$(CONFIG_NET) += $(tmp-y) + +# LLC has to be linked before the files in net/802/ +obj-$(CONFIG_LLC) += llc/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ +obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_INET) += ipv4/ +obj-$(CONFIG_XFRM) += xfrm/ +obj-$(CONFIG_UNIX) += unix/ +ifneq ($(CONFIG_IPV6),) +obj-y += ipv6/ +endif +obj-$(CONFIG_PACKET) += packet/ +obj-$(CONFIG_NET_KEY) += key/ +obj-$(CONFIG_NET_SCHED) += sched/ +obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_IPX) += ipx/ +obj-$(CONFIG_ATALK) += appletalk/ +obj-$(CONFIG_WAN_ROUTER) += wanrouter/ +obj-$(CONFIG_X25) += x25/ +obj-$(CONFIG_LAPB) += lapb/ +obj-$(CONFIG_NETROM) += netrom/ +obj-$(CONFIG_ROSE) += rose/ +obj-$(CONFIG_AX25) += ax25/ +obj-$(CONFIG_IRDA) += irda/ +obj-$(CONFIG_BT) += bluetooth/ +obj-$(CONFIG_SUNRPC) += sunrpc/ +obj-$(CONFIG_RXRPC) += rxrpc/ +obj-$(CONFIG_AF_RXRPC) += rxrpc/ +obj-$(CONFIG_ATM) += atm/ +obj-$(CONFIG_DECNET) += 
decnet/ +obj-$(CONFIG_ECONET) += econet/ +obj-$(CONFIG_VLAN_8021Q) += 8021q/ +obj-$(CONFIG_IP_DCCP) += dccp/ +obj-$(CONFIG_IP_SCTP) += sctp/ +obj-y += wireless/ +obj-$(CONFIG_MAC80211) += mac80211/ +obj-$(CONFIG_IEEE80211) += ieee80211/ +obj-$(CONFIG_TIPC) += tipc/ +obj-$(CONFIG_NETLABEL) += netlabel/ +obj-$(CONFIG_IUCV) += iucv/ +obj-$(CONFIG_RFKILL) += rfkill/ + +ifeq ($(CONFIG_NET),y) +obj-$(CONFIG_SYSCTL) += sysctl_net.o +endif diff -Nurb linux-2.6.22-570/net/appletalk/aarp.c linux-2.6.22-591/net/appletalk/aarp.c --- linux-2.6.22-570/net/appletalk/aarp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/appletalk/aarp.c 2007-12-21 15:36:15.000000000 -0500 @@ -330,15 +330,19 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; int ct; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { write_lock_bh(&aarp_lock); for (ct = 0; ct < AARP_HASH_SIZE; ct++) { - __aarp_expire_device(&resolved[ct], ptr); - __aarp_expire_device(&unresolved[ct], ptr); - __aarp_expire_device(&proxies[ct], ptr); + __aarp_expire_device(&resolved[ct], dev); + __aarp_expire_device(&unresolved[ct], dev); + __aarp_expire_device(&proxies[ct], dev); } write_unlock_bh(&aarp_lock); @@ -712,6 +716,9 @@ struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; + if (dev->nd_net != &init_net) + goto out0; + /* We only do Ethernet SNAP AARP. 
*/ if (dev->type != ARPHRD_ETHER) goto out0; diff -Nurb linux-2.6.22-570/net/appletalk/atalk_proc.c linux-2.6.22-591/net/appletalk/atalk_proc.c --- linux-2.6.22-570/net/appletalk/atalk_proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/appletalk/atalk_proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) @@ -271,7 +272,7 @@ struct proc_dir_entry *p; int rc = -ENOMEM; - atalk_proc_dir = proc_mkdir("atalk", proc_net); + atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net); if (!atalk_proc_dir) goto out; atalk_proc_dir->owner = THIS_MODULE; @@ -306,7 +307,7 @@ out_route: remove_proc_entry("interface", atalk_proc_dir); out_interface: - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); goto out; } @@ -316,5 +317,5 @@ remove_proc_entry("route", atalk_proc_dir); remove_proc_entry("socket", atalk_proc_dir); remove_proc_entry("arp", atalk_proc_dir); - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); } diff -Nurb linux-2.6.22-570/net/appletalk/ddp.c linux-2.6.22-591/net/appletalk/ddp.c --- linux-2.6.22-570/net/appletalk/ddp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/appletalk/ddp.c 2007-12-21 15:36:15.000000000 -0500 @@ -647,9 +647,14 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) /* Discard any use of this */ - atalk_dev_down(ptr); + atalk_dev_down(dev); return NOTIFY_DONE; } @@ -672,7 +677,7 @@ if (copy_from_user(&atreq, arg, sizeof(atreq))) return -EFAULT; - dev = __dev_get_by_name(atreq.ifr_name); + dev = __dev_get_by_name(&init_net, atreq.ifr_name); if (!dev) return -ENODEV; @@ -896,7 +901,7 @@ if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) return -EFAULT; name[IFNAMSIZ-1] = 
'\0'; - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (!dev) return -ENODEV; } @@ -1024,11 +1029,14 @@ * Create a socket. Initialise the socket, blank the addresses * set the state. */ -static int atalk_create(struct socket *sock, int protocol) +static int atalk_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do * and gives you the full ELAP frame. Should be handy for CAP 8) @@ -1036,7 +1044,7 @@ if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); if (!sk) goto out; rc = 0; @@ -1265,7 +1273,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb) { - struct net_device *dev = __dev_get_by_name("ipddp0"); + struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); struct net_device_stats *stats; /* This needs to be able to handle ipddp"N" devices */ @@ -1398,6 +1406,9 @@ int origlen; __u16 len_hops; + if (dev->nd_net != &init_net) + goto freeit; + /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; @@ -1483,6 +1494,9 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + if (dev->nd_net != &init_net) + goto freeit; + /* Expand any short form frames */ if (skb_mac_header(skb)[2] == 1) { struct ddpehdr *ddp; diff -Nurb linux-2.6.22-570/net/atm/clip.c linux-2.6.22-591/net/atm/clip.c --- linux-2.6.22-570/net/atm/clip.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/clip.c 2007-12-21 15:36:15.000000000 -0500 @@ -293,7 +293,7 @@ struct neigh_parms *parms; DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry); - neigh->type = inet_addr_type(entry->ip); + neigh->type = 
inet_addr_type(&init_net, entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; @@ -525,7 +525,10 @@ struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} + }; struct rtable *rt; if (vcc->push != clip_push) { @@ -620,6 +623,9 @@ { struct net_device *dev = arg; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { neigh_ifdown(&clip_tbl, dev); return NOTIFY_DONE; @@ -954,6 +960,7 @@ seq = file->private_data; seq->private = state; + state->ns.net = get_net(PROC_NET(inode)); out: return rc; @@ -962,11 +969,19 @@ goto out; } +static int arp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct clip_seq_state *state = seq->private; + put_net(state->ns.net); + return seq_release_private(inode, file); +} + static const struct file_operations arp_seq_fops = { .open = arp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = arp_seq_release, .owner = THIS_MODULE }; #endif diff -Nurb linux-2.6.22-570/net/atm/common.c linux-2.6.22-591/net/atm/common.c --- linux-2.6.22-570/net/atm/common.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/common.c 2007-12-21 15:36:15.000000000 -0500 @@ -132,7 +132,7 @@ .obj_size = sizeof(struct atm_vcc), }; -int vcc_create(struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family) { struct sock *sk; struct atm_vcc *vcc; @@ -140,7 +140,7 @@ sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff -Nurb linux-2.6.22-570/net/atm/common.h linux-2.6.22-591/net/atm/common.h --- 
linux-2.6.22-570/net/atm/common.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/common.h 2007-12-21 15:36:15.000000000 -0500 @@ -10,7 +10,7 @@ #include /* for poll_table */ -int vcc_create(struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, diff -Nurb linux-2.6.22-570/net/atm/mpc.c linux-2.6.22-591/net/atm/mpc.c --- linux-2.6.22-570/net/atm/mpc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/mpc.c 2007-12-21 15:36:15.000000000 -0500 @@ -244,7 +244,7 @@ char name[IFNAMSIZ]; sprintf(name, "lec%d", itf); - dev = dev_get_by_name(name); + dev = dev_get_by_name(&init_net, name); return dev; } @@ -956,6 +956,10 @@ struct lec_priv *priv; dev = (struct net_device *)dev_ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->name == NULL || strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ diff -Nurb linux-2.6.22-570/net/atm/proc.c linux-2.6.22-591/net/atm/proc.c --- linux-2.6.22-570/net/atm/proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -22,6 +22,7 @@ #include #include #include /* for __init */ +#include #include #include #include @@ -475,7 +476,7 @@ if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - remove_proc_entry("net/atm", NULL); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) @@ -483,7 +484,7 @@ static struct atm_proc_entry *e; int ret; - atm_proc_root = proc_mkdir("net/atm",NULL); + atm_proc_root = proc_mkdir("atm", init_net.proc_net); if (!atm_proc_root) goto err_out; for (e = atm_proc_ents; e->name; e++) { diff -Nurb linux-2.6.22-570/net/atm/pvc.c linux-2.6.22-591/net/atm/pvc.c --- 
linux-2.6.22-570/net/atm/pvc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/pvc.c 2007-12-21 15:36:15.000000000 -0500 @@ -124,10 +124,13 @@ }; -static int pvc_create(struct socket *sock,int protocol) +static int pvc_create(struct net *net, struct socket *sock,int protocol) { + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &pvc_proto_ops; - return vcc_create(sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC); } diff -Nurb linux-2.6.22-570/net/atm/svc.c linux-2.6.22-591/net/atm/svc.c --- linux-2.6.22-570/net/atm/svc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/atm/svc.c 2007-12-21 15:36:15.000000000 -0500 @@ -33,7 +33,7 @@ #endif -static int svc_create(struct socket *sock,int protocol); +static int svc_create(struct net *net, struct socket *sock,int protocol); /* @@ -335,7 +335,7 @@ lock_sock(sk); - error = svc_create(newsock,0); + error = svc_create(sk->sk_net, newsock,0); if (error) goto out; @@ -636,12 +636,15 @@ }; -static int svc_create(struct socket *sock,int protocol) +static int svc_create(struct net *net, struct socket *sock,int protocol) { int error; + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &svc_proto_ops; - error = vcc_create(sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; ATM_SD(sock)->remote.sas_family = AF_ATMSVC; diff -Nurb linux-2.6.22-570/net/ax25/af_ax25.c linux-2.6.22-591/net/ax25/af_ax25.c --- linux-2.6.22-570/net/ax25/af_ax25.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ax25/af_ax25.c 2007-12-21 15:36:15.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include @@ -103,6 +104,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Reject non AX.25 devices */ if (dev->type != ARPHRD_AX25) return NOTIFY_DONE; @@ -627,7 +631,7 @@ break; } - dev = 
dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) { res = -ENODEV; break; @@ -779,11 +783,14 @@ .obj_size = sizeof(struct sock), }; -static int ax25_create(struct socket *sock, int protocol) +static int ax25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; ax25_cb *ax25; + if (net != &init_net) + return -EAFNOSUPPORT; + switch (sock->type) { case SOCK_DGRAM: if (protocol == 0 || protocol == PF_AX25) @@ -829,7 +836,7 @@ return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) return -ENOMEM; ax25 = sk->sk_protinfo = ax25_create_cb(); @@ -854,7 +861,7 @@ struct sock *sk; ax25_cb *ax25, *oax25; - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { @@ -1998,9 +2005,9 @@ register_netdevice_notifier(&ax25_dev_notifier); ax25_register_sysctl(); - proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); + proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); + proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); out: return rc; } @@ -2014,9 +2021,9 @@ static void __exit ax25_exit(void) { - proc_net_remove("ax25_route"); - proc_net_remove("ax25"); - proc_net_remove("ax25_calls"); + proc_net_remove(&init_net, "ax25_route"); + proc_net_remove(&init_net, "ax25"); + proc_net_remove(&init_net, "ax25_calls"); ax25_rt_free(); ax25_uid_free(); ax25_dev_free(); diff -Nurb linux-2.6.22-570/net/ax25/ax25_in.c linux-2.6.22-591/net/ax25/ax25_in.c --- linux-2.6.22-570/net/ax25/ax25_in.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/ax25/ax25_in.c 2007-12-21 15:36:15.000000000 -0500 @@ -451,6 +451,11 @@ skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if ((*skb->data & 0x0F) != 0) { kfree_skb(skb); /* Not a KISS data frame */ return 0; diff -Nurb linux-2.6.22-570/net/bluetooth/af_bluetooth.c linux-2.6.22-591/net/bluetooth/af_bluetooth.c --- linux-2.6.22-570/net/bluetooth/af_bluetooth.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/af_bluetooth.c 2007-12-21 15:36:15.000000000 -0500 @@ -95,10 +95,13 @@ } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct socket *sock, int proto) +static int bt_sock_create(struct net *net, struct socket *sock, int proto) { int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; @@ -113,7 +116,7 @@ read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(sock, proto); + err = bt_proto[proto]->create(net, sock, proto); module_put(bt_proto[proto]->owner); } diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/core.c linux-2.6.22-591/net/bluetooth/bnep/core.c --- linux-2.6.22-570/net/bluetooth/bnep/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/bnep/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -474,7 +475,6 @@ daemonize("kbnepd %s", dev->name); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&wait, current); add_wait_queue(sk->sk_sleep, &wait); diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/sock.c linux-2.6.22-591/net/bluetooth/bnep/sock.c --- linux-2.6.22-570/net/bluetooth/bnep/sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/bnep/sock.c 2007-12-21 15:36:15.000000000 -0500 
@@ -204,7 +204,7 @@ .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct socket *sock, int protocol) +static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -213,7 +213,7 @@ if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/core.c linux-2.6.22-591/net/bluetooth/cmtp/core.c --- linux-2.6.22-570/net/bluetooth/cmtp/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/cmtp/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -287,7 +288,6 @@ daemonize("kcmtpd_ctr_%d", session->num); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&wait, current); add_wait_queue(sk->sk_sleep, &wait); diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/sock.c linux-2.6.22-591/net/bluetooth/cmtp/sock.c --- linux-2.6.22-570/net/bluetooth/cmtp/sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/cmtp/sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -195,7 +195,7 @@ .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct socket *sock, int protocol) +static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -204,7 +204,7 @@ if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-570/net/bluetooth/hci_sock.c linux-2.6.22-591/net/bluetooth/hci_sock.c --- linux-2.6.22-570/net/bluetooth/hci_sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/hci_sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -618,7 +618,7 @@ .obj_size = sizeof(struct 
hci_pinfo) }; -static int hci_sock_create(struct socket *sock, int protocol) +static int hci_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -629,7 +629,7 @@ sock->ops = &hci_sock_ops; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/core.c linux-2.6.22-591/net/bluetooth/hidp/core.c --- linux-2.6.22-570/net/bluetooth/hidp/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/hidp/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -547,7 +548,6 @@ daemonize("khidpd_%04x%04x", vendor, product); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/sock.c linux-2.6.22-591/net/bluetooth/hidp/sock.c --- linux-2.6.22-570/net/bluetooth/hidp/sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/hidp/sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -246,7 +246,7 @@ .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct socket *sock, int protocol) +static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -255,7 +255,7 @@ if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-570/net/bluetooth/l2cap.c linux-2.6.22-591/net/bluetooth/l2cap.c --- linux-2.6.22-570/net/bluetooth/l2cap.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/l2cap.c 2007-12-21 15:36:15.000000000 -0500 @@ -518,11 +518,11 @@ .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct 
socket *sock, int proto, gfp_t prio) +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); if (!sk) return NULL; @@ -543,7 +543,7 @@ return sk; } -static int l2cap_sock_create(struct socket *sock, int protocol) +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -560,7 +560,7 @@ sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -1425,7 +1425,7 @@ goto response; } - sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC); + sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); if (!sk) goto response; diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/core.c linux-2.6.22-591/net/bluetooth/rfcomm/core.c --- linux-2.6.22-570/net/bluetooth/rfcomm/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/rfcomm/core.c 2007-12-21 15:36:12.000000000 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1940,7 +1941,6 @@ daemonize("krfcommd"); set_user_nice(current, -10); - current->flags |= PF_NOFREEZE; BT_DBG(""); diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/sock.c linux-2.6.22-591/net/bluetooth/rfcomm/sock.c --- linux-2.6.22-570/net/bluetooth/rfcomm/sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/rfcomm/sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -282,12 +282,12 @@ .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, 
&rfcomm_proto, 1); if (!sk) return NULL; @@ -323,7 +323,7 @@ return sk; } -static int rfcomm_sock_create(struct socket *sock, int protocol) +static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -336,7 +336,7 @@ sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -868,7 +868,7 @@ goto done; } - sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); if (!sk) goto done; diff -Nurb linux-2.6.22-570/net/bluetooth/sco.c linux-2.6.22-591/net/bluetooth/sco.c --- linux-2.6.22-570/net/bluetooth/sco.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bluetooth/sco.c 2007-12-21 15:36:15.000000000 -0500 @@ -414,11 +414,11 @@ .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); if (!sk) return NULL; @@ -439,7 +439,7 @@ return sk; } -static int sco_sock_create(struct socket *sock, int protocol) +static int sco_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -452,7 +452,7 @@ sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -807,7 +807,7 @@ bh_lock_sock(parent); - sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); goto done; diff -Nurb linux-2.6.22-570/net/bridge/br_if.c linux-2.6.22-591/net/bridge/br_if.c --- linux-2.6.22-570/net/bridge/br_if.c 2007-12-21 15:35:57.000000000 -0500 +++ 
linux-2.6.22-591/net/bridge/br_if.c 2007-12-21 15:36:15.000000000 -0500 @@ -45,7 +45,7 @@ old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ethtool(&ifr); + err = dev_ethtool(dev->nd_net, &ifr); set_fs(old_fs); if (!err) { @@ -314,7 +314,7 @@ int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -455,7 +455,7 @@ struct net_device *dev, *nxt; rtnl_lock(); - for_each_netdev_safe(dev, nxt) + for_each_netdev_safe(&init_net, dev, nxt) if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); rtnl_unlock(); diff -Nurb linux-2.6.22-570/net/bridge/br_ioctl.c linux-2.6.22-591/net/bridge/br_ioctl.c --- linux-2.6.22-570/net/bridge/br_ioctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_ioctl.c 2007-12-21 15:36:15.000000000 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -27,7 +28,7 @@ struct net_device *dev; int i = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -90,7 +91,7 @@ if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) return -EINVAL; @@ -364,7 +365,7 @@ return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) { switch (cmd) { case SIOCGIFBR: diff -Nurb linux-2.6.22-570/net/bridge/br_netfilter.c linux-2.6.22-591/net/bridge/br_netfilter.c --- linux-2.6.22-570/net/bridge/br_netfilter.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/bridge/br_netfilter.c 2007-12-21 15:36:15.000000000 -0500 @@ -310,6 +310,7 @@ if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct rtable *rt; struct flowi fl = { + .fl_net = &init_net, .nl_u = { .ip4_u = { .daddr = iph->daddr, @@ -518,6 +519,10 
@@ if (unlikely(!pskb_may_pull(skb, len))) goto out; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { #ifdef CONFIG_SYSCTL @@ -591,6 +596,10 @@ { struct sk_buff *skb = *pskb; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); skb->dst = NULL; @@ -635,6 +644,10 @@ struct net_device *parent; int pf; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (!skb->nf_bridge) return NF_ACCEPT; @@ -674,6 +687,10 @@ struct sk_buff *skb = *pskb; struct net_device **d = (struct net_device **)(skb->cb); + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #ifdef CONFIG_SYSCTL if (!brnf_call_arptables) return NF_ACCEPT; @@ -718,6 +735,10 @@ struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (!skb->nf_bridge) return NF_ACCEPT; @@ -762,6 +783,10 @@ struct net_device *realoutdev = bridge_parent(skb->dev); int pf; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #ifdef CONFIG_NETFILTER_DEBUG /* Be very paranoid. This probably won't happen anymore, but let's * keep the check just to be sure... 
*/ @@ -833,6 +858,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if ((*pskb)->nf_bridge && !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; diff -Nurb linux-2.6.22-570/net/bridge/br_netlink.c linux-2.6.22-591/net/bridge/br_netlink.c --- linux-2.6.22-570/net/bridge/br_netlink.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_netlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -12,6 +12,8 @@ #include #include +#include +#include #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -95,10 +97,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); } /* @@ -106,11 +108,15 @@ */ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct net_device *dev; int idx; + if (net != &init_net) + return 0; + idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -134,12 +140,16 @@ */ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *protinfo; struct net_device *dev; struct net_bridge_port *p; u8 new_state; + if (net != &init_net) + return -EINVAL; + if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -155,7 +165,7 @@ if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(&init_net, ifm->ifi_index); if (!dev) return -ENODEV; diff -Nurb linux-2.6.22-570/net/bridge/br_notify.c linux-2.6.22-591/net/bridge/br_notify.c --- 
linux-2.6.22-570/net/bridge/br_notify.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_notify.c 2007-12-21 15:36:15.000000000 -0500 @@ -15,6 +15,7 @@ #include #include +#include #include "br_private.h" @@ -36,6 +37,9 @@ struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff -Nurb linux-2.6.22-570/net/bridge/br_private.h linux-2.6.22-591/net/bridge/br_private.h --- linux-2.6.22-570/net/bridge/br_private.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_private.h 2007-12-21 15:36:15.000000000 -0500 @@ -196,7 +196,7 @@ /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); +extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER diff -Nurb linux-2.6.22-570/net/bridge/br_stp_bpdu.c linux-2.6.22-591/net/bridge/br_stp_bpdu.c --- linux-2.6.22-570/net/bridge/br_stp_bpdu.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_stp_bpdu.c 2007-12-21 15:36:15.000000000 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -141,6 +142,9 @@ struct net_bridge *br; const unsigned char *buf; + if (dev->nd_net != &init_net) + goto err; + if (!p) goto err; diff -Nurb linux-2.6.22-570/net/bridge/br_stp_if.c linux-2.6.22-591/net/bridge/br_stp_if.c --- linux-2.6.22-570/net/bridge/br_stp_if.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_stp_if.c 2007-12-21 15:36:12.000000000 -0500 @@ -125,7 +125,7 @@ char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; - r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (r == 0) { br->stp_enabled = 
BR_USER_STP; printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_br.c linux-2.6.22-591/net/bridge/br_sysfs_br.c --- linux-2.6.22-570/net/bridge/br_sysfs_br.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_sysfs_br.c 2007-12-21 15:36:12.000000000 -0500 @@ -360,8 +360,9 @@ * * Returns the number of bytes read. */ -static ssize_t brforward_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t brforward_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = to_dev(kobj); struct net_bridge *br = to_bridge(dev); @@ -383,8 +384,7 @@ static struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, - .mode = S_IRUGO, - .owner = THIS_MODULE, }, + .mode = S_IRUGO, }, .read = brforward_read, }; diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_if.c linux-2.6.22-591/net/bridge/br_sysfs_if.c --- linux-2.6.22-570/net/bridge/br_sysfs_if.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/br_sysfs_if.c 2007-12-21 15:36:12.000000000 -0500 @@ -29,8 +29,7 @@ #define BRPORT_ATTR(_name,_mode,_show,_store) \ struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ - .mode = _mode, \ - .owner = THIS_MODULE, }, \ + .mode = _mode }, \ .show = _show, \ .store = _store, \ }; diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebt_ulog.c linux-2.6.22-591/net/bridge/netfilter/ebt_ulog.c --- linux-2.6.22-570/net/bridge/netfilter/ebt_ulog.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/bridge/netfilter/ebt_ulog.c 2007-12-21 15:36:15.000000000 -0500 @@ -301,8 +301,9 @@ spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - NULL, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, + EBT_ULOG_MAXNLGROUPS, NULL, NULL, + THIS_MODULE); if (!ebtulognl) ret = 
-ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtable_filter.c linux-2.6.22-591/net/bridge/netfilter/ebtable_filter.c --- linux-2.6.22-570/net/bridge/netfilter/ebtable_filter.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/netfilter/ebtable_filter.c 2007-12-21 15:36:15.000000000 -0500 @@ -64,6 +64,10 @@ ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ebt_do_table(hook, pskb, in, out, &frame_filter); } diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtable_nat.c linux-2.6.22-591/net/bridge/netfilter/ebtable_nat.c --- linux-2.6.22-570/net/bridge/netfilter/ebtable_nat.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/netfilter/ebtable_nat.c 2007-12-21 15:36:15.000000000 -0500 @@ -64,6 +64,10 @@ ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ebt_do_table(hook, pskb, in, out, &frame_nat); } @@ -71,6 +75,10 @@ ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ebt_do_table(hook, pskb, in, out, &frame_nat); } diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtables.c linux-2.6.22-591/net/bridge/netfilter/ebtables.c --- linux-2.6.22-570/net/bridge/netfilter/ebtables.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/bridge/netfilter/ebtables.c 2007-12-21 15:36:15.000000000 -0500 @@ -28,6 
+28,7 @@ #include #include #include +#include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -1438,6 +1439,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + switch(cmd) { case EBT_SO_SET_ENTRIES: ret = do_replace(user, len); @@ -1457,6 +1461,9 @@ struct ebt_replace tmp; struct ebt_table *t; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; diff -Nurb linux-2.6.22-570/net/core/Makefile linux-2.6.22-591/net/core/Makefile --- linux-2.6.22-570/net/core/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/core/Makefile 2007-12-21 15:36:15.000000000 -0500 @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o + gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff -Nurb linux-2.6.22-570/net/core/dev.c linux-2.6.22-591/net/core/dev.c --- linux-2.6.22-570/net/core/dev.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/core/dev.c 2007-12-21 15:36:15.000000000 -0500 @@ -116,6 +116,7 @@ #include #include #include +#include #include #include @@ -152,9 +153,22 @@ static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback = netdev_dma_event, + }, +}; #endif /* @@ -176,25 +190,50 @@ * unregister_netdevice(), which must be called with the rtnl * semaphore held. 
*/ -LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 -static struct hlist_head dev_name_head[1<dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; +} + +static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) +{ + return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; +} + +/* Device list insertion */ +static int list_netdevice(struct net_device *dev) +{ + struct net *net = dev->nd_net; + + ASSERT_RTNL(); + + write_lock_bh(&dev_base_lock); + list_add_tail(&dev->dev_list, &net->dev_base_head); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + write_unlock_bh(&dev_base_lock); + return 0; } -static inline struct hlist_head *dev_index_hash(int ifindex) +/* Device list removal */ +static void unlist_netdevice(struct net_device *dev) { - return &dev_index_head[ifindex & ((1<dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); } /* @@ -477,7 +516,7 @@ * If device already registered then return base of 1 * to indicate not to probe for this interface */ - if (__dev_get_by_name(name)) + if (__dev_get_by_name(&init_net, name)) return 1; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) @@ -532,11 +571,11 @@ * careful with locks. */ -struct net_device *__dev_get_by_name(const char *name) +struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct hlist_node *p; - hlist_for_each(p, dev_name_hash(name)) { + hlist_for_each(p, dev_name_hash(net, name)) { struct net_device *dev = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(dev->name, name, IFNAMSIZ)) @@ -556,12 +595,12 @@ * matching device is found. 
*/ -struct net_device *dev_get_by_name(const char *name) +struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -579,11 +618,11 @@ * or @dev_base_lock. */ -struct net_device *__dev_get_by_index(int ifindex) +struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; - hlist_for_each(p, dev_index_hash(ifindex)) { + hlist_for_each(p, dev_index_hash(net, ifindex)) { struct net_device *dev = hlist_entry(p, struct net_device, index_hlist); if (dev->ifindex == ifindex) @@ -603,12 +642,12 @@ * dev_put to indicate they have finished with it. */ -struct net_device *dev_get_by_index(int ifindex) +struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -629,13 +668,13 @@ * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; @@ -645,12 +684,12 @@ EXPORT_SYMBOL(dev_getbyhwaddr); -struct net_device *__dev_getfirstbyhwtype(unsigned short type) +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(net, dev) if (dev->type == type) return dev; @@ -659,12 +698,12 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype); -struct net_device *dev_getfirstbyhwtype(unsigned short type) +struct net_device *dev_getfirstbyhwtype(struct net *net, 
unsigned short type) { struct net_device *dev; rtnl_lock(); - dev = __dev_getfirstbyhwtype(type); + dev = __dev_getfirstbyhwtype(net, type); if (dev) dev_hold(dev); rtnl_unlock(); @@ -684,13 +723,13 @@ * dev_put to indicate they have finished with it. */ -struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) +struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; @@ -727,9 +766,10 @@ } /** - * dev_alloc_name - allocate a name for a device - * @dev: device + * __dev_alloc_name - allocate a name for a device + * @net: network namespace to allocate the device name in * @name: name format string + * @buf: scratch buffer and result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -740,10 +780,9 @@ * Returns the number of the unit assigned or a negative errno code. 
*/ -int dev_alloc_name(struct net_device *dev, const char *name) +static int __dev_alloc_name(struct net *net, const char *name, char *buf) { int i = 0; - char buf[IFNAMSIZ]; const char *p; const int max_netdevices = 8*PAGE_SIZE; long *inuse; @@ -764,14 +803,14 @@ if (!inuse) return -ENOMEM; - for_each_netdev(d) { + for_each_netdev(net, d) { if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, sizeof(buf), name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, d->name, IFNAMSIZ)) set_bit(i, inuse); } @@ -780,11 +819,9 @@ free_page((unsigned long) inuse); } - snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(buf)) { - strlcpy(dev->name, buf, IFNAMSIZ); + snprintf(buf, IFNAMSIZ, name, i); + if (!__dev_get_by_name(net, buf)) return i; - } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -793,6 +830,34 @@ return -ENFILE; } +/** + * dev_alloc_name - allocate a name for a device + * @dev: device + * @name: name format string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. 
+ */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + char buf[IFNAMSIZ]; + struct net *net; + int ret; + + BUG_ON(!dev->nd_net); + net = dev->nd_net; + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_change_name - change name of a device @@ -805,9 +870,12 @@ int dev_change_name(struct net_device *dev, char *newname) { int err = 0; + struct net *net; ASSERT_RTNL(); + BUG_ON(!dev->nd_net); + net = dev->nd_net; if (dev->flags & IFF_UP) return -EBUSY; @@ -820,14 +888,18 @@ return err; strcpy(newname, dev->name); } - else if (__dev_get_by_name(newname)) + else if (__dev_get_by_name(net, newname)) return -EEXIST; - else + else { + if (strncmp(newname, dev->name, IFNAMSIZ)) + printk(KERN_INFO "%s renamed to %s\n", + dev->name, newname); strlcpy(dev->name, newname, IFNAMSIZ); + } device_rename(&dev->dev, dev->name); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); return err; @@ -871,12 +943,12 @@ * available in this kernel then it becomes a nop. */ -void dev_load(const char *name) +void dev_load(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); read_unlock(&dev_base_lock); if (!dev && capable(CAP_SYS_MODULE)) @@ -1019,6 +1091,8 @@ } +static int dev_boot_phase = 1; + /* * Device change register/unregister. These are not inline or static * as we export them to the world. 
@@ -1045,14 +1119,17 @@ rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); - if (!err) { - for_each_netdev(dev) { + if (!err && !dev_boot_phase) { + struct net *net; + for_each_net(net) { + for_each_netdev(net, dev) { nb->notifier_call(nb, NETDEV_REGISTER, dev); if (dev->flags & IFF_UP) nb->notifier_call(nb, NETDEV_UP, dev); } } + } rtnl_unlock(); return err; } @@ -1086,9 +1163,9 @@ * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers(unsigned long val, void *v) +int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - return raw_notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, dev); } /* When > 0 there are consumers of rx skb time stamps */ @@ -1510,9 +1587,11 @@ skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + if (!(dev->features & NETIF_F_GEN_CSUM) + || ((dev->features & NETIF_F_IP_CSUM) + && skb->protocol == htons(ETH_P_IP)) + || ((dev->features & NETIF_F_IPV6_CSUM) + && skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -2016,12 +2095,13 @@ * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + } } #endif return; @@ -2063,7 +2143,7 @@ * match. 
--pb */ -static int dev_ifname(struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -2076,7 +2156,7 @@ return -EFAULT; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); + dev = __dev_get_by_index(net, ifr.ifr_ifindex); if (!dev) { read_unlock(&dev_base_lock); return -ENODEV; @@ -2096,7 +2176,7 @@ * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(char __user *arg) +static int dev_ifconf(struct net *net, char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2120,7 +2200,7 @@ */ total = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (!nx_dev_visible(current->nx_info, dev)) continue; for (i = 0; i < NPROTO; i++) { @@ -2156,6 +2236,7 @@ */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; loff_t off; struct net_device *dev; @@ -2164,7 +2245,7 @@ return SEQ_START_TOKEN; off = 1; - for_each_netdev(dev) + for_each_netdev(net, dev) if (off++ == *pos) return dev; @@ -2173,9 +2254,10 @@ void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return v == SEQ_START_TOKEN ? 
- first_net_device() : next_net_device((struct net_device *)v); + first_net_device(net) : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -2274,7 +2356,22 @@ static int dev_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int dev_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_seq_fops = { @@ -2282,7 +2379,7 @@ .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_seq_release, }; static const struct seq_operations softnet_seq_ops = { @@ -2434,30 +2531,49 @@ }; -static int __init dev_proc_init(void) +static int dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) goto out; - if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; - if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) - goto out_dev2; - - if (wext_proc_init()) + if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; rc = 0; out: return rc; +out_ptype: + proc_net_remove(net, "ptype"); out_softnet: - proc_net_remove("ptype"); -out_dev2: - proc_net_remove("softnet_stat"); + proc_net_remove(net, "softnet_stat"); out_dev: - proc_net_remove("dev"); + proc_net_remove(net, "dev"); goto out; } + +static void dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + proc_net_remove(net, 
"ptype"); + proc_net_remove(net, "softnet_stat"); + proc_net_remove(net, "dev"); +} + +static struct pernet_operations dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} #else #define dev_proc_init() 0 #endif /* CONFIG_PROC_FS */ @@ -2691,10 +2807,10 @@ /* * Perform the SIOCxIFxxx calls. */ -static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -2847,7 +2963,7 @@ * positive or a negative errno code on error. */ -int dev_ioctl(unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -2860,12 +2976,12 @@ if (cmd == SIOCGIFCONF) { rtnl_lock(); - ret = dev_ifconf((char __user *) arg); + ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq __user *)arg); + return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -2895,9 +3011,9 @@ case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); read_unlock(&dev_base_lock); if (!ret) { if (colon) @@ -2909,9 +3025,9 @@ return ret; case SIOCETHTOOL: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(&ifr); + ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) @@ -2933,9 +3049,9 @@ case SIOCSIFNAME: if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + 
ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) @@ -2974,9 +3090,9 @@ /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; @@ -2996,9 +3112,9 @@ if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -3007,7 +3123,7 @@ } /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(&ifr, cmd, arg); + return wext_handle_ioctl(net, &ifr, cmd, arg); return -EINVAL; } } @@ -3020,19 +3136,17 @@ * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ -static int dev_new_index(void) +static int dev_new_index(struct net *net) { static int ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; - if (!__dev_get_by_index(ifindex)) + if (!__dev_get_by_index(net, ifindex)) return ifindex; } } -static int dev_boot_phase = 1; - /* Delayed registration/unregisteration */ static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); @@ -3066,6 +3180,7 @@ struct hlist_head *head; struct hlist_node *p; int ret; + struct net *net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3074,6 +3189,8 @@ /* When net_device's are persistent, this will be fatal. 
*/ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + BUG_ON(!dev->nd_net); + net = dev->nd_net; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -3098,12 +3215,12 @@ goto out; } - dev->ifindex = dev_new_index(); + dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence of name */ - head = dev_name_hash(dev->name); + head = dev_name_hash(net, dev->name); hlist_for_each(p, head) { struct net_device *d = hlist_entry(p, struct net_device, name_hlist); @@ -3113,6 +3230,22 @@ } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { @@ -3164,12 +3297,8 @@ set_bit(__LINK_STATE_PRESENT, &dev->state); dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &dev_base_head); - hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - write_unlock_bh(&dev_base_lock); + list_netdevice(dev); /* Notify protocols, that a new device appeared. 
*/ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); @@ -3379,6 +3508,7 @@ dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; + dev->nd_net = &init_net; if (sizeof_priv) dev->priv = netdev_priv(dev); @@ -3457,11 +3587,7 @@ dev_close(dev); /* And unlink it from device chain. */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); + unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; @@ -3519,6 +3645,122 @@ EXPORT_SYMBOL(unregister_netdev); +/** + * dev_change_net_namespace - move device to different nethost namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a netagive errno code is returned. + * + * Callers must hold the rtnl semaphore. + */ + +int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +{ + char buf[IFNAMSIZ]; + const char *destname; + int err; + + ASSERT_RTNL(); + + /* Don't allow namespace local devices to be moved. */ + err = -EINVAL; + if (dev->features & NETIF_F_NETNS_LOCAL) + goto out; + + /* Ensure the device has been registrered */ + err = -EINVAL; + if (dev->reg_state != NETREG_REGISTERED) + goto out; + + /* Get out if there is nothing todo */ + err = 0; + if (dev->nd_net == net) + goto out; + + /* Pick the destination device name, and ensure + * we can use it in the destination network namespace. 
+ */ + err = -EEXIST; + destname = dev->name; + if (__dev_get_by_name(net, destname)) { + /* We get here if we can't use the current device name */ + if (!pat) + goto out; + if (!dev_valid_name(pat)) + goto out; + if (strchr(pat, '%')) { + if (__dev_alloc_name(net, pat, buf) < 0) + goto out; + destname = buf; + } else + destname = pat; + if (__dev_get_by_name(net, destname)) + goto out; + } + + /* + * And now a mini version of register_netdevice unregister_netdevice. + */ + + /* If device is running close it first. */ + if (dev->flags & IFF_UP) + dev_close(dev); + + /* And unlink it from device chain */ + err = -ENODEV; + unlist_netdevice(dev); + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + + /* Actually switch the network namespace */ + dev->nd_net = net; + + /* Assign the new device name */ + if (destname != dev->name) + strcpy(dev->name, destname); + + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) { + int iflink = (dev->iflink == dev->ifindex); + dev->ifindex = dev_new_index(net); + if (iflink) + dev->iflink = dev->ifindex; + } + + /* Fixup sysfs */ + err = device_rename(&dev->dev, dev->name); + BUG_ON(err); + + /* Add the device back in the hashes */ + list_netdevice(dev); + + /* Notify protocols, that a new device appeared. */ + call_netdevice_notifiers(NETDEV_REGISTER, dev); + + synchronize_net(); + err = 0; +out: + return err; +} + static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3569,12 +3811,13 @@ * This is called when the number of channels allocated to the net_dma_client * changes. The net_dma_client tries to have one DMA channel per CPU. 
*/ -static void net_dma_rebalance(void) + +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3583,10 +3826,12 @@ i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3595,7 +3840,6 @@ } i++; } - rcu_read_unlock(); } /** @@ -3604,23 +3848,53 @@ * @chan: DMA channel for the event * @event: event type */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) -{ - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } break; case DMA_RESOURCE_REMOVED: - 
net_dma_count--; - net_dma_rebalance(); + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return ack; } /** @@ -3628,12 +3902,10 @@ */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register(&net_dma.client); + dma_async_client_chan_request(&net_dma.client); return 0; } @@ -3679,6 +3951,75 @@ } EXPORT_SYMBOL(netdev_compute_features); +/* Initialize per network namespace state */ +static int netdev_init(struct net *net) +{ + int i; + INIT_LIST_HEAD(&net->dev_base_head); + rwlock_init(&dev_base_lock); + + net->dev_name_head = kmalloc( + sizeof(*net->dev_name_head)*NETDEV_HASHENTRIES, GFP_KERNEL); + if (!net->dev_name_head) + return -ENOMEM; + + net->dev_index_head = kmalloc( + sizeof(*net->dev_index_head)*NETDEV_HASHENTRIES, GFP_KERNEL); + if (!net->dev_index_head) { + kfree(net->dev_name_head); + return -ENOMEM; + } + + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&net->dev_name_head[i]); + + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&net->dev_index_head[i]); + + return 0; +} + +static void netdev_exit(struct net *net) +{ + kfree(net->dev_name_head); + kfree(net->dev_index_head); +} + +static struct pernet_operations netdev_net_ops = { + .init = netdev_init, + .exit = netdev_exit, +}; + +static void default_device_exit(struct net *net) +{ + struct net_device *dev, *next; + /* + * Push all migratable of the network devices back to 
the + * initial network namespace + */ + rtnl_lock(); + for_each_netdev_safe(net, dev, next) { + int err; + + /* Ignore unmoveable devices (i.e. loopback) */ + if (dev->features & NETIF_F_NETNS_LOCAL) + continue; + + /* Push remaing network devices to init_net */ + err = dev_change_net_namespace(dev, &init_net, "dev%d"); + if (err) { + printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); + unregister_netdevice(dev); + } + } + rtnl_unlock(); +} + +static struct pernet_operations default_device_ops = { + .exit = default_device_exit, +}; + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -3706,11 +4047,11 @@ for (i = 0; i < 16; i++) INIT_LIST_HEAD(&ptype_base[i]); - for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) - INIT_HLIST_HEAD(&dev_name_head[i]); + if (register_pernet_subsys(&netdev_net_ops)) + goto out; - for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) - INIT_HLIST_HEAD(&dev_index_head[i]); + if (register_pernet_device(&default_device_ops)) + goto out; /* * Initialise the packet receive queues. 
diff -Nurb linux-2.6.22-570/net/core/dev_mcast.c linux-2.6.22-591/net/core/dev_mcast.c --- linux-2.6.22-570/net/core/dev_mcast.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/core/dev_mcast.c 2007-12-21 15:36:15.000000000 -0500 @@ -46,6 +46,7 @@ #include #include #include +#include /* @@ -219,11 +220,12 @@ #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; struct net_device *dev; loff_t off = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (off++ == *pos) return dev; } @@ -272,7 +274,22 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_mc_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_mc_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int dev_mc_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_mc_seq_fops = { @@ -280,14 +297,31 @@ .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_mc_seq_release, }; #endif +static int dev_mc_net_init(struct net *net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void dev_mc_net_exit(struct net *net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + void __init dev_mcast_init(void) { - proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); + register_pernet_subsys(&dev_mc_net_ops); } EXPORT_SYMBOL(dev_mc_add); diff -Nurb linux-2.6.22-570/net/core/dst.c linux-2.6.22-591/net/core/dst.c --- linux-2.6.22-570/net/core/dst.c 2007-07-08 19:32:17.000000000 -0400 
+++ linux-2.6.22-591/net/core/dst.c 2007-12-21 15:36:15.000000000 -0500 @@ -15,7 +15,9 @@ #include #include #include +#include +#include #include /* Locking strategy: @@ -236,13 +238,14 @@ if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + struct net *net = dev->nd_net; + dst->dev = &net->loopback_dev; + dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = &net->loopback_dev; dev_put(dev); - dev_hold(&loopback_dev); + dev_hold(dst->neighbour->dev); } } } @@ -252,6 +255,9 @@ struct net_device *dev = ptr; struct dst_entry *dst; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: diff -Nurb linux-2.6.22-570/net/core/ethtool.c linux-2.6.22-591/net/core/ethtool.c --- linux-2.6.22-570/net/core/ethtool.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/core/ethtool.c 2007-12-21 15:36:15.000000000 -0500 @@ -798,9 +798,9 @@ /* The main entry point in this file. 
Called from net/core/dev.c */ -int dev_ethtool(struct ifreq *ifr) +int dev_ethtool(struct net *net, struct ifreq *ifr) { - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; diff -Nurb linux-2.6.22-570/net/core/fib_rules.c linux-2.6.22-591/net/core/fib_rules.c --- linux-2.6.22-570/net/core/fib_rules.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/core/fib_rules.c 2007-12-21 15:36:15.000000000 -0500 @@ -11,21 +11,20 @@ #include #include #include +#include +#include #include -static LIST_HEAD(rules_ops); -static DEFINE_SPINLOCK(rules_mod_lock); - -static void notify_rule_change(int event, struct fib_rule *rule, +static void notify_rule_change(struct net *net, int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); -static struct fib_rules_ops *lookup_rules_ops(int family) +static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) { struct fib_rules_ops *ops; rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { + list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; @@ -47,10 +46,10 @@ static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } -int fib_rules_register(struct fib_rules_ops *ops) +int fib_rules_register(struct net *net, struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; @@ -63,15 +62,16 @@ ops->action == NULL) return -EINVAL; - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) + spin_lock(&net->rules_mod_lock); + list_for_each_entry(o, &net->rules_ops, list) if (ops->family == o->family) goto errout; - list_add_tail_rcu(&ops->list, &rules_ops); + hold_net(net); + list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: - 
spin_unlock(&rules_mod_lock); + spin_unlock(&net->rules_mod_lock); return err; } @@ -88,13 +88,13 @@ } } -int fib_rules_unregister(struct fib_rules_ops *ops) +int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops) { int err = 0; struct fib_rules_ops *o; - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) { + spin_lock(&net->rules_mod_lock); + list_for_each_entry(o, &net->rules_ops, list) { if (o == ops) { list_del_rcu(&o->list); cleanup_ops(ops); @@ -104,9 +104,11 @@ err = -ENOENT; out: - spin_unlock(&rules_mod_lock); + spin_unlock(&net->rules_mod_lock); synchronize_rcu(); + if (!err) + release_net(net); return err; } @@ -197,6 +199,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -206,7 +209,7 @@ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; - ops = lookup_rules_ops(frh->family); + ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = EAFNOSUPPORT; goto errout; @@ -234,7 +237,7 @@ rule->ifindex = -1; nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); + dev = __dev_get_by_name(net, rule->ifname); if (dev) rule->ifindex = dev->ifindex; } @@ -256,7 +259,7 @@ rule->table = frh_get_table(frh, tb); if (!rule->pref && ops->default_pref) - rule->pref = ops->default_pref(); + rule->pref = ops->default_pref(ops); err = -EINVAL; if (tb[FRA_GOTO]) { @@ -319,7 +322,7 @@ else list_add_rcu(&rule->list, ops->rules_list); - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -333,6 +336,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr 
*frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; @@ -342,7 +346,7 @@ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; - ops = lookup_rules_ops(frh->family); + ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = EAFNOSUPPORT; goto errout; @@ -408,7 +412,7 @@ } synchronize_rcu(); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, + notify_rule_change(net, RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).pid); fib_rule_put(rule); flush_route_cache(ops); @@ -514,13 +518,17 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct fib_rules_ops *ops; int idx = 0, family; + if (net != &init_net) + return -EINVAL; + family = rtnl_msg_family(cb->nlh); if (family != AF_UNSPEC) { /* Protocol specific dump request */ - ops = lookup_rules_ops(family); + ops = lookup_rules_ops(net, family); if (ops == NULL) return -EAFNOSUPPORT; @@ -528,7 +536,7 @@ } rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { + list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (idx < cb->args[0] || !try_module_get(ops->owner)) goto skip; @@ -545,7 +553,7 @@ return skb->len; } -static void notify_rule_change(int event, struct fib_rule *rule, +static void notify_rule_change(struct net *net, int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid) { @@ -563,10 +571,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(ops->nlgroup, err); + rtnl_set_sk_err(net, ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) @@ -594,19 +602,23 @@ void *ptr) { struct net_device *dev = ptr; + struct net *net = dev->nd_net; struct fib_rules_ops *ops; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); rcu_read_lock(); switch 
(event) { case NETDEV_REGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, &net->rules_ops, list) attach_rules(ops->rules_list, dev); break; case NETDEV_UNREGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, &net->rules_ops, list) detach_rules(ops->rules_list, dev); break; } @@ -620,13 +632,28 @@ .notifier_call = fib_rules_event, }; +static int fib_rules_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->rules_ops); + spin_lock_init(&net->rules_mod_lock); + return 0; +} + +static struct pernet_operations fib_rules_net_ops = { + .init = fib_rules_net_init, +}; + static int __init fib_rules_init(void) { + int ret; rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); - return register_netdevice_notifier(&fib_rules_notifier); + ret = register_pernet_subsys(&fib_rules_net_ops); + if (!ret) + ret = register_netdevice_notifier(&fib_rules_notifier); + return ret; } subsys_initcall(fib_rules_init); diff -Nurb linux-2.6.22-570/net/core/neighbour.c linux-2.6.22-591/net/core/neighbour.c --- linux-2.6.22-570/net/core/neighbour.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/core/neighbour.c 2007-12-21 15:36:15.000000000 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #define NEIGH_DEBUG 1 @@ -361,7 +362,7 @@ return n; } -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net * net, const void *pkey) { struct neighbour *n; int key_len = tbl->key_len; @@ -371,7 +372,8 @@ read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { - if (!memcmp(n->primary_key, pkey, key_len)) { + if (!memcmp(n->primary_key, pkey, key_len) && + (net == n->dev->nd_net)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ -449,7 +451,8 @@ goto 
out; } -struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, +struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, + struct net * net, const void *pkey, struct net_device *dev, int creat) { struct pneigh_entry *n; @@ -465,6 +468,7 @@ for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { if (!memcmp(n->key, pkey, key_len) && + (n->net == net) && (n->dev == dev || !n->dev)) { read_unlock_bh(&tbl->lock); goto out; @@ -479,6 +483,7 @@ if (!n) goto out; + n->net = hold_net(net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -501,7 +506,7 @@ } -int pneigh_delete(struct neigh_table *tbl, const void *pkey, +int pneigh_delete(struct neigh_table *tbl, struct net * net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; @@ -516,13 +521,15 @@ write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { - if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { + if (!memcmp(n->key, pkey, key_len) && n->dev == dev && + (n->net == net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); + release_net(n->net); kfree(n); return 0; } @@ -545,6 +552,7 @@ tbl->pdestructor(n); if (n->dev) dev_put(n->dev); + release_net(n->net); kfree(n); continue; } @@ -1266,12 +1274,37 @@ spin_unlock(&tbl->proxy_queue.lock); } +static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, + struct net * net, int ifindex) +{ + struct neigh_parms *p; + + for (p = &tbl->parms; p; p = p->next) { + if (p->net != net) + continue; + if ((p->dev && p->dev->ifindex == ifindex) || + (!p->dev && !ifindex)) + return p; + } + + return NULL; +} struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); + struct neigh_parms *p, *ref; + struct net * net; + + net = &init_net; + if (dev) + net = dev->nd_net; + + ref = 
lookup_neigh_params(tbl, net, 0); + if (!ref) + return NULL; + p = kmemdup(ref, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); @@ -1287,6 +1320,7 @@ dev_hold(dev); p->dev = dev; } + p->net = hold_net(net); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1296,6 +1330,20 @@ return p; } +struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, + struct net *net) +{ + struct neigh_parms *parms; + if (net != &init_net) { + parms = neigh_parms_alloc(NULL, tbl); + release_net(parms->net); + parms->net = hold_net(net); + } + else + parms = neigh_parms_clone(&tbl->parms); + return parms; +} + static void neigh_rcu_free_parms(struct rcu_head *head) { struct neigh_parms *parms = @@ -1328,6 +1376,7 @@ void neigh_parms_destroy(struct neigh_parms *parms) { + release_net(parms->net); kfree(parms); } @@ -1338,6 +1387,7 @@ unsigned long now = jiffies; unsigned long phsize; + tbl->parms.net = &init_net; atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = @@ -1353,7 +1403,7 @@ panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); tbl->pde->proc_fops = &neigh_stat_seq_fops; @@ -1443,6 +1493,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1458,7 +1509,7 @@ ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1477,7 +1528,7 @@ goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, nla_data(dst_attr), dev); + err = pneigh_delete(tbl, net, nla_data(dst_attr), 
dev); goto out_dev_put; } @@ -1508,6 +1559,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1524,7 +1576,7 @@ ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1553,7 +1605,7 @@ struct pneigh_entry *pn; err = -ENOBUFS; - pn = pneigh_lookup(tbl, dst, dev, 1); + pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm->ndm_flags; err = 0; @@ -1748,19 +1800,6 @@ return -EMSGSIZE; } -static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, - int ifindex) -{ - struct neigh_parms *p; - - for (p = &tbl->parms; p; p = p->next) - if ((p->dev && p->dev->ifindex == ifindex) || - (!p->dev && !ifindex)) - return p; - - return NULL; -} - static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, @@ -1788,6 +1827,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; @@ -1837,7 +1877,7 @@ if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); - p = lookup_neigh_params(tbl, ifindex); + p = lookup_neigh_params(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; @@ -1912,6 +1952,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; @@ -1931,8 +1972,11 @@ NLM_F_MULTI) <= 0) break; - for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { - if (nidx < neigh_skip) + for (nidx = 0, p = tbl->parms.next; p; p = p->next) { + if (net != p->net) + continue; + + 
if (nidx++ < neigh_skip) continue; if (neightbl_fill_param_info(skb, tbl, p, @@ -2003,6 +2047,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { + struct net * net = skb->sk->sk_net; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; @@ -2013,8 +2058,12 @@ continue; if (h > s_h) s_idx = 0; - for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { - if (idx < s_idx) + for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { + int lidx; + if (n->dev->nd_net != net) + continue; + lidx = idx++; + if (lidx < s_idx) continue; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -2109,6 +2158,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; struct neighbour *n = NULL; int bucket = state->bucket; @@ -2118,6 +2168,8 @@ n = tbl->hash_buckets[bucket]; while (n) { + if (n->dev->nd_net != net) + goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; void *v; @@ -2147,6 +2199,7 @@ loff_t *pos) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; if (state->neigh_sub_iter) { @@ -2158,6 +2211,8 @@ while (1) { while (n) { + if (n->dev->nd_net != net) + goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) @@ -2204,6 +2259,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; @@ -2211,6 +2267,8 @@ state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; + while (pn && (pn->net != net)) + pn = pn->next; if (pn) break; } @@ -2224,6 +2282,7 @@ loff_t *pos) 
{ struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; pn = pn->next; @@ -2231,6 +2290,8 @@ if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; + while (pn && (pn->net != net)) + pn = pn->next; if (pn) break; } @@ -2433,6 +2494,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) { + struct net * net = n->dev->nd_net; struct sk_buff *skb; int err = -ENOBUFS; @@ -2447,10 +2509,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_NEIGH, err); + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } void neigh_app_ns(struct neighbour *n) @@ -2648,6 +2710,7 @@ if (!t) return -ENOBUFS; + t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; @@ -2716,7 +2779,7 @@ t->neigh_proto_dir[0].child = t->neigh_neigh_dir; t->neigh_root_dir[0].child = t->neigh_proto_dir; - t->sysctl_header = register_sysctl_table(t->neigh_root_dir); + t->sysctl_header = register_net_sysctl_table(p->net, t->neigh_root_dir); if (!t->sysctl_header) { err = -ENOBUFS; goto free_procname; @@ -2738,7 +2801,7 @@ if (p->sysctl_table) { struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; - unregister_sysctl_table(t->sysctl_header); + unregister_net_sysctl_table(t->sysctl_header); kfree(t->neigh_dev[0].procname); kfree(t); } @@ -2771,6 +2834,7 @@ EXPORT_SYMBOL(neigh_lookup); EXPORT_SYMBOL(neigh_lookup_nodev); EXPORT_SYMBOL(neigh_parms_alloc); +EXPORT_SYMBOL(neigh_parms_alloc_default); EXPORT_SYMBOL(neigh_parms_release); EXPORT_SYMBOL(neigh_rand_reach_time); EXPORT_SYMBOL(neigh_resolve_output); diff -Nurb linux-2.6.22-570/net/core/net-sysfs.c linux-2.6.22-591/net/core/net-sysfs.c --- linux-2.6.22-570/net/core/net-sysfs.c 2007-07-08 19:32:17.000000000 -0400 
+++ linux-2.6.22-591/net/core/net-sysfs.c 2007-12-21 15:36:15.000000000 -0500 @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -29,16 +31,16 @@ } /* use same locking rules as GIF* ioctl's */ -static ssize_t netdev_show(const struct device *dev, +static ssize_t netdev_show(const struct device *device, struct device_attribute *attr, char *buf, ssize_t (*format)(const struct net_device *, char *)) { - struct net_device *net = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(net)) - ret = (*format)(net, buf); + if (dev_isalive(dev)) + ret = (*format)(dev, buf); read_unlock(&dev_base_lock); return ret; @@ -46,9 +48,9 @@ /* generate a show function for simple field */ #define NETDEVICE_SHOW(field, format_string) \ -static ssize_t format_##field(const struct net_device *net, char *buf) \ +static ssize_t format_##field(const struct net_device *dev, char *buf) \ { \ - return sprintf(buf, format_string, net->field); \ + return sprintf(buf, format_string, dev->field); \ } \ static ssize_t show_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -58,11 +60,11 @@ /* use same locking and permission rules as SIF* ioctl's */ -static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, +static ssize_t netdev_store(struct device *device, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { - struct net_device *net = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); char *endp; unsigned long new; int ret = -EINVAL; @@ -75,8 +77,8 @@ goto err; rtnl_lock(); - if (dev_isalive(net)) { - if ((ret = (*set)(net, new)) == 0) + if (dev_isalive(dev)) { + if ((ret = (*set)(dev, new)) == 0) ret = len; } rtnl_unlock(); @@ -103,45 +105,45 @@ return cp - buf; } -static ssize_t show_address(struct device *dev, struct device_attribute *attr, +static 
ssize_t show_address(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *net = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(net)) - ret = format_addr(buf, net->dev_addr, net->addr_len); + if (dev_isalive(dev)) + ret = format_addr(buf, dev->dev_addr, dev->addr_len); read_unlock(&dev_base_lock); return ret; } -static ssize_t show_broadcast(struct device *dev, +static ssize_t show_broadcast(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *net = to_net_dev(dev); - if (dev_isalive(net)) - return format_addr(buf, net->broadcast, net->addr_len); + struct net_device *dev = to_net_dev(device); + if (dev_isalive(dev)) + return format_addr(buf, dev->broadcast, dev->addr_len); return -EINVAL; } -static ssize_t show_carrier(struct device *dev, +static ssize_t show_carrier(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *netdev = to_net_dev(dev); - if (netif_running(netdev)) { - return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); + struct net_device *dev = to_net_dev(device); + if (netif_running(dev)) { + return sprintf(buf, fmt_dec, !!netif_carrier_ok(dev)); } return -EINVAL; } -static ssize_t show_dormant(struct device *dev, +static ssize_t show_dormant(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *netdev = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); - if (netif_running(netdev)) - return sprintf(buf, fmt_dec, !!netif_dormant(netdev)); + if (netif_running(dev)) + return sprintf(buf, fmt_dec, !!netif_dormant(dev)); return -EINVAL; } @@ -156,15 +158,15 @@ "up" }; -static ssize_t show_operstate(struct device *dev, +static ssize_t show_operstate(struct device *device, struct device_attribute *attr, char *buf) { - const struct net_device *netdev = to_net_dev(dev); + const struct net_device *dev = to_net_dev(device); 
unsigned char operstate; read_lock(&dev_base_lock); - operstate = netdev->operstate; - if (!netif_running(netdev)) + operstate = dev->operstate; + if (!netif_running(dev)) operstate = IF_OPER_DOWN; read_unlock(&dev_base_lock); @@ -177,57 +179,57 @@ /* read-write attributes */ NETDEVICE_SHOW(mtu, fmt_dec); -static int change_mtu(struct net_device *net, unsigned long new_mtu) +static int change_mtu(struct net_device *dev, unsigned long new_mtu) { - return dev_set_mtu(net, (int) new_mtu); + return dev_set_mtu(dev, (int) new_mtu); } -static ssize_t store_mtu(struct device *dev, struct device_attribute *attr, +static ssize_t store_mtu(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_mtu); + return netdev_store(device, attr, buf, len, change_mtu); } NETDEVICE_SHOW(flags, fmt_hex); -static int change_flags(struct net_device *net, unsigned long new_flags) +static int change_flags(struct net_device *dev, unsigned long new_flags) { - return dev_change_flags(net, (unsigned) new_flags); + return dev_change_flags(dev, (unsigned) new_flags); } -static ssize_t store_flags(struct device *dev, struct device_attribute *attr, +static ssize_t store_flags(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_flags); + return netdev_store(device, attr, buf, len, change_flags); } NETDEVICE_SHOW(tx_queue_len, fmt_ulong); -static int change_tx_queue_len(struct net_device *net, unsigned long new_len) +static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - net->tx_queue_len = new_len; + dev->tx_queue_len = new_len; return 0; } -static ssize_t store_tx_queue_len(struct device *dev, +static ssize_t store_tx_queue_len(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_tx_queue_len); + return netdev_store(device, attr, 
buf, len, change_tx_queue_len); } NETDEVICE_SHOW(weight, fmt_dec); -static int change_weight(struct net_device *net, unsigned long new_weight) +static int change_weight(struct net_device *dev, unsigned long new_weight) { - net->weight = new_weight; + dev->weight = new_weight; return 0; } -static ssize_t store_weight(struct device *dev, struct device_attribute *attr, +static ssize_t store_weight(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_weight); + return netdev_store(device, attr, buf, len, change_weight); } static struct device_attribute net_class_attributes[] = { @@ -447,6 +449,23 @@ kfree((char *)dev - dev->padded); } +static const void *net_current_tag(void) +{ + return current->nsproxy->net_ns; +} + +static const void *net_kobject_tag(struct kobject *kobj) +{ + struct net_device *dev; + dev = container_of(kobj, struct net_device, dev.kobj); + return dev->nd_net; +} + +static const struct shadow_dir_operations net_shadow_dir_operations = { + .current_tag = net_current_tag, + .kobject_tag = net_kobject_tag, +}; + static struct class net_class = { .name = "net", .dev_release = netdev_release, @@ -454,42 +473,43 @@ #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif + .shadow_ops = &net_shadow_dir_operations, }; /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -void netdev_unregister_sysfs(struct net_device * net) +void netdev_unregister_sysfs(struct net_device * dev) { - struct device *dev = &(net->dev); + struct device *device = &(dev->dev); - kobject_get(&dev->kobj); - device_del(dev); + kobject_get(&device->kobj); + device_del(device); } /* Create sysfs entries for network device. 
*/ -int netdev_register_sysfs(struct net_device *net) +int netdev_register_sysfs(struct net_device *dev) { - struct device *dev = &(net->dev); - struct attribute_group **groups = net->sysfs_groups; + struct device *device = &(dev->dev); + struct attribute_group **groups = dev->sysfs_groups; - device_initialize(dev); - dev->class = &net_class; - dev->platform_data = net; - dev->groups = groups; + device_initialize(device); + device->class = &net_class; + device->platform_data = dev; + device->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); + strlcpy(device->bus_id, dev->name, BUS_ID_SIZE); - if (net->get_stats) + if (dev->get_stats) *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT - if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) + if (dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif - return device_add(dev); + return device_add(device); } int netdev_sysfs_init(void) diff -Nurb linux-2.6.22-570/net/core/net_namespace.c linux-2.6.22-591/net/core/net_namespace.c --- linux-2.6.22-570/net/core/net_namespace.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/net/core/net_namespace.c 2007-12-21 15:36:15.000000000 -0500 @@ -0,0 +1,332 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * Our network namespace constructor/destructor lists + */ + +static LIST_HEAD(pernet_list); +static struct list_head *first_device = &pernet_list; +static DEFINE_MUTEX(net_mutex); + +static DEFINE_MUTEX(net_list_mutex); +LIST_HEAD(net_namespace_list); + +static struct kmem_cache *net_cachep; + +struct net init_net; +EXPORT_SYMBOL_GPL(init_net); + +void net_lock(void) +{ + mutex_lock(&net_list_mutex); +} + +void net_unlock(void) +{ + mutex_unlock(&net_list_mutex); +} + +static struct net *net_alloc(void) +{ + return kmem_cache_alloc(net_cachep, GFP_KERNEL); +} + +static void net_free(struct net *net) +{ + if (!net) 
+ return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct list_head *ptr; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + net_lock(); + list_del(&net->list); + net_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_prev(ptr, &pernet_list) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +/* + * setup_net runs the initializers for the network namespace object. + */ +static int setup_net(struct net *net) +{ + /* Must be called with net_mutex held */ + struct pernet_operations *ops; + struct list_head *ptr; + int error; + + memset(net, 0, sizeof(struct net)); + atomic_set(&net->count, 1); + atomic_set(&net->use_count, 0); + + error = 0; + list_for_each(ptr, &pernet_list) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->init) { + error = ops->init(net); + if (error < 0) + goto out_undo; + } + } +out: + return error; +out_undo: + /* Walk through the list backwards calling the exit functions + * for the pernet modules whose init functions did not fail. 
+ */ + for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->exit) + ops->exit(net); + } + goto out; +} + +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + struct net *new_net = NULL; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return old_net; + + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + err = -ENOMEM; + new_net = net_alloc(); + if (!new_net) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + net_lock(); + list_add_tail(&new_net->list, &net_namespace_list); + net_unlock(); + + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + if (err) { + net_free(new_net); + new_net = ERR_PTR(err); + } + return new_net; +} + +static int __init net_ns_init(void) +{ + int err; + + printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); + net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), + SMP_CACHE_BYTES, + SLAB_PANIC, NULL, NULL); + mutex_lock(&net_mutex); + err = setup_net(&init_net); + + net_lock(); + list_add_tail(&init_net.list, &net_namespace_list); + net_unlock(); + + mutex_unlock(&net_mutex); + if (err) + panic("Could not setup the initial network namespace"); + + return 0; +} + +pure_initcall(net_ns_init); + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + struct net *net, *undo_net; + int error; + + error = 0; + list_add_tail(&ops->list, list); + for_each_net(net) { + if (ops->init) { + error = ops->init(net); + if (error) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* If I have an error cleanup all namespaces I initialized */ + list_del(&ops->list); + for_each_net(undo_net) { + if (undo_net == net) + goto undone; + if (ops->exit) + ops->exit(undo_net); + } +undone: + goto out; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + struct 
net *net; + + list_del(&ops->list); + for_each_net(net) + if (ops->exit) + ops->exit(net); +} + +/** + * register_pernet_subsys - register a network namespace subsystem + * @ops: pernet operations structure for the subsystem + * + * Register a subsystem which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_subsys(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(first_device, ops); + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_subsys); + +/** + * unregister_pernet_subsys - unregister a network namespace subsystem + * @module: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destroyed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. 
+ * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destroyed. In + * addition run the exit method for all existing network + * namespaces. 
+ */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); diff -Nurb linux-2.6.22-570/net/core/netpoll.c linux-2.6.22-591/net/core/netpoll.c --- linux-2.6.22-570/net/core/netpoll.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/core/netpoll.c 2007-12-21 15:36:15.000000000 -0500 @@ -503,7 +503,8 @@ np->rx_hook(np, ntohs(uh->source), (char *)(uh+1), - ulen - sizeof(struct udphdr)); + ulen - sizeof(struct udphdr), + skb); kfree_skb(skb); return 1; @@ -633,7 +634,7 @@ int err; if (np->dev_name) - ndev = dev_get_by_name(np->dev_name); + ndev = dev_get_by_name(&init_net, np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); diff -Nurb linux-2.6.22-570/net/core/pktgen.c linux-2.6.22-591/net/core/pktgen.c --- linux-2.6.22-570/net/core/pktgen.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/core/pktgen.c 2007-12-21 15:36:15.000000000 -0500 @@ -155,6 +155,7 @@ #include #include #include +#include #include #include #include @@ -1903,6 +1904,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. 
*/ @@ -1933,7 +1937,7 @@ pkt_dev->odev = NULL; } - odev = dev_get_by_name(ifname); + odev = dev_get_by_name(&init_net, ifname); if (!odev) { printk("pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -3284,6 +3288,8 @@ set_current_state(TASK_INTERRUPTIBLE); + set_freezable(); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3568,7 +3574,7 @@ printk(version); - pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) return -ENODEV; pg_proc_dir->owner = THIS_MODULE; @@ -3577,7 +3583,7 @@ if (pe == NULL) { printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3600,7 +3606,7 @@ printk("pktgen: ERROR: Initialization failed for all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -ENODEV; } @@ -3627,7 +3633,7 @@ /* Clean up proc file system */ remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); } module_init(pg_init); diff -Nurb linux-2.6.22-570/net/core/rtnetlink.c linux-2.6.22-591/net/core/rtnetlink.c --- linux-2.6.22-570/net/core/rtnetlink.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/core/rtnetlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -59,7 +59,6 @@ }; static DEFINE_MUTEX(rtnl_mutex); -static struct sock *rtnl; void rtnl_lock(void) { @@ -73,9 +72,17 @@ void rtnl_unlock(void) { + struct net *net; mutex_unlock(&rtnl_mutex); + + net_lock(); + for_each_net(net) { + struct sock *rtnl = net->rtnl; if (rtnl && rtnl->sk_receive_queue.qlen) rtnl->sk_data_ready(rtnl, 0); + } + net_unlock(); + netdev_run_todo(); } @@ -97,6 +104,19 @@ return 0; } +int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len) +{ + 
if (RTA_PAYLOAD(rta) < len) + return -1; + if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { + rta = RTA_DATA(rta) + RTA_ALIGN(len); + return rtattr_parse_nested(tb, maxattr, rta); + } + memset(tb, 0, sizeof(struct rtattr *) * maxattr); + return 0; +} + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) @@ -243,6 +263,143 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all); +static LIST_HEAD(link_ops); + +/** + * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that create devices during module initialization. It + * must be called before registering the devices. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_link_register(struct rtnl_link_ops *ops) +{ + list_add_tail(&ops->list, &link_ops); + return 0; +} + +EXPORT_SYMBOL_GPL(__rtnl_link_register); + +/** + * rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_link_register(struct rtnl_link_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_link_register(ops); + rtnl_unlock(); + return err; +} + +EXPORT_SYMBOL_GPL(rtnl_link_register); + +/** + * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that unregister devices during module unloading. It must + * be called after unregistering the devices. + */ +void __rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + list_del(&ops->list); +} + +EXPORT_SYMBOL_GPL(__rtnl_link_unregister); + +/** + * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 
+ * @ops: struct rtnl_link_ops * to unregister + */ +void rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + rtnl_lock(); + __rtnl_link_unregister(ops); + rtnl_unlock(); +} + +EXPORT_SYMBOL_GPL(rtnl_link_unregister); + +static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +{ + const struct rtnl_link_ops *ops; + + list_for_each_entry(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) + return ops; + } + return NULL; +} + +static size_t rtnl_link_get_size(const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + size_t size; + + if (!ops) + return 0; + + size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ + nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ + + if (ops->get_size) + /* IFLA_INFO_DATA + nested data */ + size += nlmsg_total_size(sizeof(struct nlattr)) + + ops->get_size(dev); + + if (ops->get_xstats_size) + size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ + + return size; +} + +static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + struct nlattr *linkinfo, *data; + int err = -EMSGSIZE; + + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + if (linkinfo == NULL) + goto out; + + if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) + goto err_cancel_link; + if (ops->fill_xstats) { + err = ops->fill_xstats(skb, dev); + if (err < 0) + goto err_cancel_link; + } + if (ops->fill_info) { + data = nla_nest_start(skb, IFLA_INFO_DATA); + if (data == NULL) + goto err_cancel_link; + err = ops->fill_info(skb, dev); + if (err < 0) + goto err_cancel_data; + nla_nest_end(skb, data); + } + + nla_nest_end(skb, linkinfo); + return 0; + +err_cancel_data: + nla_nest_cancel(skb, data); +err_cancel_link: + nla_nest_cancel(skb, linkinfo); +out: + return err; +} + static const int rtm_min[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), @@ -296,8 +453,9 @@ return ret; 
} -int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) { + struct sock *rtnl = net->rtnl; int err = 0; NETLINK_CB(skb).dst_group = group; @@ -309,14 +467,17 @@ return err; } -int rtnl_unicast(struct sk_buff *skb, u32 pid) +int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) { + struct sock *rtnl = net->rtnl; + return nlmsg_unicast(rtnl, skb, pid); } -int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { + struct sock *rtnl = net->rtnl; int report = 0; if (nlh) @@ -325,8 +486,10 @@ return nlmsg_notify(rtnl, skb, pid, group, report, flags); } -void rtnl_set_sk_err(u32 group, int error) +void rtnl_set_sk_err(struct net *net, u32 group, int error) { + struct sock *rtnl = net->rtnl; + netlink_set_err(rtnl, 0, group, error); } @@ -437,7 +600,7 @@ a->tx_compressed = b->tx_compressed; }; -static inline size_t if_nlmsg_size(void) +static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -452,7 +615,8 @@ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, @@ -522,6 +686,11 @@ } } + if (dev->rtnl_link_ops) { + if (rtnl_link_fill(skb, dev) < 0) + goto nla_put_failure; + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -531,12 +700,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx; int s_idx = cb->args[0]; struct net_device *dev; idx = 0; - for_each_netdev(dev) { + for_each_netdev(net, 
dev) { if (!nx_dev_visible(skb->sk->sk_nx_info, dev)) continue; if (idx < s_idx) @@ -555,6 +725,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, + [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, @@ -563,44 +735,16 @@ [IFLA_LINKMODE] = { .type = NLA_U8 }, }; -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct ifinfomsg *ifm; - struct net_device *dev; - int err, send_addr_notify = 0, modified = 0; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - err = -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); - else - goto errout; - - if (dev == NULL) { - err = -ENODEV; - goto errout; - } - - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - goto errout_dev; +static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { + [IFLA_INFO_KIND] = { .type = NLA_STRING }, + [IFLA_INFO_DATA] = { .type = NLA_NESTED }, +}; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - goto errout_dev; +static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, + struct nlattr **tb, char *ifname, int modified) +{ + int send_addr_notify = 0; + int err; if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; @@ -608,12 +752,12 @@ if (!dev->set_config) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } u_map = 
nla_data(tb[IFLA_MAP]); @@ -626,7 +770,7 @@ err = dev->set_config(dev, &k_map); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -637,19 +781,19 @@ if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto errout_dev; + goto errout; } sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), @@ -657,7 +801,7 @@ err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto errout_dev; + goto errout; send_addr_notify = 1; modified = 1; } @@ -665,7 +809,7 @@ if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -677,7 +821,7 @@ if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -686,7 +830,6 @@ send_addr_notify = 1; } - if (ifm->ifi_flags || ifm->ifi_change) { unsigned int flags = ifm->ifi_flags; @@ -714,7 +857,7 @@ err = 0; -errout_dev: +errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " "some changes comitted already. 
Interface %s may " @@ -723,14 +866,237 @@ if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} + +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = skb->sk->sk_net; + struct ifinfomsg *ifm; + struct net_device *dev; + int err; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + err = -EINVAL; + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = dev_get_by_index(net, ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = dev_get_by_name(net, ifname); + else + goto errout; + + if (dev == NULL) { + err = -ENODEV; + goto errout; + } + + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; + + err = do_setlink(dev, ifm, tb, ifname, 0); +errout_dev: dev_put(dev); errout: return err; } +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = skb->sk->sk_net; + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(net, ifname); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + ops = dev->rtnl_link_ops; + if (!ops) + return -EOPNOTSUPP; + + ops->dellink(dev); + return 0; +} + +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = 
skb->sk->sk_net; + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char kind[MODULE_NAME_LEN]; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + int err; + +replay: + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); + else if (ifname[0]) + dev = __dev_get_by_name(net, ifname); + else + dev = NULL; + + if (tb[IFLA_LINKINFO]) { + err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], ifla_info_policy); + if (err < 0) + return err; + } else + memset(linkinfo, 0, sizeof(linkinfo)); + + if (linkinfo[IFLA_INFO_KIND]) { + nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); + } else { + kind[0] = '\0'; + ops = NULL; + } + + if (1) { + struct nlattr *attr[ops ? 
ops->maxtype + 1 : 0], **data = NULL; + + if (ops) { + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + err = nla_parse_nested(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy); + if (err < 0) + return err; + data = attr; + } + if (ops->validate) { + err = ops->validate(tb, data); + if (err < 0) + return err; + } + } + + if (dev) { + int modified = 0; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || + !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data); + if (err < 0) + return err; + modified = 1; + } + + return do_setlink(dev, ifm, tb, ifname, modified); + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENODEV; + + if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + return -EOPNOTSUPP; + if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] || + tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { +#ifdef CONFIG_KMOD + if (kind[0]) { + __rtnl_unlock(); + request_module("rtnl-link-%s", kind); + rtnl_lock(); + ops = rtnl_link_ops_get(kind); + if (ops) + goto replay; + } +#endif + return -EOPNOTSUPP; + } + + if (!ifname[0]) + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + return -ENOMEM; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + err = ops->newlink(dev, tb, data); +err_free: + if (err < 0) + free_netdev(dev); + 
return err; + } +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -743,13 +1109,13 @@ ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); if (dev == NULL) return -ENODEV; } else return -EINVAL; - nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -763,7 +1129,7 @@ kfree_skb(nskb); goto errout; } - err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); errout: dev_put(dev); @@ -796,13 +1162,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { + struct net *net = dev->nd_net; struct sk_buff *skb; int err = -ENOBUFS; if (!nx_dev_visible(current->nx_info, dev)) return; - skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (skb == NULL) goto errout; @@ -813,10 +1180,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. 
*/ @@ -827,6 +1194,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = skb->sk->sk_net; rtnl_doit_func doit; int sz_idx, kind; int min_len; @@ -855,6 +1223,7 @@ return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + struct sock *rtnl; rtnl_dumpit_func dumpit; dumpit = rtnl_get_dumpit(family, type); @@ -862,6 +1231,7 @@ return -EOPNOTSUPP; __rtnl_unlock(); + rtnl = net->rtnl; err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); rtnl_lock(); return err; @@ -911,6 +1281,10 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); @@ -936,6 +1310,36 @@ .notifier_call = rtnetlink_event, }; + +static int rtnetlink_net_init(struct net *net) +{ + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); + if (!sk) + return -ENOMEM; + + /* Don't hold an extra reference on the namespace */ + put_net(sk->sk_net); + net->rtnl = sk; + return 0; +} + +static void rtnetlink_net_exit(struct net *net) +{ + /* At the last minute lie and say this is a socket for the + * initial network namespace. So the socket will be safe to + * free. 
+ */ + net->rtnl->sk_net = get_net(&init_net); + sock_put(net->rtnl); +} + +static struct pernet_operations rtnetlink_net_ops = { + .init = rtnetlink_net_init, + .exit = rtnetlink_net_exit, +}; + void __init rtnetlink_init(void) { int i; @@ -948,15 +1352,16 @@ if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - &rtnl_mutex, THIS_MODULE); - if (rtnl == NULL) + if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); + netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); @@ -965,6 +1370,7 @@ EXPORT_SYMBOL(__rta_fill); EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); +EXPORT_SYMBOL(__rtattr_parse_nested_compat); EXPORT_SYMBOL(rtnetlink_put_metrics); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); diff -Nurb linux-2.6.22-570/net/core/skbuff.c linux-2.6.22-591/net/core/skbuff.c --- linux-2.6.22-570/net/core/skbuff.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/core/skbuff.c 2007-12-21 15:36:12.000000000 -0500 @@ -417,6 +417,7 @@ C(csum); C(local_df); n->cloned = 1; + n->hdr_len = skb->nohdr ? 
skb_headroom(skb) : skb->hdr_len; n->nohdr = 0; C(pkt_type); C(ip_summed); @@ -681,6 +682,7 @@ skb->network_header += off; skb->mac_header += off; skb->cloned = 0; + skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; @@ -2012,13 +2014,13 @@ skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY, NULL, NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY, NULL, NULL); } diff -Nurb linux-2.6.22-570/net/core/sock.c linux-2.6.22-591/net/core/sock.c --- linux-2.6.22-570/net/core/sock.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/core/sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -123,6 +123,7 @@ #include #include #include +#include #include #include @@ -360,6 +361,7 @@ char __user *optval, int optlen) { struct sock *sk=sock->sk; + struct net *net = sk->sk_net; struct sk_filter *filter; int val; int valbool; @@ -614,7 +616,7 @@ if (devname[0] == '\0') { sk->sk_bound_dev_if = 0; } else { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(net, devname); if (!dev) { ret = -ENODEV; break; @@ -867,7 +869,7 @@ * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, gfp_t priority, +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -888,6 +890,7 @@ */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); + sk->sk_net = get_net(net); } sock_vx_init(sk); sock_nx_init(sk); @@ -929,6 +932,7 @@ __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + put_net(sk->sk_net); vx_sock_dec(sk); clr_vx_info(&sk->sk_vx_info); sk->sk_xid = -1; @@ 
-943,7 +947,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); if (newsk != NULL) { struct sk_filter *filter; @@ -2017,7 +2021,7 @@ static int __init proto_init(void) { /* register /proc/net/protocols */ - return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; } subsys_initcall(proto_init); diff -Nurb linux-2.6.22-570/net/core/sysctl_net_core.c linux-2.6.22-591/net/core/sysctl_net_core.c --- linux-2.6.22-570/net/core/sysctl_net_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/core/sysctl_net_core.c 2007-12-21 15:36:15.000000000 -0500 @@ -9,25 +9,10 @@ #include #include #include +#include +#include #include -#ifdef CONFIG_SYSCTL - -extern int netdev_max_backlog; -extern int weight_p; - -extern __u32 sysctl_wmem_max; -extern __u32 sysctl_rmem_max; - -extern int sysctl_core_destroy_delay; - -#ifdef CONFIG_XFRM -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; -extern int sysctl_xfrm_larval_drop; -extern u32 sysctl_xfrm_acq_expires; -#endif - ctl_table core_table[] = { #ifdef CONFIG_NET { @@ -103,11 +88,32 @@ .mode = 0644, .proc_handler = &proc_dointvec }, +#endif /* CONFIG_NET */ + { + .ctl_name = NET_CORE_BUDGET, + .procname = "netdev_budget", + .data = &netdev_budget, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_CORE_WARNINGS, + .procname = "warnings", + .data = &net_msg_warn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + +struct ctl_table multi_core_table[] = { #ifdef CONFIG_XFRM { .ctl_name = NET_CORE_AEVENT_ETIME, .procname = "xfrm_aevent_etime", - .data = &sysctl_xfrm_aevent_etime, + 
.data = &init_net.sysctl_xfrm_aevent_etime, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = &proc_dointvec @@ -115,7 +121,7 @@ { .ctl_name = NET_CORE_AEVENT_RSEQTH, .procname = "xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, + .data = &init_net.sysctl_xfrm_aevent_rseqth, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = &proc_dointvec @@ -123,7 +129,7 @@ { .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_larval_drop", - .data = &sysctl_xfrm_larval_drop, + .data = &init_net.sysctl_xfrm_larval_drop, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -131,38 +137,19 @@ { .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_acq_expires", - .data = &sysctl_xfrm_acq_expires, + .data = &init_net.sysctl_xfrm_acq_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, #endif /* CONFIG_XFRM */ -#endif /* CONFIG_NET */ { .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", - .data = &sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_BUDGET, - .procname = "netdev_budget", - .data = &netdev_budget, + .data = &init_net.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, - { - .ctl_name = NET_CORE_WARNINGS, - .procname = "warnings", - .data = &net_msg_warn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = 0 } + {} }; - -#endif diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.c linux-2.6.22-591/net/dccp/ccids/ccid3.c --- linux-2.6.22-570/net/dccp/ccids/ccid3.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/ccids/ccid3.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/dccp/ccids/ccid3.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 
+ * Copyright (c) 2005-7 Ian McDonald * * An implementation of the DCCP protocol * @@ -49,7 +49,6 @@ static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; -static struct dccp_li_hist *ccid3_li_hist; /* * Transmitter Half-Connection Routines @@ -194,25 +193,20 @@ * The algorithm is not applicable if RTT < 4 microseconds. */ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, - struct timeval *now) + ktime_t now) { - suseconds_t delta; u32 quarter_rtts; if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ return; - delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); - DCCP_BUG_ON(delta < 0); - - quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); + quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); + quarter_rtts /= hctx->ccid3hctx_rtt / 4; if (quarter_rtts > 0) { - hctx->ccid3hctx_t_last_win_count = *now; + hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ - - ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); } } @@ -312,8 +306,8 @@ { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; - suseconds_t delay; + ktime_t now = ktime_get_real(); + s64 delay; BUG_ON(hctx == NULL); @@ -325,8 +319,6 @@ if (unlikely(skb->len == 0)) return -EBADMSG; - dccp_timestamp(sk, &now); - switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, @@ -349,7 +341,7 @@ ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = now; + hctx->ccid3hctx_t_ld = ktime_to_timeval(now); } else { /* Sender does not have RTT sample: X = MSS/second */ hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -361,7 +353,7 @@ break; case TFRC_SSTATE_NO_FBACK: case 
TFRC_SSTATE_FBACK: - delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); + delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); ccid3_pr_debug("delay=%ld\n", (long)delay); /* * Scheduling of packet transmissions [RFC 3448, 4.6] @@ -371,10 +363,10 @@ * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) - return delay / 1000L; + if (delay - (s64)hctx->ccid3hctx_delta >= 1000) + return (u32)delay / 1000L; - ccid3_hc_tx_update_win_count(hctx, &now); + ccid3_hc_tx_update_win_count(hctx, now); break; case TFRC_SSTATE_TERM: DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); @@ -387,8 +379,8 @@ hctx->ccid3hctx_idle = 0; /* set the nominal send time for the next following packet */ - timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); - + hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); return 0; } @@ -819,154 +811,6 @@ return 0; } -/* calculate first loss interval - * - * returns estimated loss interval in usecs */ - -static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) -{ - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_rx_hist_entry *entry, *next, *tail = NULL; - u32 x_recv, p; - suseconds_t rtt, delta; - struct timeval tstamp = { 0, }; - int interval = 0; - int win_count = 0; - int step = 0; - u64 fval; - - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (dccp_rx_hist_entry_data_packet(entry)) { - tail = entry; - - switch (step) { - case 0: - tstamp = entry->dccphrx_tstamp; - win_count = entry->dccphrx_ccval; - step = 1; - break; - case 1: - interval = win_count - entry->dccphrx_ccval; - if (interval < 0) - interval += TFRC_WIN_COUNT_LIMIT; - if (interval > 4) - goto found; - break; - } - } - } - - if (unlikely(step == 0)) { - DCCP_WARN("%s(%p), packet history has no data packets!\n", - dccp_role(sk), sk); - return ~0; - } - - if (unlikely(interval == 0)) { - DCCP_WARN("%s(%p), Could not 
find a win_count interval > 0." - "Defaulting to 1\n", dccp_role(sk), sk); - interval = 1; - } -found: - if (!tail) { - DCCP_CRIT("tail is null\n"); - return ~0; - } - - delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); - DCCP_BUG_ON(delta < 0); - - rtt = delta * 4 / interval; - ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", - dccp_role(sk), sk, (int)rtt); - - /* - * Determine the length of the first loss interval via inverse lookup. - * Assume that X_recv can be computed by the throughput equation - * s - * X_recv = -------- - * R * fval - * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. - */ - if (rtt == 0) { /* would result in divide-by-zero */ - DCCP_WARN("RTT==0\n"); - return ~0; - } - - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); - DCCP_BUG_ON(delta <= 0); - - x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); - if (x_recv == 0) { /* would also trigger divide-by-zero */ - DCCP_WARN("X_recv==0\n"); - if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { - DCCP_BUG("stored value of X_recv is zero"); - return ~0; - } - } - - fval = scaled_div(hcrx->ccid3hcrx_s, rtt); - fval = scaled_div32(fval, x_recv); - p = tfrc_calc_x_reverse_lookup(fval); - - ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " - "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); - - if (p == 0) - return ~0; - else - return 1000000 / p; -} - -static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) -{ - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_li_hist_entry *head; - u64 seq_temp; - - if (list_empty(&hcrx->ccid3hcrx_li_hist)) { - if (!dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) - return; - - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); - head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } else { - struct dccp_li_hist_entry *entry; - struct list_head *tail; - - 
head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); - /* FIXME win count check removed as was wrong */ - /* should make this check with receive history */ - /* and compare there as per section 10.2 of RFC4342 */ - - /* new loss event detected */ - /* calculate last interval length */ - seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); - - if (entry == NULL) { - DCCP_BUG("out of memory - can not allocate entry"); - return; - } - - list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); - - tail = hcrx->ccid3hcrx_li_hist.prev; - list_del(tail); - kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); - - /* Create the newest interval */ - entry->dccplih_seqno = seq_loss; - entry->dccplih_interval = seq_temp; - entry->dccplih_win_count = win_loss; - } -} - static int ccid3_hc_rx_detect_loss(struct sock *sk, struct dccp_rx_hist_entry *packet) { @@ -992,7 +836,14 @@ while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) > TFRC_RECV_NUM_LATE_LOSS) { loss = 1; - ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, + dccp_li_update_li(sk, + &hcrx->ccid3hcrx_li_hist, + &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_s, + hcrx->ccid3hcrx_bytes_recv, + hcrx->ccid3hcrx_x_recv, + hcrx->ccid3hcrx_seqno_nonloss, hcrx->ccid3hcrx_ccval_nonloss); tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; dccp_inc_seqno(&tmp_seqno); @@ -1152,7 +1003,7 @@ dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ - dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); + dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist); } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) @@ -1236,19 +1087,12 @@ if (ccid3_tx_hist == NULL) goto out_free_rx; - ccid3_li_hist = dccp_li_hist_new("ccid3"); - if (ccid3_li_hist == NULL) - goto out_free_tx; - rc = ccid_register(&ccid3); if (rc != 0) - goto 
out_free_loss_interval_history; + goto out_free_tx; out: return rc; -out_free_loss_interval_history: - dccp_li_hist_delete(ccid3_li_hist); - ccid3_li_hist = NULL; out_free_tx: dccp_tx_hist_delete(ccid3_tx_hist); ccid3_tx_hist = NULL; @@ -1271,10 +1115,6 @@ dccp_rx_hist_delete(ccid3_rx_hist); ccid3_rx_hist = NULL; } - if (ccid3_li_hist != NULL) { - dccp_li_hist_delete(ccid3_li_hist); - ccid3_li_hist = NULL; - } } module_exit(ccid3_module_exit); diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.h linux-2.6.22-591/net/dccp/ccids/ccid3.h --- linux-2.6.22-570/net/dccp/ccids/ccid3.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/ccids/ccid3.h 2007-12-21 15:36:12.000000000 -0500 @@ -36,6 +36,7 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ +#include #include #include #include @@ -108,10 +109,10 @@ enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; - struct timeval ccid3hctx_t_last_win_count; + ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; - struct timeval ccid3hctx_t_nom; + ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.c --- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/dccp/ccids/lib/loss_interval.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 
+ * Copyright (c) 2005-7 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify @@ -15,58 +15,38 @@ #include #include "../../dccp.h" #include "loss_interval.h" +#include "packet_history.h" +#include "tfrc.h" -struct dccp_li_hist *dccp_li_hist_new(const char *name) -{ - struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_li_hist_mask[] = "li_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; +#define DCCP_LI_HIST_IVAL_F_LENGTH 8 - sprintf(slab_name, dccp_li_hist_mask, name); - hist->dccplih_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_li_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccplih_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} +struct dccp_li_hist_entry { + struct list_head dccplih_node; + u64 dccplih_seqno:48, + dccplih_win_count:4; + u32 dccplih_interval; +}; -EXPORT_SYMBOL_GPL(dccp_li_hist_new); +struct kmem_cache *dccp_li_cachep __read_mostly; -void dccp_li_hist_delete(struct dccp_li_hist *hist) +static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) { - const char* name = kmem_cache_name(hist->dccplih_slab); - - kmem_cache_destroy(hist->dccplih_slab); - kfree(name); - kfree(hist); + return kmem_cache_alloc(dccp_li_cachep, prio); } -EXPORT_SYMBOL_GPL(dccp_li_hist_delete); +static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(dccp_li_cachep, entry); +} -void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) +void dccp_li_hist_purge(struct list_head *list) { struct dccp_li_hist_entry *entry, *next; list_for_each_entry_safe(entry, next, list, 
dccplih_node) { list_del_init(&entry->dccplih_node); - kmem_cache_free(hist->dccplih_slab, entry); + kmem_cache_free(dccp_li_cachep, entry); } } @@ -118,16 +98,16 @@ EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, const u64 seq_loss, const u8 win_loss) +static int dccp_li_hist_interval_new(struct list_head *list, + const u64 seq_loss, const u8 win_loss) { struct dccp_li_hist_entry *entry; int i; for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); + entry = dccp_li_hist_entry_new(GFP_ATOMIC); if (entry == NULL) { - dccp_li_hist_purge(hist, list); + dccp_li_hist_purge(list); DCCP_BUG("loss interval list entry is NULL"); return 0; } @@ -140,4 +120,176 @@ return 1; } -EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); +/* calculate first loss interval + * + * returns estimated loss interval in usecs */ +static u32 dccp_li_calc_first_li(struct sock *sk, + struct list_head *hist_list, + struct timeval *last_feedback, + u16 s, u32 bytes_recv, + u32 previous_x_recv) +{ + struct dccp_rx_hist_entry *entry, *next, *tail = NULL; + u32 x_recv, p; + suseconds_t rtt, delta; + struct timeval tstamp = { 0, 0 }; + int interval = 0; + int win_count = 0; + int step = 0; + u64 fval; + + list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { + if (dccp_rx_hist_entry_data_packet(entry)) { + tail = entry; + + switch (step) { + case 0: + tstamp = entry->dccphrx_tstamp; + win_count = entry->dccphrx_ccval; + step = 1; + break; + case 1: + interval = win_count - entry->dccphrx_ccval; + if (interval < 0) + interval += TFRC_WIN_COUNT_LIMIT; + if (interval > 4) + goto found; + break; + } + } + } + + if (unlikely(step == 0)) { + DCCP_WARN("%s(%p), packet history has no data packets!\n", + dccp_role(sk), sk); + return ~0; + } + + if (unlikely(interval == 0)) { + DCCP_WARN("%s(%p), Could not find a win_count interval > 0." 
+ "Defaulting to 1\n", dccp_role(sk), sk); + interval = 1; + } +found: + if (!tail) { + DCCP_CRIT("tail is null\n"); + return ~0; + } + + delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + + rtt = delta * 4 / interval; + dccp_pr_debug("%s(%p), approximated RTT to %dus\n", + dccp_role(sk), sk, (int)rtt); + + /* + * Determine the length of the first loss interval via inverse lookup. + * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = -------- + * R * fval + * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. + */ + if (rtt == 0) { /* would result in divide-by-zero */ + DCCP_WARN("RTT==0\n"); + return ~0; + } + + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, last_feedback); + DCCP_BUG_ON(delta <= 0); + + x_recv = scaled_div32(bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if (previous_x_recv == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0; + } + x_recv = previous_x_recv; + } + + fval = scaled_div(s, rtt); + fval = scaled_div32(fval, x_recv); + p = tfrc_calc_x_reverse_lookup(fval); + + dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " + "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); + + if (p == 0) + return ~0; + else + return 1000000 / p; +} + +void dccp_li_update_li(struct sock *sk, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, u16 s, u32 bytes_recv, + u32 previous_x_recv, u64 seq_loss, u8 win_loss) +{ + struct dccp_li_hist_entry *head; + u64 seq_temp; + + if (list_empty(li_hist_list)) { + if (!dccp_li_hist_interval_new(li_hist_list, seq_loss, + win_loss)) + return; + + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list, + last_feedback, + s, bytes_recv, + previous_x_recv); + } else { + struct dccp_li_hist_entry *entry; + struct list_head 
*tail; + + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(GFP_ATOMIC); + + if (entry == NULL) { + DCCP_BUG("out of memory - can not allocate entry"); + return; + } + + list_add(&entry->dccplih_node, li_hist_list); + + tail = li_hist_list->prev; + list_del(tail); + kmem_cache_free(dccp_li_cachep, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } +} + +EXPORT_SYMBOL_GPL(dccp_li_update_li); + +static __init int dccp_li_init(void) +{ + dccp_li_cachep = kmem_cache_create("dccp_li_hist", + sizeof(struct dccp_li_hist_entry), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + return dccp_li_cachep == NULL ? -ENOBUFS : 0; +} + +static __exit void dccp_li_exit(void) +{ + kmem_cache_destroy(dccp_li_cachep); +} + +module_init(dccp_li_init); +module_exit(dccp_li_exit); diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.h --- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.h 2007-12-21 15:36:12.000000000 -0500 @@ -3,8 +3,8 @@ /* * net/dccp/ccids/lib/loss_interval.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 
+ * Copyright (c) 2005-7 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify it @@ -14,44 +14,16 @@ */ #include -#include #include -#define DCCP_LI_HIST_IVAL_F_LENGTH 8 - -struct dccp_li_hist { - struct kmem_cache *dccplih_slab; -}; - -extern struct dccp_li_hist *dccp_li_hist_new(const char *name); -extern void dccp_li_hist_delete(struct dccp_li_hist *hist); - -struct dccp_li_hist_entry { - struct list_head dccplih_node; - u64 dccplih_seqno:48, - dccplih_win_count:4; - u32 dccplih_interval; -}; - -static inline struct dccp_li_hist_entry * - dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const gfp_t prio) -{ - return kmem_cache_alloc(hist->dccplih_slab, prio); -} - -static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, - struct dccp_li_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccplih_slab, entry); -} - -extern void dccp_li_hist_purge(struct dccp_li_hist *hist, - struct list_head *list); +extern void dccp_li_hist_purge(struct list_head *list); extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, const u64 seq_loss, const u8 win_loss); +extern void dccp_li_update_li(struct sock *sk, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, u16 s, + u32 bytes_recv, u32 previous_x_recv, + u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff -Nurb linux-2.6.22-570/net/dccp/dccp.h linux-2.6.22-591/net/dccp/dccp.h --- linux-2.6.22-570/net/dccp/dccp.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/dccp.h 2007-12-21 15:36:12.000000000 -0500 @@ -184,7 +184,7 @@ /* * Checksumming routines */ -static inline int dccp_csum_coverage(const struct sk_buff *skb) +static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb) { const struct dccp_hdr* dh = dccp_hdr(skb); @@ -195,7 
+195,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) { - int cov = dccp_csum_coverage(skb); + unsigned int cov = dccp_csum_coverage(skb); if (cov >= skb->len) dccp_hdr(skb)->dccph_cscov = 0; diff -Nurb linux-2.6.22-570/net/dccp/ipv4.c linux-2.6.22-591/net/dccp/ipv4.c --- linux-2.6.22-570/net/dccp/ipv4.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/ipv4.c 2007-12-21 15:36:15.000000000 -0500 @@ -202,6 +202,7 @@ */ static void dccp_v4_err(struct sk_buff *skb, u32 info) { + struct net *net = skb->dev->nd_net; const struct iphdr *iph = (struct iphdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); @@ -213,13 +214,16 @@ __u64 seq; int err; + if (skb->dev->nd_net != &init_net) + return; + if (skb->len < (iph->ihl << 2) + 8) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; } sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); + iph->saddr, dh->dccph_sport, inet_iif(skb), net); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -441,7 +445,7 @@ nsk = inet_lookup_established(&dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, - inet_iif(skb)); + inet_iif(skb), sk->sk_net); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -458,7 +462,8 @@ struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + struct flowi fl = { .fl_net = &init_net, + .oif = ((struct rtable *)skb->dst)->rt_iif, .nl_u = { .ip4_u = { .daddr = ip_hdr(skb)->saddr, .saddr = ip_hdr(skb)->daddr, @@ -809,11 +814,16 @@ /* this is called when real data arrives */ static int dccp_v4_rcv(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; const struct dccp_hdr *dh; const struct iphdr *iph; struct sock *sk; int min_cov; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) @@ 
-852,7 +862,7 @@ * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, inet_iif(skb)); + iph->daddr, dh->dccph_dport, inet_iif(skb), net); /* * Step 2: * If no socket ... diff -Nurb linux-2.6.22-570/net/dccp/ipv6.c linux-2.6.22-591/net/dccp/ipv6.c --- linux-2.6.22-570/net/dccp/ipv6.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/ipv6.c 2007-12-21 15:36:15.000000000 -0500 @@ -94,6 +94,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = skb->dev->nd_net; struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct ipv6_pinfo *np; @@ -102,7 +103,7 @@ __u64 seq; sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, - &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); + &hdr->saddr, dh->dccph_sport, inet6_iif(skb), net); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -142,6 +143,7 @@ for now. 
*/ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -242,6 +244,7 @@ int err = -1; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net, fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); @@ -358,6 +361,7 @@ &rxip6h->daddr); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); @@ -407,7 +411,7 @@ nsk = __inet6_lookup_established(&dccp_hashinfo, &iph->saddr, dh->dccph_sport, &iph->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + inet6_iif(skb), sk->sk_net); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -584,6 +588,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); if (opt != NULL && opt->srcrt != NULL) { @@ -819,6 +824,7 @@ { const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; + struct net *net = skb->dev->nd_net; struct sock *sk; int min_cov; @@ -849,7 +855,7 @@ sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr, dh->dccph_sport, &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + inet6_iif(skb), net); /* * Step 2: * If no socket ... 
@@ -937,6 +943,7 @@ return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; diff -Nurb linux-2.6.22-570/net/dccp/probe.c linux-2.6.22-591/net/dccp/probe.c --- linux-2.6.22-570/net/dccp/probe.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/dccp/probe.c 2007-12-21 15:36:15.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include "dccp.h" #include "ccid.h" @@ -168,7 +169,7 @@ if (IS_ERR(dccpw.fifo)) return PTR_ERR(dccpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; ret = register_jprobe(&dccp_send_probe); @@ -178,7 +179,7 @@ pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(dccpw.fifo); return ret; @@ -188,7 +189,7 @@ static __exit void dccpprobe_exit(void) { kfifo_free(dccpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); } diff -Nurb linux-2.6.22-570/net/decnet/af_decnet.c linux-2.6.22-591/net/decnet/af_decnet.c --- linux-2.6.22-570/net/decnet/af_decnet.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/af_decnet.c 2007-12-21 15:36:15.000000000 -0500 @@ -131,6 +131,7 @@ #include #include #include +#include #include #include #include @@ -470,10 +471,10 @@ .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) { struct dn_scp *scp; - struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); if (!sk) goto out; @@ -674,10 +675,13 @@ -static int dn_create(struct socket *sock, int protocol) +static int dn_create(struct net *net, struct socket 
*sock, int protocol) { struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + switch(sock->type) { case SOCK_SEQPACKET: if (protocol != DNPROTO_NSP) @@ -690,7 +694,7 @@ } - if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -747,7 +751,7 @@ if (dn_ntohs(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -943,6 +947,7 @@ err = -EHOSTUNREACH; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.oif = sk->sk_bound_dev_if; fl.fld_dst = dn_saddr2dn(&scp->peer); fl.fld_src = dn_saddr2dn(&scp->addr); @@ -1090,7 +1095,7 @@ cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); @@ -2085,6 +2090,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_UP: dn_dev_up(dev); @@ -2399,7 +2407,7 @@ dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops); + proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2428,7 +2436,7 @@ dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove("decnet"); + proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); } diff -Nurb linux-2.6.22-570/net/decnet/dn_dev.c linux-2.6.22-591/net/decnet/dn_dev.c --- linux-2.6.22-570/net/decnet/dn_dev.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/decnet/dn_dev.c 2007-12-21 15:36:15.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -513,7 +514,7 @@ ifr->ifr_name[IFNAMSIZ-1] 
= 0; #ifdef CONFIG_KMOD - dev_load(ifr->ifr_name); + dev_load(&init_net, ifr->ifr_name); #endif switch(cmd) { @@ -531,7 +532,7 @@ rtnl_lock(); - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) { + if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { ret = -ENODEV; goto done; } @@ -629,7 +630,7 @@ { struct net_device *dev; struct dn_dev *dn_dev = NULL; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev) { dn_dev = dev->dn_ptr; dev_put(dev); @@ -647,12 +648,16 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; struct dn_ifaddr *ifa, **ifap; int err = -EADDRNOTAVAIL; + if (net != &init_net) + goto errout; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) goto errout; @@ -679,6 +684,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct net_device *dev; struct dn_dev *dn_db; @@ -686,6 +692,9 @@ struct dn_ifaddr *ifa; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) return err; @@ -694,7 +703,7 @@ return -EINVAL; ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; if ((dn_db = dev->dn_ptr) == NULL) { @@ -783,24 +792,28 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); } static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; 
int idx, dn_idx = 0, skip_ndevs, skip_naddr; struct net_device *dev; struct dn_dev *dn_db; struct dn_ifaddr *ifa; + if (net != &init_net) + return 0; + skip_ndevs = cb->args[0]; skip_naddr = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < skip_ndevs) goto cont; else if (idx > skip_ndevs) { @@ -869,10 +882,10 @@ rv = dn_dev_get_first(dev, addr); read_unlock(&dev_base_lock); dev_put(dev); - if (rv == 0 || dev == &loopback_dev) + if (rv == 0 || dev == &init_net.loopback_dev) return rv; } - dev = &loopback_dev; + dev = &init_net.loopback_dev; dev_hold(dev); goto last_chance; } @@ -1299,7 +1312,7 @@ struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) dn_dev_down(dev); rtnl_unlock(); @@ -1310,7 +1323,7 @@ struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1344,7 +1357,7 @@ return SEQ_START_TOKEN; i = 1; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1363,9 +1376,9 @@ dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1465,7 +1478,7 @@ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); - proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { @@ -1486,7 +1499,7 @@ } #endif /* CONFIG_SYSCTL */ - proc_net_remove("decnet_dev"); + proc_net_remove(&init_net, "decnet_dev"); dn_dev_devices_off(); } diff -Nurb linux-2.6.22-570/net/decnet/dn_fib.c linux-2.6.22-591/net/decnet/dn_fib.c --- linux-2.6.22-570/net/decnet/dn_fib.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/decnet/dn_fib.c 2007-12-21 15:36:15.000000000 -0500 @@ -203,8 +203,6 @@ struct flowi fl; struct dn_fib_res res; - memset(&fl, 0, sizeof(fl)); - if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; @@ -212,7 +210,7 @@ return -EINVAL; if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -223,6 +221,7 @@ } memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fld_dst = nh->nh_gw; fl.oif = nh->nh_oif; fl.fld_scope = r->rtm_scope + 1; @@ -255,7 +254,7 @@ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - dev = __dev_get_by_index(nh->nh_oif); + dev = __dev_get_by_index(&init_net, nh->nh_oif); if (dev == NULL || dev->dn_ptr == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) @@ -355,7 +354,7 @@ if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -506,10 +505,14 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); + if (net != &init_net) + return -EINVAL; + if (dn_fib_check_attr(r, rta)) return -EINVAL; @@ -522,10 +525,14 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); + if (net != &init_net) + return -EINVAL; + if (dn_fib_check_attr(r, rta)) return -EINVAL; @@ -602,7 +609,7 @@ /* Scan device list */ read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) 
continue; diff -Nurb linux-2.6.22-570/net/decnet/dn_neigh.c linux-2.6.22-591/net/decnet/dn_neigh.c --- linux-2.6.22-570/net/decnet/dn_neigh.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/dn_neigh.c 2007-12-21 15:36:15.000000000 -0500 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -591,6 +592,7 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -598,12 +600,20 @@ goto out; } +static int dn_neigh_seq_release(struct inode *inode, struct file *file) +{ /* ->release hook of dn_neigh_seq_fops; wraps seq_release_private() */ + struct seq_file *seq = file->private_data; + struct neigh_seq_state *state = seq->private; + put_net(state->net); /* drop the reference that get_net() stored in ->net (see the hunk above) */ + return seq_release_private(inode, file); +} + static const struct file_operations dn_neigh_seq_fops = { .owner = THIS_MODULE, .open = dn_neigh_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = dn_neigh_seq_release, }; #endif @@ -611,11 +621,11 @@ void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove("decnet_neigh"); + proc_net_remove(&init_net, "decnet_neigh"); neigh_table_clear(&dn_neigh_table); } diff -Nurb linux-2.6.22-570/net/decnet/dn_nsp_out.c linux-2.6.22-591/net/decnet/dn_nsp_out.c --- linux-2.6.22-570/net/decnet/dn_nsp_out.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/dn_nsp_out.c 2007-12-21 15:36:15.000000000 -0500 @@ -91,6 +91,7 @@ } memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.oif = sk->sk_bound_dev_if; fl.fld_src = dn_saddr2dn(&scp->addr); fl.fld_dst = dn_saddr2dn(&scp->peer); diff -Nurb linux-2.6.22-570/net/decnet/dn_route.c linux-2.6.22-591/net/decnet/dn_route.c --- linux-2.6.22-570/net/decnet/dn_route.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/decnet/dn_route.c 2007-12-21 15:36:15.000000000 -0500 @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -583,6 +584,9 @@ struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; + if (dev->nd_net != &init_net) + goto dump_it; + if (dn == NULL) goto dump_it; @@ -877,13 +881,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .nl_u = { .dn_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .dn_u = { .daddr = oldflp->fld_dst, .saddr = oldflp->fld_src, .scope = RT_SCOPE_UNIVERSE, } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = init_net.loopback_dev.ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; @@ -900,11 +905,11 @@ "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), dn_ntohs(oldflp->fld_src), - oldflp->mark, loopback_dev.ifindex, oldflp->oif); + oldflp->mark, init_net.loopback_dev.ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -925,7 +930,7 @@ goto out; } read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -953,7 +958,7 @@ err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; dev_hold(dev_out); if (!fl.fld_dst) { fl.fld_dst = @@ -962,7 +967,7 @@ if (!fl.fld_dst) goto out; } - fl.oif = loopback_dev.ifindex; + fl.oif = init_net.loopback_dev.ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -995,7 +1000,7 @@ * here */ if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst); + 
neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); if (neigh) { if ((oldflp->oif && (neigh->dev->ifindex != oldflp->oif)) || @@ -1008,7 +1013,7 @@ if (dev_out) dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; res.type = RTN_LOCAL; } else { dev_out = neigh->dev; @@ -1029,7 +1034,7 @@ /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; dev_hold(dev_out); res.type = RTN_LOCAL; goto select_source; @@ -1065,7 +1070,7 @@ fl.fld_src = fl.fld_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -1103,6 +1108,7 @@ atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.flags = DST_HOST; + rt->fl.fl_net = &init_net; rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; rt->fl.oif = oldflp->oif; @@ -1226,7 +1232,8 @@ int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .nl_u = { .dn_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .dn_u = { .daddr = cb->dst, .saddr = cb->src, .scope = RT_SCOPE_UNIVERSE, @@ -1374,6 +1381,7 @@ rt->rt_dst_map = fl.fld_dst; rt->rt_src_map = fl.fld_src; + rt->fl.fl_net = &init_net; rt->fl.fld_src = cb->src; rt->fl.fld_dst = cb->dst; rt->fl.oif = 0; @@ -1526,6 +1534,7 @@ */ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct rtattr **rta = arg; struct rtmsg *rtm = NLMSG_DATA(nlh); struct dn_route *rt = NULL; @@ -1534,7 +1543,11 @@ struct sk_buff *skb; struct flowi fl; + if (net != &init_net) + return -EINVAL; + memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); @@ -1552,7 +1565,7 @@ if (fl.iif) { struct net_device *dev; - if 
((dev = dev_get_by_index(fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1598,7 +1611,7 @@ goto out_free; } - return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); out_free: kfree_skb(skb); @@ -1611,10 +1624,14 @@ */ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct dn_route *rt; int h, s_h; int idx, s_idx; + if (net != &init_net) + return 0; + if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) return -EINVAL; if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)) @@ -1814,7 +1831,7 @@ dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); @@ -1829,6 +1846,6 @@ del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove("decnet_cache"); + proc_net_remove(&init_net, "decnet_cache"); } diff -Nurb linux-2.6.22-570/net/decnet/dn_rules.c linux-2.6.22-591/net/decnet/dn_rules.c --- linux-2.6.22-570/net/decnet/dn_rules.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/dn_rules.c 2007-12-21 15:36:15.000000000 -0500 @@ -186,7 +186,10 @@ unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .dn_u = { .daddr = addr } } + }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); @@ -223,7 +226,7 @@ return -ENOBUFS; } -static u32 dn_fib_rule_default_pref(void) +static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) { struct list_head *pos; struct fib_rule *rule; @@ -240,7 +243,7 @@ return 0; } -static void dn_fib_rule_flush_cache(void) 
+static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) { dn_rt_cache_flush(-1); } @@ -265,12 +268,12 @@ void __init dn_fib_rules_init(void) { list_add_tail(&default_rule.common.list, &dn_fib_rules); - fib_rules_register(&dn_fib_rules_ops); + fib_rules_register(&init_net, &dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - fib_rules_unregister(&dn_fib_rules_ops); + fib_rules_unregister(&init_net, &dn_fib_rules_ops); } diff -Nurb linux-2.6.22-570/net/decnet/dn_table.c linux-2.6.22-591/net/decnet/dn_table.c --- linux-2.6.22-570/net/decnet/dn_table.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/dn_table.c 2007-12-21 15:36:15.000000000 -0500 @@ -375,10 +375,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); + rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); } static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, @@ -463,12 +463,16 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; struct hlist_node *node; int dumped = 0; + if (net != &init_net) + return 0; + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return dn_cache_dump(skb, cb); diff -Nurb linux-2.6.22-570/net/decnet/netfilter/dn_rtmsg.c linux-2.6.22-591/net/decnet/netfilter/dn_rtmsg.c --- linux-2.6.22-570/net/decnet/netfilter/dn_rtmsg.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/netfilter/dn_rtmsg.c 2007-12-21 15:36:15.000000000 -0500 @@ -93,6 +93,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return 
NF_ACCEPT; + dnrmg_send_peer(*pskb); return NF_ACCEPT; } @@ -137,7 +141,8 @@ { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg = netlink_kernel_create(&init_net, + NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, dnrmg_receive_user_sk, NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); diff -Nurb linux-2.6.22-570/net/decnet/sysctl_net_decnet.c linux-2.6.22-591/net/decnet/sysctl_net_decnet.c --- linux-2.6.22-570/net/decnet/sysctl_net_decnet.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/decnet/sysctl_net_decnet.c 2007-12-21 15:36:15.000000000 -0500 @@ -259,7 +259,7 @@ devname[newlen] = 0; - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; @@ -299,7 +299,7 @@ devname[*lenp] = 0; strip_it(devname); - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; diff -Nurb linux-2.6.22-570/net/econet/af_econet.c linux-2.6.22-591/net/econet/af_econet.c --- linux-2.6.22-570/net/econet/af_econet.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/econet/af_econet.c 2007-12-21 15:36:15.000000000 -0500 @@ -608,12 +608,15 @@ * Create an Econet socket */ -static int econet_create(struct socket *sock, int protocol) +static int econet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct econet_sock *eo; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + /* Econet only provides datagram services. 
*/ if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; @@ -621,7 +624,7 @@ sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1); + sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); if (sk == NULL) goto out; @@ -659,7 +662,7 @@ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; @@ -1062,6 +1065,9 @@ struct sock *sk; struct ec_device *edev = dev->ec_ptr; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1116,6 +1122,9 @@ struct net_device *dev = (struct net_device *)data; struct ec_device *edev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (msg) { case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. */ diff -Nurb linux-2.6.22-570/net/ieee80211/ieee80211_module.c linux-2.6.22-591/net/ieee80211/ieee80211_module.c --- linux-2.6.22-570/net/ieee80211/ieee80211_module.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ieee80211/ieee80211_module.c 2007-12-21 15:36:15.000000000 -0500 @@ -264,7 +264,7 @@ struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); @@ -273,7 +273,7 @@ e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, ieee80211_proc); if (!e) { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; return -EIO; } @@ -293,7 +293,7 @@ #ifdef CONFIG_IEEE80211_DEBUG if (ieee80211_proc) { remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); 
ieee80211_proc = NULL; } #endif /* CONFIG_IEEE80211_DEBUG */ diff -Nurb linux-2.6.22-570/net/ipv4/Kconfig linux-2.6.22-591/net/ipv4/Kconfig --- linux-2.6.22-570/net/ipv4/Kconfig 2007-12-21 15:36:02.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -116,48 +116,6 @@ equal "cost" and chooses one of them in a non-deterministic fashion if a matching packet arrives. -config IP_ROUTE_MULTIPATH_CACHED - bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" - depends on IP_ROUTE_MULTIPATH - help - Normally, equal cost multipath routing is not supported by the - routing cache. If you say Y here, alternative routes are cached - and on cache lookup a route is chosen in a configurable fashion. - - If unsure, say N. - -config IP_ROUTE_MULTIPATH_RR - tristate "MULTIPATH: round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen according to Round Robin - -config IP_ROUTE_MULTIPATH_RANDOM - tristate "MULTIPATH: random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a random fashion. Actually, - there is no weight for a route. The advantage of this policy - is that it is implemented stateless and therefore introduces only - a very small delay. - -config IP_ROUTE_MULTIPATH_WRANDOM - tristate "MULTIPATH: weighted random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a weighted random fashion. - The per route weights are the weights visible via ip route 2. As the - corresponding state management introduces some overhead routing delay - is increased. - -config IP_ROUTE_MULTIPATH_DRR - tristate "MULTIPATH: interface round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Connections are distributed in a round robin fashion over the - available interfaces. This policy makes sense if the connections - should be primarily distributed on interfaces and not on routes. 
- config IP_ROUTE_VERBOSE bool "IP: verbose route monitoring" depends on IP_ADVANCED_ROUTER diff -Nurb linux-2.6.22-570/net/ipv4/Makefile linux-2.6.22-591/net/ipv4/Makefile --- linux-2.6.22-570/net/ipv4/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -29,14 +29,9 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff -Nurb linux-2.6.22-570/net/ipv4/af_inet.c linux-2.6.22-591/net/ipv4/af_inet.c --- linux-2.6.22-570/net/ipv4/af_inet.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/af_inet.c 2007-12-21 15:36:15.000000000 -0500 @@ -244,7 +244,7 @@ * Create an inet socket. 
*/ -static int inet_create(struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct list_head *p; @@ -310,6 +310,10 @@ goto out_rcu_unlock; } + err = -EPROTONOSUPPORT; + if (!(answer->flags & INET_PROTOSW_NETNS) && (net != &init_net)) + goto out_rcu_unlock; + err = -EPERM; if ((protocol == IPPROTO_ICMP) && nx_capable(answer->capability, NXC_RAW_ICMP)) @@ -326,7 +330,7 @@ BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -344,7 +348,7 @@ inet->hdrincl = 1; } - if (ipv4_config.no_pmtu_disc) + if (net->sysctl_ipv4_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -423,12 +427,12 @@ } /* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind __read_mostly; int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct inet_sock *inet = inet_sk(sk); struct nx_v4_sock_addr nsa; unsigned short snum; @@ -448,7 +452,7 @@ if (err) goto out; - chk_addr_ret = inet_addr_type(nsa.saddr); + chk_addr_ret = inet_addr_type(net, nsa.saddr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -458,7 +462,7 @@ * is temporarily down) */ err = -EADDRNOTAVAIL; - if (!sysctl_ip_nonlocal_bind && + if (!net->sysctl_ip_nonlocal_bind && !inet->freebind && nsa.saddr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && @@ -787,6 +791,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; int err = 0; switch (cmd) { @@ -799,12 +804,12 @@ case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - err = ip_rt_ioctl(cmd, (void __user *)arg); + err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - err = arp_ioctl(cmd, (void __user *)arg); + err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -817,7 +822,7 @@ case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); + err = devinet_ioctl(net, cmd, (void __user *)arg); break; default: if (sk->sk_prot->ioctl) @@ -927,7 +932,8 @@ .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | - INET_PROTOSW_ICSK, + INET_PROTOSW_ICSK | + INET_PROTOSW_NETNS, }, { @@ -937,7 +943,8 @@ .ops = &inet_dgram_ops, .capability = -1, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_NETNS, }, @@ -948,7 +955,8 @@ .ops = &inet_sockraw_ops, .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, + .flags = INET_PROTOSW_REUSE | + INET_PROTOSW_NETNS, } }; @@ -1029,8 +1037,6 @@ * Shall we try to damage output packets if routing dev changes? 
*/ -int sysctl_ip_dynaddr __read_mostly; - static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -1059,7 +1065,7 @@ if (new_saddr == old_saddr) return 0; - if (sysctl_ip_dynaddr > 1) { + if (sk->sk_net->sysctl_ip_dynaddr > 1) { printk(KERN_INFO "%s(): shifting inet->" "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", __FUNCTION__, @@ -1098,6 +1104,7 @@ daddr = inet->opt->faddr; { struct flowi fl = { + .fl_net = sk->sk_net, .oif = sk->sk_bound_dev_if, .nl_u = { .ip4_u = { @@ -1127,7 +1134,7 @@ * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sysctl_ip_dynaddr || + if (!sk->sk_net->sysctl_ip_dynaddr || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) @@ -1183,6 +1190,9 @@ int ihl; int id; + if (!(features & NETIF_F_V4_CSUM)) + features &= ~NETIF_F_SG; + if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | @@ -1353,6 +1363,24 @@ .gso_segment = inet_gso_segment, }; +/* Per-namespace init: default TTL = IPDEFTTL, ip_dynaddr = 0; always returns 0. */ +static int inet_net_init(struct net *net) +{ + net->sysctl_ip_default_ttl = IPDEFTTL; + net->sysctl_ip_dynaddr = 0; + + return 0; +} +/* Per-namespace exit: empty, inet_net_init() only assigns fields of *net. */ +static void inet_net_exit(struct net *net) +{ +} +/* init/exit pair handed to register_pernet_subsys() further down in this patch. */ +static struct pernet_operations inet_net_ops = { + .init = inet_net_init, + .exit = inet_net_exit, +}; + static int __init inet_init(void) { struct sk_buff *dummy_skb; @@ -1374,6 +1402,10 @@ if (rc) goto out_unregister_udp_proto; + rc = register_pernet_subsys(&inet_net_ops); + if (rc) + goto out_unregister_raw_proto; + /* * Tell SOCKET that we are alive... 
*/ @@ -1450,6 +1482,8 @@ rc = 0; out: return rc; +out_unregister_raw_proto: + proto_unregister(&raw_prot); out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: @@ -1472,15 +1506,11 @@ goto out_tcp; if (udp4_proc_init()) goto out_udp; - if (fib_proc_init()) - goto out_fib; if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: - fib_proc_exit(); -out_fib: udp4_proc_exit(); out_udp: tcp4_proc_exit(); @@ -1516,4 +1546,3 @@ EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); EXPORT_SYMBOL(net_statistics); -EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); diff -Nurb linux-2.6.22-570/net/ipv4/ah4.c linux-2.6.22-591/net/ipv4/ah4.c --- linux-2.6.22-570/net/ipv4/ah4.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/ah4.c 2007-12-21 15:36:15.000000000 -0500 @@ -198,6 +198,9 @@ struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; + if (skb->dev->nd_net != &init_net) + return; + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; @@ -339,3 +342,4 @@ module_init(ah4_init); module_exit(ah4_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH); diff -Nurb linux-2.6.22-570/net/ipv4/arp.c linux-2.6.22-591/net/ipv4/arp.c --- linux-2.6.22-570/net/ipv4/arp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/arp.c 2007-12-21 15:36:15.000000000 -0500 @@ -109,6 +109,7 @@ #include #include #include +#include #include #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) #include @@ -235,10 +236,11 @@ { __be32 addr = *(__be32*)neigh->primary_key; struct net_device *dev = neigh->dev; + struct net *net = dev->nd_net; struct in_device *in_dev; struct neigh_parms *parms; - neigh->type = inet_addr_type(addr); + neigh->type = inet_addr_type(net, addr); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -332,6 +334,7 @@ __be32 saddr = 0; u8 *dst_ha = NULL; struct net_device *dev = neigh->dev; + struct 
net *net = dev->nd_net; __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); struct in_device *in_dev = in_dev_get(dev); @@ -342,14 +345,14 @@ switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ - if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL) + if (skb && inet_addr_type(net, ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; case 1: /* Restrict announcements of saddr in same subnet */ if (!skb) break; saddr = ip_hdr(skb)->saddr; - if (inet_addr_type(saddr) == RTN_LOCAL) { + if (inet_addr_type(net, saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; @@ -386,6 +389,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev, __be32 sip, __be32 tip) { + struct net *net = dev->nd_net; int scope; switch (IN_DEV_ARP_IGNORE(in_dev)) { @@ -416,13 +420,15 @@ default: return 0; } - return !inet_confirm_addr(dev, sip, tip, scope); + return !inet_confirm_addr(net, dev, sip, tip, scope); } static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, - .saddr = tip } } }; + struct flowi fl = { + .fl_net = dev->nd_net, + .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } + }; struct rtable *rt; int flag = 0; /*unsigned long now; */ @@ -469,6 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; __be32 paddr; struct neighbour *n; @@ -480,7 +487,7 @@ paddr = ((struct rtable*)skb->dst)->rt_gateway; - if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) + if (arp_set_predefined(inet_addr_type(net, paddr), haddr, paddr, dev)) return 0; n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); @@ -704,6 +711,7 @@ static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; struct in_device *in_dev = 
in_dev_get(dev); struct arphdr *arp; unsigned char *arp_ptr; @@ -824,7 +832,7 @@ /* Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && - inet_addr_type(tip) == RTN_LOCAL && + inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev,dev,sip,tip)) arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr); goto out; @@ -854,7 +862,7 @@ } else if (IN_DEV_FORWARD(in_dev)) { if ((rt->rt_flags&RTCF_DNAT) || (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) { + (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); @@ -877,14 +885,14 @@ n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) { + if (IPV4_DEVCONF_ALL(net, ARP_ACCEPT)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && - inet_addr_type(sip) == RTN_UNICAST) + inet_addr_type(net, sip) == RTN_UNICAST) n = __neigh_lookup(&arp_tbl, &sip, dev, -1); } @@ -966,7 +974,7 @@ * Set (create) an ARP cache entry. 
*/ -static int arp_req_set(struct arpreq *r, struct net_device * dev) +static int arp_req_set(struct net *net, struct arpreq *r, struct net_device * dev) { __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; @@ -977,17 +985,17 @@ if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); + dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; } if (mask) { - if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL) + if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL) return -ENOBUFS; return 0; } if (dev == NULL) { - IPV4_DEVCONF_ALL(PROXY_ARP) = 1; + IPV4_DEVCONF_ALL(net, PROXY_ARP) = 1; return 0; } if (__in_dev_get_rtnl(dev)) { @@ -1000,8 +1008,10 @@ if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } + }; struct rtable * rt; if ((err = ip_route_output_key(&rt, &fl)) != 0) return err; @@ -1080,7 +1090,7 @@ return err; } -static int arp_req_delete(struct arpreq *r, struct net_device * dev) +static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device * dev) { int err; __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; @@ -1090,10 +1100,10 @@ __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (mask == htonl(0xFFFFFFFF)) - return pneigh_delete(&arp_tbl, &ip, dev); + return pneigh_delete(&arp_tbl, net, &ip, dev); if (mask == 0) { if (dev == NULL) { - IPV4_DEVCONF_ALL(PROXY_ARP) = 0; + IPV4_DEVCONF_ALL(net, PROXY_ARP) = 0; return 0; } if (__in_dev_get_rtnl(dev)) { @@ -1107,8 +1117,10 @@ } if (dev == NULL) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { 
.ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } + }; struct rtable * rt; if ((err = ip_route_output_key(&rt, &fl)) != 0) return err; @@ -1133,7 +1145,7 @@ * Handle an ARP layer I/O control request. */ -int arp_ioctl(unsigned int cmd, void __user *arg) +int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) { int err; struct arpreq r; @@ -1165,7 +1177,7 @@ rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - if ((dev = __dev_get_by_name(r.arp_dev)) == NULL) + if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) goto out; /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ @@ -1181,10 +1193,10 @@ switch (cmd) { case SIOCDARP: - err = arp_req_delete(&r, dev); + err = arp_req_delete(net, &r, dev); break; case SIOCSARP: - err = arp_req_set(&r, dev); + err = arp_req_set(net, &r, dev); break; case SIOCGARP: err = arp_req_get(&r, dev); @@ -1201,6 +1213,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); @@ -1227,6 +1242,54 @@ } +static int arp_proc_init(struct net *net); +static void arp_proc_exit(struct net *net); + + +static int arp_net_init(struct net *net) +{ + int error; + if ((error = arp_proc_init(net))) + goto out_proc; + + error = -ENOMEM; + net->arp_neigh_parms_default = neigh_parms_alloc_default(&arp_tbl, net); + if (!net->arp_neigh_parms_default) + goto out_parm; + +#ifdef CONFIG_SYSCTL + if ((error = neigh_sysctl_register( + NULL, net->arp_neigh_parms_default, + NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL))) + goto out_sysctl; +#endif + +out: + return error; + +#ifdef CONFIG_SYSCTL +out_sysctl: + neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default); +#endif +out_parm: + arp_proc_exit(net); +out_proc: + goto out; +} + +static void arp_net_exit(struct net *net) +{ +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(net->arp_neigh_parms_default); +#endif + neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default); + arp_proc_exit(net); +} + 
+static struct pernet_operations arp_net_ops = { + .init = arp_net_init, + .exit = arp_net_exit, +}; /* * Called once on startup. */ @@ -1236,18 +1299,12 @@ .func = arp_rcv, }; -static int arp_proc_init(void); - void __init arp_init(void) { neigh_table_init(&arp_tbl); dev_add_pack(&arp_packet_type); - arp_proc_init(); -#ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL, NULL); -#endif + register_pernet_subsys(&arp_net_ops); register_netdevice_notifier(&arp_netdev_notifier); } @@ -1383,6 +1440,8 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); + out: return rc; out_kfree: @@ -1390,28 +1449,46 @@ goto out; } +static int arp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct neigh_seq_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + static const struct file_operations arp_seq_fops = { .owner = THIS_MODULE, .open = arp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = arp_seq_release, }; -static int __init arp_proc_init(void) +static int arp_proc_init(struct net *net) { - if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } +static void arp_proc_exit(struct net *net) +{ + proc_net_remove(net, "arp"); +} + #else /* CONFIG_PROC_FS */ -static int __init arp_proc_init(void) +static int arp_proc_init(struct net *net) { return 0; } +static void arp_proc_exit(struct net *net) +{ + return; +} + #endif /* CONFIG_PROC_FS */ EXPORT_SYMBOL(arp_broken_ops); diff -Nurb linux-2.6.22-570/net/ipv4/devinet.c linux-2.6.22-591/net/ipv4/devinet.c --- linux-2.6.22-570/net/ipv4/devinet.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/devinet.c 2007-12-21 15:36:15.000000000 -0500 @@ -63,7 +63,7 @@ #include #include -struct ipv4_devconf 
ipv4_devconf = { +static struct ipv4_devconf ipv4_devconf_template = { .data = { [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, @@ -72,7 +72,7 @@ }, }; -static struct ipv4_devconf ipv4_devconf_dflt = { +static struct ipv4_devconf ipv4_devconf_dflt_template = { .data = { [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, @@ -82,7 +82,7 @@ }, }; -#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) +#define IPV4_DEVCONF_DFLT(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf_dflt), attr) static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, @@ -98,7 +98,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); #ifdef CONFIG_SYSCTL -static void devinet_sysctl_register(struct in_device *in_dev, +static void devinet_sysctl_register(struct net *net, struct in_device *in_dev, struct ipv4_devconf *p); static void devinet_sysctl_unregister(struct ipv4_devconf *p); #endif @@ -149,6 +149,7 @@ static struct in_device *inetdev_init(struct net_device *dev) { + struct net *net = dev->nd_net; struct in_device *in_dev; ASSERT_RTNL(); @@ -157,7 +158,7 @@ if (!in_dev) goto out; INIT_RCU_HEAD(&in_dev->rcu_head); - memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); + memcpy(&in_dev->cnf, net->ipv4_devconf_dflt, sizeof(in_dev->cnf)); /* per-net default is a pointer: copy the pointee */ in_dev->cnf.sysctl = NULL; in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) @@ -173,7 +174,7 @@ in_dev_hold(in_dev); #ifdef CONFIG_SYSCTL - devinet_sysctl_register(in_dev, &in_dev->cnf); + devinet_sysctl_register(net, in_dev, &in_dev->cnf); #endif ip_mc_init_dev(in_dev); if (dev->flags & IFF_UP) @@ -203,8 +204,6 @@ ASSERT_RTNL(); dev = in_dev->dev; - if (dev == &loopback_dev) - return; in_dev->dead = 1; @@ -415,12 +414,12 @@ return inet_insert_ifa(ifa); } -struct in_device *inetdev_by_index(int ifindex) +struct in_device *inetdev_by_index(struct net
*net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) in_dev = in_dev_get(dev); read_unlock(&dev_base_lock); @@ -444,6 +443,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; @@ -457,7 +457,7 @@ goto errout; ifm = nlmsg_data(nlh); - in_dev = inetdev_by_index(ifm->ifa_index); + in_dev = inetdev_by_index(net, ifm->ifa_index); if (in_dev == NULL) { err = -ENODEV; goto errout; @@ -488,7 +488,7 @@ return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -507,7 +507,7 @@ goto errout; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if (dev == NULL) { err = -ENODEV; goto errout; @@ -564,11 +564,12 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct in_ifaddr *ifa; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(nlh); + ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -600,7 +601,7 @@ } -int devinet_ioctl(unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; struct sockaddr_in sin_orig; @@ -629,7 +630,7 @@ *colon = 0; #ifdef CONFIG_KMOD - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); #endif switch (cmd) { @@ -670,7 +671,7 @@ rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) goto done; if (colon) @@ -889,6 +890,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { + struct net *net = dev->nd_net; 
__be32 addr = 0; struct in_device *in_dev; @@ -919,7 +921,7 @@ */ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -982,7 +984,7 @@ * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) +__be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct in_device *in_dev; @@ -998,7 +1000,7 @@ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -1059,6 +1061,7 @@ void *ptr) { struct net_device *dev = ptr; + struct net *net = dev->nd_net; struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1066,7 +1069,7 @@ if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); - if (dev == &loopback_dev) { + if (dev == &net->loopback_dev) { if (!in_dev) panic("devinet: " "Failed to create loopback\n"); @@ -1085,7 +1088,7 @@ case NETDEV_UP: if (dev->mtu < 68) break; - if (dev == &loopback_dev) { + if (dev == &net->loopback_dev) { struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { ifa->ifa_local = @@ -1122,7 +1125,7 @@ neigh_sysctl_unregister(in_dev->arp_parms); neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL); - devinet_sysctl_register(in_dev, &in_dev->cnf); + devinet_sysctl_register(net, in_dev, &in_dev->cnf); #endif break; } @@ -1185,6 +1188,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, ip_idx; struct net_device *dev; struct in_device *in_dev; @@ -1194,7 +1198,7 @@ s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { 
+ for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1228,6 +1232,7 @@ u32 pid) { struct sk_buff *skb; + struct net *net = ifa->ifa_dev->dev->nd_net; u32 seq = nlh ? nlh->nlmsg_seq : 0; int err = -ENOBUFS; @@ -1242,25 +1247,25 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } #ifdef CONFIG_SYSCTL -static void devinet_copy_dflt_conf(int i) +static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) - in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i]; + in_dev->cnf.data[i] = net->ipv4_devconf_dflt->data[i]; rcu_read_unlock(); } read_unlock(&dev_base_lock); @@ -1274,12 +1279,13 @@ if (write) { struct ipv4_devconf *cnf = ctl->extra1; + struct net *net = ctl->extra2; int i = (int *)ctl->data - cnf->data; set_bit(i, cnf->state); - if (cnf == &ipv4_devconf_dflt) - devinet_copy_dflt_conf(i); + if (cnf == net->ipv4_devconf_dflt) + devinet_copy_dflt_conf(net, i); } return ret; @@ -1291,6 +1297,7 @@ { struct ipv4_devconf *cnf; int *valp = table->data; + struct net *net; int new; int i; @@ -1325,26 +1332,27 @@ *valp = new; cnf = table->extra1; + net = table->extra2; i = (int *)table->data - cnf->data; set_bit(i, cnf->state); - if (cnf == &ipv4_devconf_dflt) - devinet_copy_dflt_conf(i); + if (cnf == net->ipv4_devconf_dflt) + devinet_copy_dflt_conf(net, i); return 1; } -void inet_forward_change(void) +void inet_forward_change(struct net *net) { struct net_device *dev; - int on = IPV4_DEVCONF_ALL(FORWARDING); + int on = IPV4_DEVCONF_ALL(net, FORWARDING); - IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) 
= !on; - IPV4_DEVCONF_DFLT(FORWARDING) = on; + IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; + IPV4_DEVCONF_DFLT(net, FORWARDING) = on; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -1364,11 +1372,12 @@ int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + struct net *net = ctl->extra2; if (write && *valp != val) { - if (valp == &IPV4_DEVCONF_ALL(FORWARDING)) - inet_forward_change(); - else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING)) + if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) + inet_forward_change(net); + else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) rt_cache_flush(0); } @@ -1407,13 +1416,14 @@ { \ .ctl_name = NET_IPV4_CONF_ ## attr, \ .procname = name, \ - .data = ipv4_devconf.data + \ + .data = ipv4_devconf_template.data + \ NET_IPV4_CONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ .strategy = sysctl, \ - .extra1 = &ipv4_devconf, \ + .extra1 = &ipv4_devconf_template, \ + .extra2 = &init_net, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ @@ -1503,25 +1513,29 @@ }, }; -static void devinet_sysctl_register(struct in_device *in_dev, +static void devinet_sysctl_register(struct net *net, struct in_device *in_dev, struct ipv4_devconf *p) { int i; struct net_device *dev = in_dev ? 
in_dev->dev : NULL; - struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t), - GFP_KERNEL); + struct devinet_sysctl_table *t; char *dev_name = NULL; + t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); if (!t) return; for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { - t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; + t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf_template; t->devinet_vars[i].extra1 = p; + t->devinet_vars[i].extra2 = net; } if (dev) { dev_name = dev->name; t->devinet_dev[0].ctl_name = dev->ifindex; + } else if (p == net->ipv4_devconf) { + dev_name = "all"; + t->devinet_dev[0].ctl_name = NET_PROTO_CONF_ALL; } else { dev_name = "default"; t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; @@ -1542,7 +1556,7 @@ t->devinet_proto_dir[0].child = t->devinet_conf_dir; t->devinet_root_dir[0].child = t->devinet_proto_dir; - t->sysctl_header = register_sysctl_table(t->devinet_root_dir); + t->sysctl_header = register_net_sysctl_table(net, t->devinet_root_dir); if (!t->sysctl_header) goto free_procname; @@ -1562,26 +1576,59 @@ if (p->sysctl) { struct devinet_sysctl_table *t = p->sysctl; p->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); + unregister_net_sysctl_table(t->sysctl_header); kfree(t->devinet_dev[0].procname); kfree(t); } } #endif +static int devinet_net_init(struct net *net) +{ +#ifdef CONFIG_SYSCTL + net->ipv4_devconf = kmemdup(&ipv4_devconf_template, + sizeof(ipv4_devconf_template), GFP_KERNEL); + if (!net->ipv4_devconf) + return -ENOMEM; + + net->ipv4_devconf_dflt = kmemdup(&ipv4_devconf_dflt_template, + sizeof(ipv4_devconf_template), + GFP_KERNEL); + if (!net->ipv4_devconf_dflt) { + kfree(net->ipv4_devconf); + return -ENOMEM; + } + + devinet_sysctl_register(net, NULL, net->ipv4_devconf); + devinet_sysctl_register(net, NULL, net->ipv4_devconf_dflt); + + multi_ipv4_table[0].data = &IPV4_DEVCONF_ALL(net, FORWARDING); +#endif + return 0; +} + +static void devinet_net_exit(struct net 
*net) +{ +#ifdef CONFIG_SYSCTL + devinet_sysctl_unregister(net->ipv4_devconf_dflt); + devinet_sysctl_unregister(net->ipv4_devconf); +#endif +} + +static struct pernet_operations devinet_net_ops = { + .init = devinet_net_init, + .exit = devinet_net_exit, +}; + void __init devinet_init(void) { + register_pernet_subsys(&devinet_net_ops); register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); -#ifdef CONFIG_SYSCTL - devinet_sysctl.sysctl_header = - register_sysctl_table(devinet_sysctl.devinet_root_dir); - devinet_sysctl_register(NULL, &ipv4_devconf_dflt); -#endif } EXPORT_SYMBOL(in_dev_finish_destroy); diff -Nurb linux-2.6.22-570/net/ipv4/esp4.c linux-2.6.22-591/net/ipv4/esp4.c --- linux-2.6.22-570/net/ipv4/esp4.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/esp4.c 2007-12-21 15:36:15.000000000 -0500 @@ -307,6 +307,9 @@ struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; + if (skb->dev->nd_net != &init_net) + return; + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; @@ -481,3 +484,4 @@ module_init(esp4_init); module_exit(esp4_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP); diff -Nurb linux-2.6.22-570/net/ipv4/fib_frontend.c linux-2.6.22-591/net/ipv4/fib_frontend.c --- linux-2.6.22-570/net/ipv4/fib_frontend.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/fib_frontend.c 2007-12-21 15:36:15.000000000 -0500 @@ -51,38 +51,34 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -struct fib_table *ip_fib_local_table; -struct fib_table *ip_fib_main_table; - #define FIB_TABLE_HASHSZ 1 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; #else #define FIB_TABLE_HASHSZ 256 -static struct hlist_head 
fib_table_hash[FIB_TABLE_HASHSZ]; -struct fib_table *fib_new_table(u32 id) +struct fib_table *fib_new_table(struct net *net, u32 id) { struct fib_table *tb; unsigned int h; if (id == 0) id = RT_TABLE_MAIN; - tb = fib_get_table(id); + tb = fib_get_table(net, id); if (tb) return tb; tb = fib_hash_init(id); if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); - hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); + hlist_add_head_rcu(&tb->tb_hlist, &net->ip_fib_table_hash[h]); return tb; } -struct fib_table *fib_get_table(u32 id) +struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; + struct hlist_head *head; struct hlist_node *node; unsigned int h; @@ -90,7 +86,8 @@ id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + head = &net->ip_fib_table_hash[h]; + hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -99,9 +96,10 @@ rcu_read_unlock(); return NULL; } + #endif /* CONFIG_IP_MULTIPLE_TABLES */ -static void fib_flush(void) +static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; @@ -109,7 +107,8 @@ unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + struct hlist_head *head = &net->ip_fib_table_hash[h]; + hlist_for_each_entry(tb, node, head, tb_hlist) flushed += tb->tb_flush(tb); } @@ -121,18 +120,23 @@ * Find the first device with a given source address. 
*/ -struct net_device * ip_dev_find(__be32 addr) +struct net_device * ip_dev_find(struct net *net, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { .ip4_u = { .daddr = addr } } + }; struct fib_result res; struct net_device *dev = NULL; + struct fib_table *local_table; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - if (!ip_fib_local_table || - ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res)) + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (!local_table || + local_table->tb_lookup(local_table, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; @@ -145,11 +149,15 @@ return dev; } -unsigned inet_addr_type(__be32 addr) +unsigned inet_addr_type(struct net *net, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { .ip4_u = { .daddr = addr } } + }; struct fib_result res; unsigned ret = RTN_BROADCAST; + struct fib_table *local_table; if (ZERONET(addr) || BADCLASS(addr)) return RTN_BROADCAST; @@ -160,10 +168,10 @@ res.r = NULL; #endif - if (ip_fib_local_table) { + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (local_table) { ret = RTN_UNICAST; - if (!ip_fib_local_table->tb_lookup(ip_fib_local_table, - &fl, &res)) { + if (!local_table->tb_lookup(local_table, &fl, &res)) { ret = res.type; fib_res_put(&res); } @@ -183,7 +191,8 @@ struct net_device *dev, __be32 *spec_dst, u32 *itag) { struct in_device *in_dev; - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = dev->nd_net, + .nl_u = { .ip4_u = { .daddr = src, .saddr = dst, .tos = tos } }, @@ -267,13 +276,16 @@ return len + nla_total_size(4); } -static int rtentry_to_fib_config(int cmd, struct rtentry *rt, +static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, struct fib_config *cfg) { __be32 addr; int plen; memset(cfg, 0, sizeof(*cfg)); + cfg->fc_nlinfo.pid = 0; + 
cfg->fc_nlinfo.nlh = NULL; + cfg->fc_nlinfo.net = net; if (rt->rt_dst.sa_family != AF_INET) return -EAFNOSUPPORT; @@ -334,7 +346,7 @@ colon = strchr(devname, ':'); if (colon) *colon = 0; - dev = __dev_get_by_name(devname); + dev = __dev_get_by_name(net, devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; @@ -357,7 +369,7 @@ if (rt->rt_gateway.sa_family == AF_INET && addr) { cfg->fc_gw = addr; if (rt->rt_flags & RTF_GATEWAY && - inet_addr_type(addr) == RTN_UNICAST) + inet_addr_type(net, addr) == RTN_UNICAST) cfg->fc_scope = RT_SCOPE_UNIVERSE; } @@ -398,7 +410,7 @@ * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ -int ip_rt_ioctl(unsigned int cmd, void __user *arg) +int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct fib_config cfg; struct rtentry rt; @@ -414,18 +426,18 @@ return -EFAULT; rtnl_lock(); - err = rtentry_to_fib_config(cmd, &rt, &cfg); + err = rtentry_to_fib_config(net, cmd, &rt, &cfg); if (err == 0) { struct fib_table *tb; if (cmd == SIOCDELRT) { - tb = fib_get_table(cfg.fc_table); + tb = fib_get_table(net, cfg.fc_table); if (tb) err = tb->tb_delete(tb, &cfg); else err = -ESRCH; } else { - tb = fib_new_table(cfg.fc_table); + tb = fib_new_table(net, cfg.fc_table); if (tb) err = tb->tb_insert(tb, &cfg); else @@ -453,7 +465,6 @@ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, - [RTA_MP_ALGO] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -481,6 +492,7 @@ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; + cfg->fc_nlinfo.net = skb->sk->sk_net; if (cfg->fc_type > RTN_MAX) { err = -EINVAL; @@ -515,9 +527,6 @@ case RTA_FLOW: cfg->fc_flow = nla_get_u32(attr); break; - case RTA_MP_ALGO: - cfg->fc_mp_alg = nla_get_u32(attr); - break; case RTA_TABLE: cfg->fc_table = nla_get_u32(attr); break; @@ -531,6 +540,7 @@ static int 
inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_config cfg; struct fib_table *tb; int err; @@ -539,7 +549,7 @@ if (err < 0) goto errout; - tb = fib_get_table(cfg.fc_table); + tb = fib_get_table(net, cfg.fc_table); if (tb == NULL) { err = -ESRCH; goto errout; @@ -552,6 +562,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_config cfg; struct fib_table *tb; int err; @@ -560,7 +571,7 @@ if (err < 0) goto errout; - tb = fib_new_table(cfg.fc_table); + tb = fib_new_table(net, cfg.fc_table); if (tb == NULL) { err = -ENOBUFS; goto errout; @@ -573,6 +584,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; @@ -587,8 +599,9 @@ s_e = cb->args[1]; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + struct hlist_head *head = &net->ip_fib_table_hash[h]; e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + hlist_for_each_entry(tb, node, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -617,6 +630,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { + struct net *net = ifa->ifa_dev->dev->nd_net; struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, @@ -626,12 +640,13 @@ .fc_prefsrc = ifa->ifa_local, .fc_oif = ifa->ifa_dev->dev->ifindex, .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, + .fc_nlinfo.net = net, }; if (type == RTN_UNICAST) - tb = fib_new_table(RT_TABLE_MAIN); + tb = fib_new_table(net, RT_TABLE_MAIN); else - tb = fib_new_table(RT_TABLE_LOCAL); + tb = fib_new_table(net, RT_TABLE_LOCAL); if (tb == NULL) return; @@ -692,6 +707,7 @@ { struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; + struct net *net = dev->nd_net; struct in_ifaddr *ifa1; struct in_ifaddr *prim = 
ifa; __be32 brd = ifa->ifa_address|~ifa->ifa_mask; @@ -740,15 +756,15 @@ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); /* Check, that this local address finally disappeared. */ - if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { + if (inet_addr_type(net, ifa->ifa_local) != RTN_LOCAL) { /* And the last, but not the least thing. We must flush stray FIB entries. First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. */ - if (fib_sync_down(ifa->ifa_local, NULL, 0)) - fib_flush(); + if (fib_sync_down(net, ifa->ifa_local, NULL, 0)) + fib_flush(net); } } #undef LOCAL_OK @@ -757,11 +773,12 @@ #undef BRD1_OK } -static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn, struct fib_table *tb ) { struct fib_result res; - struct flowi fl = { .mark = frn->fl_mark, + struct flowi fl = { .fl_net = net, + .mark = frn->fl_mark, .nl_u = { .ip4_u = { .daddr = frn->fl_addr, .tos = frn->fl_tos, .scope = frn->fl_scope } } }; @@ -790,6 +807,7 @@ static void nl_fib_input(struct sock *sk, int len) { + struct net *net = sk->sk_net; struct sk_buff *skb = NULL; struct nlmsghdr *nlh = NULL; struct fib_result_nl *frn; @@ -808,9 +826,9 @@ } frn = (struct fib_result_nl *) NLMSG_DATA(nlh); - tb = fib_get_table(frn->tb_id_in); + tb = fib_get_table(net, frn->tb_id_in); - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn, tb); pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ @@ -818,16 +836,36 @@ netlink_unicast(sk, skb, pid, MSG_DONTWAIT); } -static void nl_fib_lookup_init(void) +static int nl_fib_lookup_init(struct net *net) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + int error = -ENOMEM; + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, + NULL, THIS_MODULE); + if (sk) { + /* Don't hold an extra reference on the namespace */ + 
put_net(sk->sk_net); + net->nlfl = sk; + error = 0; + } + return error; +} + +static void nl_fib_lookup_exit(struct net *net) +{ + /* At the last minute lie and say this is a socket for the + * initial network namespace. So the socket will be safe to + * free. + */ + net->nlfl->sk_net = get_net(&init_net); + sock_put(net->nlfl); } static void fib_disable_ip(struct net_device *dev, int force) { - if (fib_sync_down(0, dev, force)) - fib_flush(); + struct net *net = dev->nd_net; + if (fib_sync_down(net, 0, dev, force)) + fib_flush(net); rt_cache_flush(0); arp_ifdown(dev); } @@ -864,6 +902,9 @@ struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); return NOTIFY_DONE; @@ -893,6 +934,85 @@ return NOTIFY_DONE; } +static int ip_fib_net_init(struct net *net) +{ + unsigned int i; + + net->ip_fib_table_hash = kzalloc( + sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); + if (!net->ip_fib_table_hash) + return -ENOMEM; + + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&net->ip_fib_table_hash[i]); +#ifndef CONFIG_IP_MULTIPLE_TABLES + net->ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&net->ip_fib_local_table->tb_hlist, + &net->ip_fib_table_hash[0]); + net->ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&net->ip_fib_main_table->tb_hlist, + &net->ip_fib_table_hash[0]); +#else + fib4_rules_init(net); +#endif + return 0; +} + +static void ip_fib_net_exit(struct net *net) +{ + unsigned int i; + +#ifdef CONFIG_IP_MULTIPLE_TABLES + fib4_rules_exit(net); +#endif + + synchronize_rcu(); /* needed? 
*/ + for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + struct fib_table *tb; + struct hlist_head *head; + struct hlist_node *node, *tmp; + + head = &net->ip_fib_table_hash[i]; + hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { + hlist_del(node); + fib_hash_exit(tb); + } + } + kfree(net->ip_fib_table_hash); +} + +static int fib_net_init(struct net *net) +{ + int error; + + error = 0; + if ((error = ip_fib_net_init(net))) + goto out; + if ((error = fib_info_init(net))) + goto out_info; + if ((error = nl_fib_lookup_init(net))) + goto out_nlfl; + if ((error = fib_proc_init(net))) + goto out_proc; +out: + return error; +out_proc: + nl_fib_lookup_exit(net); +out_nlfl: + fib_info_exit(net); +out_info: + ip_fib_net_exit(net); + goto out; +} + +static void fib_net_exit(struct net *net) +{ + fib_proc_exit(net); + nl_fib_lookup_exit(net); + fib_info_exit(net); + ip_fib_net_exit(net); +} + static struct notifier_block fib_inetaddr_notifier = { .notifier_call =fib_inetaddr_event, }; @@ -901,28 +1021,20 @@ .notifier_call =fib_netdev_event, }; +static struct pernet_operations fib_net_ops = { + .init = fib_net_init, + .exit = fib_net_exit, +}; + void __init ip_fib_init(void) { - unsigned int i; - - for (i = 0; i < FIB_TABLE_HASHSZ; i++) - INIT_HLIST_HEAD(&fib_table_hash[i]); -#ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); -#else - fib4_rules_init(); -#endif - - register_netdevice_notifier(&fib_netdev_notifier); - register_inetaddr_notifier(&fib_inetaddr_notifier); - nl_fib_lookup_init(); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); + + register_pernet_subsys(&fib_net_ops); + 
register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); } EXPORT_SYMBOL(inet_addr_type); diff -Nurb linux-2.6.22-570/net/ipv4/fib_hash.c linux-2.6.22-591/net/ipv4/fib_hash.c --- linux-2.6.22-570/net/ipv4/fib_hash.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/fib_hash.c 2007-12-21 15:36:15.000000000 -0500 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -274,11 +275,10 @@ return err; } -static int fn_hash_last_dflt=-1; - static void fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; int order, last_idx; struct hlist_node *node; struct fib_node *f; @@ -316,12 +316,12 @@ if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, &fn_hash_last_dflt)) { + &last_idx, &net->fn_hash_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - fn_hash_last_dflt = order; + net->fn_hash_last_dflt = order; goto out; } fi = next_fi; @@ -330,16 +330,16 @@ } if (order <= 0 || fi == NULL) { - fn_hash_last_dflt = -1; + net->fn_hash_last_dflt = -1; goto out; } - if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) { + if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->fn_hash_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - fn_hash_last_dflt = order; + net->fn_hash_last_dflt = order; goto out; } @@ -350,7 +350,7 @@ if (last_resort) atomic_inc(&last_resort->fib_clntref); } - fn_hash_last_dflt = last_idx; + net->fn_hash_last_dflt = last_idx; out: read_unlock(&fib_hash_lock); } @@ -759,11 +759,15 @@ return skb->len; } -#ifdef CONFIG_IP_MULTIPLE_TABLES +void fib_hash_exit(struct fib_table *tb) +{ + if (!tb) + return; + fn_hash_flush(tb); + kfree(tb); +} + struct fib_table * fib_hash_init(u32 id) -#else -struct fib_table * __init 
fib_hash_init(u32 id) -#endif { struct fib_table *tb; @@ -799,6 +803,7 @@ #ifdef CONFIG_PROC_FS struct fib_iter_state { + struct net *net; struct fn_zone *zone; int bucket; struct hlist_head *hash_head; @@ -812,7 +817,8 @@ static struct fib_alias *fib_get_first(struct seq_file *seq) { struct fib_iter_state *iter = seq->private; - struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data; + struct fib_table *main_table = fib_get_table(iter->net, RT_TABLE_MAIN); + struct fn_hash *table = (struct fn_hash *) main_table->tb_data; iter->bucket = 0; iter->hash_head = NULL; @@ -948,10 +954,11 @@ static void *fib_seq_start(struct seq_file *seq, loff_t *pos) { + struct fib_iter_state *iter = seq->private; void *v = NULL; read_lock(&fib_hash_lock); - if (ip_fib_main_table) + if (fib_get_table(iter->net, RT_TABLE_MAIN)) v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; return v; } @@ -1051,6 +1058,7 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -1058,23 +1066,32 @@ goto out; } +static int fib_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct fib_iter_state *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations fib_seq_fops = { .owner = THIS_MODULE, .open = fib_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = fib_seq_release, }; -int __init fib_proc_init(void) +int fib_proc_init(struct net *net) { - if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) + net->fn_hash_last_dflt = -1; + if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) return -ENOMEM; return 0; } -void __init fib_proc_exit(void) +void fib_proc_exit(struct net *net) { - proc_net_remove("route"); + proc_net_remove(net, "route"); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/ipv4/fib_rules.c 
linux-2.6.22-591/net/ipv4/fib_rules.c --- linux-2.6.22-570/net/ipv4/fib_rules.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/fib_rules.c 2007-12-21 15:36:15.000000000 -0500 @@ -32,8 +32,6 @@ #include #include -static struct fib_rules_ops fib4_rules_ops; - struct fib4_rule { struct fib_rule common; @@ -49,35 +47,14 @@ #endif }; -static struct fib4_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFF, - .table = RT_TABLE_DEFAULT, - .action = FR_ACT_TO_TBL, - }, +struct fib4_rule_table { + struct list_head fib4_rules; + struct fib4_rule default_rule; + struct fib4_rule main_rule; + struct fib4_rule local_rule; + struct fib_rules_ops fib4_rules_ops; }; -static struct fib4_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .table = RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, -}; - -static struct fib4_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .table = RT_TABLE_LOCAL, - .action = FR_ACT_TO_TBL, - .flags = FIB_RULE_PERMANENT, - }, -}; - -static LIST_HEAD(fib4_rules); - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -87,12 +64,14 @@ int fib_lookup(struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; + struct fib4_rule_table *table = net->fib4_table; struct fib_lookup_arg arg = { .result = res, }; int err; - err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(&table->fib4_rules_ops, flp, 0, &arg); res->r = arg.rule; return err; @@ -122,7 +101,7 @@ goto errout; } - if ((tbl = fib_get_table(rule->table)) == NULL) + if ((tbl = fib_get_table(flp->fl_net, rule->table)) == NULL) goto errout; err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); @@ -138,7 +117,7 @@ if (res->r && res->r->action == FR_ACT_TO_TBL && FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { struct fib_table *tb; - if ((tb = fib_get_table(res->r->table)) != NULL) + if ((tb = 
fib_get_table(flp->fl_net, res->r->table)) != NULL) tb->tb_select_default(tb, flp, res); } } @@ -159,13 +138,13 @@ return 1; } -static struct fib_table *fib_empty_table(void) +static struct fib_table *fib_empty_table(struct net *net) { u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_get_table(id) == NULL) - return fib_new_table(id); + if (fib_get_table(net, id) == NULL) + return fib_new_table(net, id); return NULL; } @@ -178,6 +157,7 @@ struct nlmsghdr *nlh, struct fib_rule_hdr *frh, struct nlattr **tb) { + struct net *net = skb->sk->sk_net; int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; @@ -188,7 +168,7 @@ if (rule->action == FR_ACT_TO_TBL) { struct fib_table *table; - table = fib_empty_table(); + table = fib_empty_table(net); if (table == NULL) { err = -ENOBUFS; goto errout; @@ -274,14 +254,15 @@ return -ENOBUFS; } -static u32 fib4_rule_default_pref(void) +static u32 fib4_rule_default_pref(struct fib_rules_ops *ops) { - struct list_head *pos; + struct list_head *list, *pos; struct fib_rule *rule; - if (!list_empty(&fib4_rules)) { - pos = fib4_rules.next; - if (pos->next != &fib4_rules) { + list = ops->rules_list; + if (!list_empty(list)) { + pos = list->next; + if (pos->next != list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -298,12 +279,37 @@ + nla_total_size(4); /* flow */ } -static void fib4_rule_flush_cache(void) +static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { rt_cache_flush(-1); } -static struct fib_rules_ops fib4_rules_ops = { +static struct fib4_rule_table fib4_rule_table = { + .default_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFF, + .table = RT_TABLE_DEFAULT, + .action = FR_ACT_TO_TBL, + }, + }, + .main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, + }, + .local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .table = RT_TABLE_LOCAL, + .action = 
FR_ACT_TO_TBL, + .flags = FIB_RULE_PERMANENT, + }, + }, + .fib4_rules_ops = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), @@ -317,15 +323,34 @@ .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, - .rules_list = &fib4_rules, + .rules_list = &fib4_rule_table.fib4_rules, /* &fib4_rules, */ .owner = THIS_MODULE, + }, }; -void __init fib4_rules_init(void) + +void fib4_rules_init(struct net *net) { - list_add_tail(&local_rule.common.list, &fib4_rules); - list_add_tail(&main_rule.common.list, &fib4_rules); - list_add_tail(&default_rule.common.list, &fib4_rules); + struct fib4_rule_table *table; + table = kmemdup(&fib4_rule_table, sizeof(*table), GFP_KERNEL); + if (!table) + return; + INIT_LIST_HEAD(&table->fib4_rules); + list_add_tail(&table->local_rule.common.list, &table->fib4_rules); + list_add_tail(&table->main_rule.common.list, &table->fib4_rules); + list_add_tail(&table->default_rule.common.list, &table->fib4_rules); + table->fib4_rules_ops.rules_list = &table->fib4_rules; + if (fib_rules_register(net, &table->fib4_rules_ops)) { + kfree(table); + return; + } + net->fib4_table = table; +} - fib_rules_register(&fib4_rules_ops); +void fib4_rules_exit(struct net *net) +{ + struct fib4_rule_table *table = net->fib4_table; + if (table) + fib_rules_unregister(net, &table->fib4_rules_ops); + kfree(table); } diff -Nurb linux-2.6.22-570/net/ipv4/fib_semantics.c linux-2.6.22-591/net/ipv4/fib_semantics.c --- linux-2.6.22-570/net/ipv4/fib_semantics.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/fib_semantics.c 2007-12-21 15:36:15.000000000 -0500 @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -51,14 +50,9 @@ #define FSprintk(a...) 
static DEFINE_SPINLOCK(fib_info_lock); -static struct hlist_head *fib_info_hash; -static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_hash_size; -static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) -static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -154,7 +148,8 @@ dev_put(nh->nh_dev); nh->nh_dev = NULL; } endfor_nexthops(fi); - fib_info_cnt--; + fi->fib_net->fib_info_cnt--; + release_net(fi->fib_net); kfree(fi); } @@ -197,9 +192,9 @@ return 0; } -static inline unsigned int fib_info_hashfn(const struct fib_info *fi) +static inline unsigned int fib_info_hashfn(struct net *net, const struct fib_info *fi) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = net->fib_info_hash_size - 1; unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; @@ -209,15 +204,15 @@ return (val ^ (val >> 7) ^ (val >> 12)) & mask; } -static struct fib_info *fib_find_info(const struct fib_info *nfi) +static struct fib_info *fib_find_info(struct net *net, const struct fib_info *nfi) { struct hlist_head *head; struct hlist_node *node; struct fib_info *fi; unsigned int hash; - hash = fib_info_hashfn(nfi); - head = &fib_info_hash[hash]; + hash = fib_info_hashfn(net, nfi); + head = &net->fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { if (fi->fib_nhs != nfi->fib_nhs) @@ -250,6 +245,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { + struct net *net = dev->nd_net; struct hlist_head *head; struct hlist_node *node; struct fib_nh *nh; @@ -258,7 +254,7 @@ spin_lock(&fib_info_lock); hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = &net->fib_info_devhash[hash]; hlist_for_each_entry(nh, node, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && @@ -321,11 +317,11 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + err = rtnl_notify(skb, 
info->net, info->pid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); + rtnl_set_sk_err(info->net, RTNLGRP_IPV4_ROUTE, err); } /* Return the first fib alias matching TOS with @@ -518,6 +514,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh) { + struct net *net = cfg->fc_nlinfo.net; int err; if (nh->nh_gw) { @@ -532,9 +529,9 @@ if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; - if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) + if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -545,6 +542,7 @@ } { struct flowi fl = { + .fl_net = net, .nl_u = { .ip4_u = { .daddr = nh->nh_gw, @@ -581,7 +579,7 @@ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - in_dev = inetdev_by_index(nh->nh_oif); + in_dev = inetdev_by_index(net, nh->nh_oif); if (in_dev == NULL) return -ENODEV; if (!(in_dev->dev->flags&IFF_UP)) { @@ -596,9 +594,9 @@ return 0; } -static inline unsigned int fib_laddr_hashfn(__be32 val) +static inline unsigned int fib_laddr_hashfn(struct net *net, __be32 val) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = net->fib_info_hash_size - 1; return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } @@ -623,21 +621,22 @@ free_pages((unsigned long) hash, get_order(bytes)); } -static void fib_hash_move(struct hlist_head *new_info_hash, +static void fib_hash_move(struct net *net, + struct hlist_head *new_info_hash, struct hlist_head *new_laddrhash, unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_hash_size; + unsigned int old_size = net->fib_info_hash_size; unsigned int i, bytes; spin_lock_bh(&fib_info_lock); - old_info_hash = fib_info_hash; - old_laddrhash = 
fib_info_laddrhash; - fib_hash_size = new_size; + old_info_hash = net->fib_info_hash; + old_laddrhash = net->fib_info_laddrhash; + net->fib_info_hash_size = new_size; for (i = 0; i < old_size; i++) { - struct hlist_head *head = &fib_info_hash[i]; + struct hlist_head *head = &net->fib_info_hash[i]; struct hlist_node *node, *n; struct fib_info *fi; @@ -647,15 +646,15 @@ hlist_del(&fi->fib_hash); - new_hash = fib_info_hashfn(fi); + new_hash = fib_info_hashfn(net, fi); dest = &new_info_hash[new_hash]; hlist_add_head(&fi->fib_hash, dest); } } - fib_info_hash = new_info_hash; + net->fib_info_hash = new_info_hash; for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &fib_info_laddrhash[i]; + struct hlist_head *lhead = &net->fib_info_laddrhash[i]; struct hlist_node *node, *n; struct fib_info *fi; @@ -665,12 +664,12 @@ hlist_del(&fi->fib_lhash); - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); + new_hash = fib_laddr_hashfn(net, fi->fib_prefsrc); ldest = &new_laddrhash[new_hash]; hlist_add_head(&fi->fib_lhash, ldest); } } - fib_info_laddrhash = new_laddrhash; + net->fib_info_laddrhash = new_laddrhash; spin_unlock_bh(&fib_info_lock); @@ -681,6 +680,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) { + struct net *net = cfg->fc_nlinfo.net; int err; struct fib_info *fi = NULL; struct fib_info *ofi; @@ -697,17 +697,10 @@ goto err_inval; } #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (cfg->fc_mp_alg) { - if (cfg->fc_mp_alg < IP_MP_ALG_NONE || - cfg->fc_mp_alg > IP_MP_ALG_MAX) - goto err_inval; - } -#endif err = -ENOBUFS; - if (fib_info_cnt >= fib_hash_size) { - unsigned int new_size = fib_hash_size << 1; + if (net->fib_info_cnt >= net->fib_info_hash_size) { + unsigned int new_size = net->fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; unsigned int bytes; @@ -724,18 +717,19 @@ memset(new_info_hash, 0, bytes); memset(new_laddrhash, 0, bytes); - fib_hash_move(new_info_hash, new_laddrhash, new_size); + 
fib_hash_move(net, new_info_hash, new_laddrhash, new_size); } - if (!fib_hash_size) + if (!net->fib_info_hash_size) goto failure; } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; - fib_info_cnt++; + net->fib_info_cnt++; + fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; @@ -791,10 +785,6 @@ #endif } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = cfg->fc_mp_alg; -#endif - if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; @@ -811,7 +801,7 @@ if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -825,12 +815,12 @@ if (fi->fib_prefsrc) { if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fi->fib_prefsrc != cfg->fc_dst) - if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) + if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } link_it: - if ((ofi = fib_find_info(fi)) != NULL) { + if ((ofi = fib_find_info(net, fi)) != NULL) { fi->fib_dead = 1; free_fib_info(fi); ofi->fib_treeref++; @@ -841,11 +831,13 @@ atomic_inc(&fi->fib_clntref); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, - &fib_info_hash[fib_info_hashfn(fi)]); + &net->fib_info_hash[fib_info_hashfn(net, fi)]); if (fi->fib_prefsrc) { struct hlist_head *head; + unsigned int hash; - head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; + hash = fib_laddr_hashfn(net, fi->fib_prefsrc); + head = &net->fib_info_laddrhash[hash]; hlist_add_head(&fi->fib_lhash, head); } change_nexthops(fi) { @@ -855,7 +847,7 @@ if (!nh->nh_dev) continue; hash = fib_devindex_hashfn(nh->nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = &net->fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } 
endfor_nexthops(fi) spin_unlock_bh(&fib_info_lock); @@ -940,10 +932,6 @@ res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - res->netmask = mask; - res->network = zone & inet_make_mask(prefixlen); -#endif atomic_inc(&res->fi->fib_clntref); return 0; } @@ -1046,7 +1034,7 @@ - device went down -> we must shutdown all nexthops going via it. */ -int fib_sync_down(__be32 local, struct net_device *dev, int force) +int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1054,9 +1042,9 @@ if (force) scope = -1; - if (local && fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; + if (local && net->fib_info_laddrhash) { + unsigned int hash = fib_laddr_hashfn(net, local); + struct hlist_head *head = &net->fib_info_laddrhash[hash]; struct hlist_node *node; struct fib_info *fi; @@ -1071,7 +1059,7 @@ if (dev) { struct fib_info *prev_fi = NULL; unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = &net->fib_info_devhash[hash]; struct hlist_node *node; struct fib_nh *nh; @@ -1124,6 +1112,7 @@ int fib_sync_up(struct net_device *dev) { + struct net *net = dev->nd_net; struct fib_info *prev_fi; unsigned int hash; struct hlist_head *head; @@ -1136,7 +1125,7 @@ prev_fi = NULL; hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = &net->fib_info_devhash[hash]; ret = 0; hlist_for_each_entry(nh, node, head, nh_hash) { @@ -1226,3 +1215,17 @@ spin_unlock_bh(&fib_multipath_lock); } #endif + +int fib_info_init(struct net *net) +{ + net->fib_info_devhash = kzalloc( + sizeof(struct hlist_head)*DEVINDEX_HASHSIZE, GFP_KERNEL); + if (!net->fib_info_devhash) + return -ENOMEM; + return 0; +} + +void fib_info_exit(struct net *net) +{ + kfree(net->fib_info_devhash); +} diff -Nurb 
linux-2.6.22-570/net/ipv4/fib_trie.c linux-2.6.22-591/net/ipv4/fib_trie.c --- linux-2.6.22-570/net/ipv4/fib_trie.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/fib_trie.c 2007-12-21 15:36:15.000000000 -0500 @@ -78,6 +78,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -172,7 +173,6 @@ static void tnode_free(struct tnode *tn); static struct kmem_cache *fn_alias_kmem __read_mostly; -static struct trie *trie_local = NULL, *trie_main = NULL; /* rcu_read_lock needs to be hold by caller from readside */ @@ -290,11 +290,10 @@ WARN_ON(tn && tn->pos+tn->bits > 32); } -static int halve_threshold = 25; -static int inflate_threshold = 50; -static int halve_threshold_root = 8; -static int inflate_threshold_root = 15; - +static const int halve_threshold = 25; +static const int inflate_threshold = 50; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) { @@ -1771,11 +1770,10 @@ return found; } -static int trie_last_dflt = -1; - static void fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; struct trie *t = (struct trie *) tb->tb_data; int order, last_idx; struct fib_info *fi = NULL; @@ -1819,28 +1817,28 @@ if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, &trie_last_dflt)) { + &last_idx, &net->trie_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - trie_last_dflt = order; + net->trie_last_dflt = order; goto out; } fi = next_fi; order++; } if (order <= 0 || fi == NULL) { - trie_last_dflt = -1; + net->trie_last_dflt = -1; goto out; } - if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { + if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->trie_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); 
- trie_last_dflt = order; + net->trie_last_dflt = order; goto out; } if (last_idx >= 0) { @@ -1850,7 +1848,7 @@ if (last_resort) atomic_inc(&last_resort->fib_clntref); } - trie_last_dflt = last_idx; + net->trie_last_dflt = last_idx; out:; rcu_read_unlock(); } @@ -1957,11 +1955,15 @@ /* Fix more generic FIB names for init later */ -#ifdef CONFIG_IP_MULTIPLE_TABLES +void fib_hash_exit(struct fib_table *tb) +{ + if (!tb) + return; + fn_trie_flush(tb); + kfree(tb); +} + struct fib_table * fib_hash_init(u32 id) -#else -struct fib_table * __init fib_hash_init(u32 id) -#endif { struct fib_table *tb; struct trie *t; @@ -1991,11 +1993,6 @@ trie_init(t); if (id == RT_TABLE_LOCAL) - trie_local = t; - else if (id == RT_TABLE_MAIN) - trie_main = t; - - if (id == RT_TABLE_LOCAL) printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; @@ -2004,6 +2001,8 @@ #ifdef CONFIG_PROC_FS /* Depth first Trie walk iterator */ struct fib_trie_iter { + struct net *net; + struct trie *trie_local, *trie_main; struct tnode *tnode; struct trie *trie; unsigned index; @@ -2170,7 +2169,21 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; + struct trie *trie_local, *trie_main; struct trie_stat *stat; + struct fib_table *tb; + + trie_local = NULL; + tb = fib_get_table(net, RT_TABLE_LOCAL); + if (tb) + trie_local = (struct trie *) tb->tb_data; + + trie_main = NULL; + tb = fib_get_table(net, RT_TABLE_MAIN); + if (tb) + trie_main = (struct trie *) tb->tb_data; + stat = kmalloc(sizeof(*stat), GFP_KERNEL); if (!stat) @@ -2197,7 +2210,15 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - return single_open(file, fib_triestat_seq_show, NULL); + return single_open(file, fib_triestat_seq_show, + get_net(PROC_NET(inode))); +} + +static int fib_triestat_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + put_net(seq->private); + return single_release(inode, 
file); } static const struct file_operations fib_triestat_fops = { @@ -2205,7 +2226,7 @@ .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = fib_triestat_seq_release, }; static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, @@ -2214,13 +2235,13 @@ loff_t idx = 0; struct node *n; - for (n = fib_trie_get_first(iter, trie_local); + for (n = fib_trie_get_first(iter, iter->trie_local); n; ++idx, n = fib_trie_get_next(iter)) { if (pos == idx) return n; } - for (n = fib_trie_get_first(iter, trie_main); + for (n = fib_trie_get_first(iter, iter->trie_main); n; ++idx, n = fib_trie_get_next(iter)) { if (pos == idx) return n; @@ -2230,10 +2251,23 @@ static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { + struct fib_trie_iter *iter = seq->private; + struct fib_table *tb; + + if (!iter->trie_local) { + tb = fib_get_table(iter->net, RT_TABLE_LOCAL); + if (tb) + iter->trie_local = (struct trie *) tb->tb_data; + } + if (!iter->trie_main) { + tb = fib_get_table(iter->net, RT_TABLE_MAIN); + if (tb) + iter->trie_main = (struct trie *) tb->tb_data; + } rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; - return fib_trie_get_idx(seq->private, *pos - 1); + return fib_trie_get_idx(iter, *pos - 1); } static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -2251,8 +2285,8 @@ return v; /* continue scan in next trie */ - if (iter->trie == trie_local) - return fib_trie_get_first(iter, trie_main); + if (iter->trie == iter->trie_local) + return fib_trie_get_first(iter, iter->trie_main); return NULL; } @@ -2318,7 +2352,7 @@ return 0; if (!NODE_PARENT(n)) { - if (iter->trie == trie_local) + if (iter->trie == iter->trie_local) seq_puts(seq, ":\n"); else seq_puts(seq, "
:\n"); @@ -2384,6 +2418,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -2391,12 +2426,20 @@ goto out; } +static int fib_trie_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct fib_trie_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations fib_trie_fops = { .owner = THIS_MODULE, .open = fib_trie_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = fib_trie_seq_release, }; static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) @@ -2434,7 +2477,7 @@ return 0; } - if (iter->trie == trie_local) + if (iter->trie == iter->trie_local) return 0; if (IS_TNODE(l)) return 0; @@ -2505,6 +2548,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -2517,35 +2561,37 @@ .open = fib_route_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = fib_trie_seq_release, }; -int __init fib_proc_init(void) +int fib_proc_init(struct net *net) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + net->trie_last_dflt = -1; + + if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove("fib_triestat"); + proc_net_remove(net, "fib_triestat"); out2: - proc_net_remove("fib_trie"); + proc_net_remove(net, "fib_trie"); out1: return -ENOMEM; } -void __init fib_proc_exit(void) +void fib_proc_exit(struct net 
*net) { - proc_net_remove("fib_trie"); - proc_net_remove("fib_triestat"); - proc_net_remove("route"); + proc_net_remove(net, "fib_trie"); + proc_net_remove(net, "fib_triestat"); + proc_net_remove(net, "route"); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/ipv4/icmp.c linux-2.6.22-591/net/ipv4/icmp.c --- linux-2.6.22-570/net/ipv4/icmp.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/icmp.c 2007-12-21 15:36:15.000000000 -0500 @@ -229,14 +229,13 @@ * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; -#define icmp_socket __get_cpu_var(__icmp_socket) +#define icmp_socket(NET) (*per_cpu_ptr((NET)->__icmp_socket, smp_processor_id())) -static __inline__ int icmp_xmit_lock(void) +static __inline__ int icmp_xmit_lock(struct net *net) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&icmp_socket(net)->sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. 
*/ @@ -246,9 +245,9 @@ return 0; } -static void icmp_xmit_unlock(void) +static void icmp_xmit_unlock(struct net *net) { - spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); + spin_unlock_bh(&icmp_socket(net)->sk->sk_lock.slock); } /* @@ -347,19 +346,20 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt) { + struct net *net = icmp_param->skb->dev->nd_net; struct sk_buff *skb; - if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + if (ip_append_data(icmp_socket(net)->sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) - ip_flush_pending_frames(icmp_socket->sk); - else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + ip_flush_pending_frames(icmp_socket(net)->sk); + else if ((skb = skb_peek(&icmp_socket(net)->sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); __wsum csum = 0; struct sk_buff *skb1; - skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { + skb_queue_walk(&icmp_socket(net)->sk->sk_write_queue, skb1) { csum = csum_add(csum, skb1->csum); } csum = csum_partial_copy_nocheck((void *)&icmp_param->data, @@ -367,7 +367,7 @@ icmp_param->head_len, csum); icmph->checksum = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; - ip_push_pending_frames(icmp_socket->sk); + ip_push_pending_frames(icmp_socket(net)->sk); } } @@ -377,7 +377,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct sock *sk = icmp_socket->sk; + struct net *net = icmp_param->skb->dev->nd_net; + struct sock *sk = icmp_socket(net)->sk; struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; @@ -386,7 +387,7 @@ if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock()) + if (icmp_xmit_lock(net)) return; icmp_param->data.icmph.checksum = 0; @@ -401,7 +402,8 @@ daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = 
{ .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = net, + .nl_u = { .ip4_u = { .daddr = daddr, .saddr = rt->rt_spec_dst, .tos = RT_TOS(ip_hdr(skb)->tos) } }, @@ -415,7 +417,7 @@ icmp_push_reply(icmp_param, &ipc, rt); ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(net); } @@ -436,6 +438,7 @@ int room; struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable *)skb_in->dst; + struct net *net; struct ipcm_cookie ipc; __be32 saddr; u8 tos; @@ -443,6 +446,7 @@ if (!rt) goto out; + net = rt->fl.fl_net; /* * Find the original header. It is expected to be valid, of course. * Check this, icmp_send is called from the most obscure devices @@ -505,7 +509,7 @@ } } - if (icmp_xmit_lock()) + if (icmp_xmit_lock(net)) return; /* @@ -517,7 +521,7 @@ struct net_device *dev = NULL; if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(rt->fl.iif); + dev = dev_get_by_index(net, rt->fl.iif); if (dev) { saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -545,12 +549,13 @@ icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); icmp_out_count(icmp_param.data.icmph.type); - inet_sk(icmp_socket->sk)->tos = tos; + inet_sk(icmp_socket(net)->sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; { struct flowi fl = { + .fl_net = net, .nl_u = { .ip4_u = { .daddr = icmp_param.replyopts.srr ? 
@@ -593,7 +598,7 @@ ende: ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(net); out:; } @@ -604,6 +609,7 @@ static void icmp_unreach(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; struct iphdr *iph; struct icmphdr *icmph; int hash, protocol; @@ -634,7 +640,7 @@ case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) { + if (net->sysctl_ipv4_no_pmtu_disc) { LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " "fragmentation needed " "and DF set.\n", @@ -678,7 +684,7 @@ */ if (!sysctl_icmp_ignore_bogus_error_responses && - inet_addr_type(iph->daddr) == RTN_BROADCAST) { + inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " "type %u, code %u " @@ -707,7 +713,7 @@ hash = protocol & (MAX_INET_PROTOS - 1); read_lock(&raw_v4_lock); if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { - while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, + while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, iph->saddr, skb->dev->ifindex, skb->skb_tag)) != NULL) { raw_err(raw_sk, skb, info); @@ -1179,29 +1185,54 @@ }, }; -void __init icmp_init(struct net_proto_family *ops) +static void icmp_net_exit(struct net *net) { - struct inet_sock *inet; + struct socket **sock; int i; for_each_possible_cpu(i) { + sock = percpu_ptr(net->__icmp_socket, i); + if (!*sock) + continue; + /* At the last minute lie and say this is a socket for + * the initial network namespace. So the socket will + * be safe to free. 
+ */ + (*sock)->sk->sk_net = get_net(&init_net); + sock_release(*sock); + *sock = NULL; + } + percpu_free(net->__icmp_socket); +} + +static int icmp_net_init(struct net *net) +{ + struct socket **sock; + struct inet_sock *inet; int err; + int i; + + net->__icmp_socket = alloc_percpu(struct socket *); + if (!net->__icmp_socket) + return -ENOMEM; + + for_each_possible_cpu(i) { - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, - &per_cpu(__icmp_socket, i)); + sock = percpu_ptr(net->__icmp_socket, i); + err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock); if (err < 0) - panic("Failed to create the ICMP control socket.\n"); + goto fail; - per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; + (*sock)->sk->sk_allocation = GFP_ATOMIC; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - per_cpu(__icmp_socket, i)->sk->sk_sndbuf = + (*sock)->sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff))); - inet = inet_sk(per_cpu(__icmp_socket, i)->sk); + inet = inet_sk((*sock)->sk); inet->uc_ttl = -1; inet->pmtudisc = IP_PMTUDISC_DONT; @@ -1209,8 +1240,27 @@ * see it, we do not wish this socket to see incoming * packets. 
*/ - per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); + (*sock)->sk->sk_prot->unhash((*sock)->sk); + + /* Don't hold an extra reference on the namespace */ + put_net((*sock)->sk->sk_net); } + return 0; +fail: + icmp_net_exit(net); + return err; + +} + +static struct pernet_operations icmp_net_ops = { + .init = icmp_net_init, + .exit = icmp_net_exit, +}; + +void __init icmp_init(struct net_proto_family *ops) +{ + if (register_pernet_subsys(&icmp_net_ops)) + panic("Failed to create the ICMP control socket.\n"); } EXPORT_SYMBOL(icmp_err_convert); diff -Nurb linux-2.6.22-570/net/ipv4/igmp.c linux-2.6.22-591/net/ipv4/igmp.c --- linux-2.6.22-570/net/ipv4/igmp.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/igmp.c 2007-12-21 15:36:15.000000000 -0500 @@ -97,6 +97,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE #include @@ -129,12 +130,12 @@ */ #define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ ((in_dev)->mr_v1_seen && \ time_before(jiffies, (in_dev)->mr_v1_seen))) #define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) @@ -296,7 +297,8 @@ return NULL; { - struct flowi fl = { .oif = dev->ifindex, + struct flowi fl = { .fl_net = &init_net, + .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = IGMPV3_ALL_MCR } }, .proto = IPPROTO_IGMP }; @@ -646,7 +648,8 @@ dst = group; { - struct flowi fl = { .oif = dev->ifindex, + struct flowi fl = { .fl_net = &init_net, + .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = dst } }, .proto = IPPROTO_IGMP }; if (ip_route_output_key(&rt, &fl)) @@ -929,6 +932,11 @@ struct in_device *in_dev = 
in_dev_get(skb->dev); int len = skb->len; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (in_dev==NULL) { kfree_skb(skb); return 0; @@ -1393,20 +1401,22 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = imr->imr_multiaddr.s_addr } } }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } + }; struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(imr->imr_ifindex); + idev = inetdev_by_index(&init_net, imr->imr_ifindex); if (idev) __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(imr->imr_address.s_addr); + dev = ip_dev_find(&init_net, imr->imr_address.s_addr); if (!dev) return NULL; dev_put(dev); @@ -2234,7 +2244,7 @@ struct in_device *in_dev; inet->mc_list = iml->next; - in_dev = inetdev_by_index(iml->multi.imr_ifindex); + in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); @@ -2291,7 +2301,7 @@ struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2453,7 +2463,7 @@ state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2613,8 +2623,8 @@ int __init igmp_mc_proc_init(void) { - proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); return 0; } 
#endif diff -Nurb linux-2.6.22-570/net/ipv4/inet_connection_sock.c linux-2.6.22-591/net/ipv4/inet_connection_sock.c --- linux-2.6.22-570/net/ipv4/inet_connection_sock.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/inet_connection_sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -32,7 +32,7 @@ /* * This array holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; +//int sysctl_local_port_range[2] = { 32768, 61000 }; int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { @@ -74,6 +74,7 @@ sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && + (sk->sk_net == sk2->sk_net) && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || @@ -98,6 +99,7 @@ int (*bind_conflict)(const struct sock *sk, const struct inet_bind_bucket *tb)) { + struct net *net = sk->sk_net; struct inet_bind_hashbucket *head; struct hlist_node *node; struct inet_bind_bucket *tb; @@ -105,16 +107,16 @@ local_bh_disable(); if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int low = sk->sk_net->sysctl_local_port_range[0]; + int high = sk->sk_net->sysctl_local_port_range[1]; int remaining = (high - low) + 1; int rover = net_random() % (high - low) + low; do { - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) + if ((tb->port == rover) && (tb->net == net)) goto next; break; next: @@ -138,10 +140,10 @@ */ snum = rover; } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) + if ((tb->port == snum) && (tb->net==net)) goto tb_found; } tb = NULL; @@ -161,7 +163,7 @@ } 
tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, net, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -341,7 +343,8 @@ struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = sk->sk_bound_dev_if, .nl_u = { .ip4_u = { .daddr = ((opt && opt->srr) ? opt->faddr : diff -Nurb linux-2.6.22-570/net/ipv4/inet_diag.c linux-2.6.22-591/net/ipv4/inet_diag.c --- linux-2.6.22-570/net/ipv4/inet_diag.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/inet_diag.c 2007-12-21 15:36:15.000000000 -0500 @@ -227,6 +227,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { + struct net *net = in_skb->sk->sk_net; int err; struct sock *sk; struct inet_diag_req *req = NLMSG_DATA(nlh); @@ -242,7 +243,7 @@ /* TODO: lback */ sk = inet_lookup(hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], - req->id.idiag_sport, req->id.idiag_if); + req->id.idiag_sport, req->id.idiag_if, net); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->idiag_family == AF_INET6) { @@ -251,7 +252,7 @@ req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, - req->id.idiag_if); + req->id.idiag_if, net); } #endif else { @@ -906,8 +907,8 @@ if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - NULL, THIS_MODULE); + idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, + inet_diag_rcv, NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; diff -Nurb linux-2.6.22-570/net/ipv4/inet_hashtables.c linux-2.6.22-591/net/ipv4/inet_hashtables.c --- 
linux-2.6.22-570/net/ipv4/inet_hashtables.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/inet_hashtables.c 2007-12-21 15:36:15.000000000 -0500 @@ -29,11 +29,13 @@ */ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, + struct net *net, const unsigned short snum) { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { + tb->net = net; tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -66,7 +68,7 @@ */ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) { - const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(sk->sk_net, inet_sk(sk)->num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -127,7 +129,7 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, const __be32 daddr, const unsigned short hnum, - const int dif) + const int dif, struct net *net) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -149,6 +151,8 @@ continue; score += 2; } + if (sk->sk_net != net) + continue; if (score == 5) return sk; if (score > hiscore) { @@ -163,22 +167,22 @@ /* Optimize the common listener case. 
*/ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, - const int dif) + const int dif, struct net *net) { struct sock *sk = NULL; const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; if (!hlist_empty(head)) { const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && v4_inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) + !sk->sk_bound_dev_if && (sk->sk_net == net)) goto sherry_cache; - sk = inet_lookup_listener_slow(head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(head, daddr, hnum, dif, net); } if (sk) { sherry_cache: @@ -196,12 +200,13 @@ { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); + struct net *net = sk->sk_net; __be32 daddr = inet->rcv_saddr; __be32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct sock *sk2; const struct hlist_node *node; @@ -214,7 +219,7 @@ sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) { if (twsk_unique(sk, sk2, twp)) goto unique; else @@ -225,7 +230,7 @@ /* And established part... 
*/ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) goto not_unique; } @@ -271,6 +276,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + struct net *net = sk->sk_net; struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->num; struct inet_bind_hashbucket *head; @@ -278,8 +284,8 @@ int ret; if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int low = sk->sk_net->sysctl_local_port_range[0]; + int high = sk->sk_net->sysctl_local_port_range[1]; int range = high - low; int i; int port; @@ -291,7 +297,7 @@ local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -299,7 +305,7 @@ * unique enough. 
*/ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { + if ((tb->port == port) && (tb->net == net)) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -311,7 +317,7 @@ } } - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, net, port); if (!tb) { spin_unlock(&head->lock); break; @@ -346,7 +352,7 @@ goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { diff -Nurb linux-2.6.22-570/net/ipv4/inet_timewait_sock.c linux-2.6.22-591/net/ipv4/inet_timewait_sock.c --- linux-2.6.22-570/net/ipv4/inet_timewait_sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/inet_timewait_sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -31,7 +31,7 @@ write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_net, tw->tw_num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); @@ -65,7 +65,7 @@ Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(sk->sk_net, inet->num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; BUG_TRAP(icsk->icsk_bind_hash); diff -Nurb linux-2.6.22-570/net/ipv4/inetpeer.c linux-2.6.22-591/net/ipv4/inetpeer.c --- linux-2.6.22-570/net/ipv4/inetpeer.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/inetpeer.c 2007-12-21 15:36:15.000000000 -0500 @@ -81,71 +81,94 @@ .avl_height = 0 }; #define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; static DEFINE_RWLOCK(peer_pool_lock); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static int peer_total; -/* Exported for sysctl_net_ipv4. */ -int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more - * aggressively at this stage */ -int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ -int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -int inet_peer_gc_mintime __read_mostly = 10 * HZ; -int inet_peer_gc_maxtime __read_mostly = 120 * HZ; - -static struct inet_peer *inet_peer_unused_head; -static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; static DEFINE_SPINLOCK(inet_peer_unused_lock); static void peer_check_expire(unsigned long dummy); -static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); +static int inet_peers_net_init(struct net *net); +static void inet_peers_net_exit(struct net *net); +static struct pernet_operations inet_peers_net_ops = { + .init = inet_peers_net_init, + .exit = inet_peers_net_exit, +}; /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { + peer_cachep = kmem_cache_create("inet_peer_cache", + sizeof(struct inet_peer), + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL, NULL); + + register_pernet_subsys(&inet_peers_net_ops); +} + +static int inet_peers_net_init(struct net 
*net) +{ struct sysinfo si; + net->peer_root = peer_avl_empty; + net->inet_peer_unused_tailp = &net->inet_peer_unused_head; + + net->inet_peer_threshold = 65536 + 128; /* start to throw entries more + * aggressively at this stage */ + net->inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ + net->inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ + net->inet_peer_gc_mintime = 10 * HZ; + net->inet_peer_gc_maxtime = 120 * HZ; + /* Use the straight interface to information about memory. */ si_meminfo(&si); + /* The values below were suggested by Alexey Kuznetsov * . I don't have any opinion about the values * myself. --SAW */ if (si.totalram <= (32768*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ + net->inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ if (si.totalram <= (16384*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* about 512KB */ + net->inet_peer_threshold >>= 1; /* about 512KB */ if (si.totalram <= (8192*1024)/PAGE_SIZE) - inet_peer_threshold >>= 2; /* about 128KB */ + net->inet_peer_threshold >>= 2; /* about 128KB */ - peer_cachep = kmem_cache_create("inet_peer_cache", - sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + init_timer(&net->peer_periodic_timer); + net->peer_periodic_timer.function = peer_check_expire; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. 
*/ - peer_periodic_timer.expires = jiffies - + net_random() % inet_peer_gc_maxtime - + inet_peer_gc_maxtime; - add_timer(&peer_periodic_timer); + net->peer_periodic_timer.expires = jiffies + + net_random() % net->inet_peer_gc_maxtime + + net->inet_peer_gc_maxtime; + /* Remember our namespace */ + net->peer_periodic_timer.data = (unsigned long)net; + add_timer(&net->peer_periodic_timer); + + return 0; +} + +static void inet_peers_net_exit(struct net *net) +{ + del_timer_sync(&net->peer_periodic_timer); /* _sync: peer_check_expire() re-arms itself */ + /* CHECKME do I need to do something to release all of the peers */ } /* Called with or without local BH being disabled. */ -static void unlink_from_unused(struct inet_peer *p) +static void unlink_from_unused(struct net *net, struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); if (p->unused_prevp != NULL) { /* On unused list. */ - *p->unused_prevp = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; + *p->unused_prevp = p->u.unused_next; + if (p->u.unused_next != NULL) + p->u.unused_next->unused_prevp = p->unused_prevp; else - inet_peer_unused_tailp = p->unused_prevp; + net->inet_peer_unused_tailp = p->unused_prevp; p->unused_prevp = NULL; /* mark it as removed */ + p->u.net = hold_net(net); /* Remember the net */ } spin_unlock_bh(&inet_peer_unused_lock); } @@ -160,9 +183,9 @@ struct inet_peer *u, **v; \ if (_stack) { \ stackptr = _stack; \ - *stackptr++ = &peer_root; \ + *stackptr++ = &net->peer_root; \ } \ - for (u = peer_root; u != peer_avl_empty; ) { \ + for (u = net->peer_root; u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ @@ -279,7 +302,7 @@ } while(0) /* May be called with local BH enabled. 
*/ -static void unlink_from_pool(struct inet_peer *p) +static void unlink_from_pool(struct net *net, struct inet_peer *p) { int do_free; @@ -317,7 +340,7 @@ delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); - peer_total--; + net->peer_total--; do_free = 1; } write_unlock_bh(&peer_pool_lock); @@ -335,13 +358,13 @@ } /* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl) +static int cleanup_once(struct net *net, unsigned long ttl) { struct inet_peer *p; /* Remove the first entry from the list of unused nodes. */ spin_lock_bh(&inet_peer_unused_lock); - p = inet_peer_unused_head; + p = net->inet_peer_unused_head; if (p != NULL) { __u32 delta = (__u32)jiffies - p->dtime; if (delta < ttl) { @@ -349,12 +372,13 @@ spin_unlock_bh(&inet_peer_unused_lock); return -1; } - inet_peer_unused_head = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; + net->inet_peer_unused_head = p->u.unused_next; + if (p->u.unused_next != NULL) + p->u.unused_next->unused_prevp = p->unused_prevp; else - inet_peer_unused_tailp = p->unused_prevp; + net->inet_peer_unused_tailp = p->unused_prevp; p->unused_prevp = NULL; /* mark as not on the list */ + p->u.net = hold_net(net); /* Grab an extra reference to prevent node disappearing * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); @@ -367,12 +391,12 @@ * happen because of entry limits in route cache. */ return -1; - unlink_from_pool(p); + unlink_from_pool(net, p); return 0; } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__be32 daddr, int create) +struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create) { struct inet_peer *p, *n; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; @@ -387,7 +411,7 @@ if (p != peer_avl_empty) { /* The existing node has been found. */ /* Remove the entry from unused list if it was there. 
*/ - unlink_from_unused(p); + unlink_from_unused(net, p); return p; } @@ -413,13 +437,13 @@ /* Link the node. */ link_to_pool(n); n->unused_prevp = NULL; /* not on the list */ - peer_total++; + n->u.net = hold_net(net); /* Remember the net */ + net->peer_total++; write_unlock_bh(&peer_pool_lock); - if (peer_total >= inet_peer_threshold) + if (net->peer_total >= net->inet_peer_threshold) /* Remove one less-recently-used entry. */ - cleanup_once(0); - + cleanup_once(net, 0); return n; out_free: @@ -427,25 +451,26 @@ atomic_inc(&p->refcnt); write_unlock_bh(&peer_pool_lock); /* Remove the entry from unused list if it was there. */ - unlink_from_unused(p); + unlink_from_unused(net, p); /* Free preallocated the preallocated node. */ kmem_cache_free(peer_cachep, n); return p; } /* Called with local BH disabled. */ -static void peer_check_expire(unsigned long dummy) +static void peer_check_expire(unsigned long arg) { + struct net *net = (void *)arg; unsigned long now = jiffies; int ttl; - if (peer_total >= inet_peer_threshold) - ttl = inet_peer_minttl; + if (net->peer_total >= net->inet_peer_threshold) + ttl = net->inet_peer_minttl; else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peer_total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl)) { + ttl = net->inet_peer_maxttl + - (net->inet_peer_maxttl - net->inet_peer_minttl) / HZ * + net->peer_total / net->inet_peer_threshold * HZ; + while (!cleanup_once(net, ttl)) { if (jiffies != now) break; } @@ -453,25 +478,30 @@ /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). 
*/ - if (peer_total >= inet_peer_threshold) - peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; + if (net->peer_total >= net->inet_peer_threshold) + net->peer_periodic_timer.expires = jiffies + + net->inet_peer_gc_mintime; else - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; - add_timer(&peer_periodic_timer); + net->peer_periodic_timer.expires = jiffies + + net->inet_peer_gc_maxtime + - (net->inet_peer_gc_maxtime - net->inet_peer_gc_mintime) / HZ * + net->peer_total / net->inet_peer_threshold * HZ; + add_timer(&net->peer_periodic_timer); } void inet_putpeer(struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); if (atomic_dec_and_test(&p->refcnt)) { - p->unused_prevp = inet_peer_unused_tailp; - p->unused_next = NULL; - *inet_peer_unused_tailp = p; - inet_peer_unused_tailp = &p->unused_next; + struct net *net = p->u.net; + + p->unused_prevp = net->inet_peer_unused_tailp; + p->u.unused_next = NULL; + *net->inet_peer_unused_tailp = p; + net->inet_peer_unused_tailp = &p->u.unused_next; p->dtime = (__u32)jiffies; + + release_net(net); } spin_unlock_bh(&inet_peer_unused_lock); } diff -Nurb linux-2.6.22-570/net/ipv4/ip_fragment.c linux-2.6.22-591/net/ipv4/ip_fragment.c --- linux-2.6.22-570/net/ipv4/ip_fragment.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ip_fragment.c 2007-12-21 15:36:15.000000000 -0500 @@ -49,21 +49,6 @@ * as well. Or notify me, at least. --ANK */ -/* Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. - */ -int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; -int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; - -int sysctl_ipfrag_max_dist __read_mostly = 64; - -/* Important NOTE! 
Fragment queue must be destroyed before MSL expires. - * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. - */ -int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; - struct ipfrag_skb_cb { struct inet_skb_parm h; @@ -96,6 +81,7 @@ int iif; unsigned int rid; struct inet_peer *peer; + struct net *net; }; /* Hash table. */ @@ -103,17 +89,13 @@ #define IPQ_HASHSZ 64 /* Per-bucket lock is easy to add now. */ -static struct hlist_head ipq_hash[IPQ_HASHSZ]; static DEFINE_RWLOCK(ipfrag_lock); -static u32 ipfrag_hash_rnd; -static LIST_HEAD(ipq_lru_list); -int ip_frag_nqueues = 0; static __inline__ void __ipq_unlink(struct ipq *qp) { hlist_del(&qp->list); list_del(&qp->lru_list); - ip_frag_nqueues--; + qp->net->ip_frag_nqueues--; } static __inline__ void ipq_unlink(struct ipq *ipq) @@ -123,70 +105,71 @@ write_unlock(&ipfrag_lock); } -static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) +static unsigned int ipqhashfn(struct net *net, __be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); + net->ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); } -static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ipfrag_secret_rebuild(unsigned long dummy) +static void ipfrag_secret_rebuild(unsigned long arg) { + struct net *net = (void *)arg; unsigned long now = jiffies; int i; write_lock(&ipfrag_lock); - get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); + get_random_bytes(&net->ipfrag_hash_rnd, sizeof(u32)); for (i = 0; i < IPQ_HASHSZ; i++) { struct ipq *q; + struct hlist_head *head; struct hlist_node *p, *n; - hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { - unsigned int hval = ipqhashfn(q->id, q->saddr, + head = &net->ipq_hash[i]; + hlist_for_each_entry_safe(q, p, n, head, list) { + unsigned int hval = ipqhashfn(net, q->id, q->saddr, q->daddr, 
q->protocol); if (hval != i) { hlist_del(&q->list); /* Relink to new hash chain. */ - hlist_add_head(&q->list, &ipq_hash[hval]); + hlist_add_head(&q->list, &net->ipq_hash[hval]); } } } write_unlock(&ipfrag_lock); - mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); + mod_timer(&net->ipfrag_secret_timer, + now + net->sysctl_ipfrag_secret_interval); } -atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ - /* Memory Tracking Functions. */ -static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) +static __inline__ void frag_kfree_skb(struct net *net, struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip_frag_mem); + atomic_sub(skb->truesize, &net->ip_frag_mem); kfree_skb(skb); } static __inline__ void frag_free_queue(struct ipq *qp, int *work) { + struct net *net = qp->net; if (work) *work -= sizeof(struct ipq); - atomic_sub(sizeof(struct ipq), &ip_frag_mem); + atomic_sub(sizeof(struct ipq), &net->ip_frag_mem); + release_net(net); kfree(qp); } -static __inline__ struct ipq *frag_alloc_queue(void) +static __inline__ struct ipq *frag_alloc_queue(struct net *net) { struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); if (!qp) return NULL; - atomic_add(sizeof(struct ipq), &ip_frag_mem); + atomic_add(sizeof(struct ipq), &net->ip_frag_mem); return qp; } @@ -209,7 +192,7 @@ while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp, work); + frag_kfree_skb(qp->net, fp, work); fp = xp; } @@ -241,23 +224,23 @@ /* Memory limiting on fragments. Evictor trashes the oldest * fragment queue until we are back under the threshold. 
*/ -static void ip_evictor(void) +static void ip_evictor(struct net *net) { struct ipq *qp; struct list_head *tmp; int work; - work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; + work = atomic_read(&net->ip_frag_mem) - net->sysctl_ipfrag_low_thresh; if (work <= 0) return; while (work > 0) { read_lock(&ipfrag_lock); - if (list_empty(&ipq_lru_list)) { + if (list_empty(&net->ipq_lru_list)) { read_unlock(&ipfrag_lock); return; } - tmp = ipq_lru_list.next; + tmp = net->ipq_lru_list.next; qp = list_entry(tmp, struct ipq, lru_list); atomic_inc(&qp->refcnt); read_unlock(&ipfrag_lock); @@ -292,7 +275,7 @@ if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { struct sk_buff *head = qp->fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { + if ((head->dev = dev_get_by_index(qp->net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); dev_put(head->dev); } @@ -304,7 +287,7 @@ /* Creation primitives. */ -static struct ipq *ip_frag_intern(struct ipq *qp_in) +static struct ipq *ip_frag_intern(struct net *net, struct ipq *qp_in) { struct ipq *qp; #ifdef CONFIG_SMP @@ -313,14 +296,14 @@ unsigned int hash; write_lock(&ipfrag_lock); - hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, + hash = ipqhashfn(net, qp_in->id, qp_in->saddr, qp_in->daddr, qp_in->protocol); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we * promoted read lock to write lock. 
*/ - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { + hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) { if (qp->id == qp_in->id && qp->saddr == qp_in->saddr && qp->daddr == qp_in->daddr && @@ -336,26 +319,27 @@ #endif qp = qp_in; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) + if (!mod_timer(&qp->timer, jiffies + net->sysctl_ipfrag_time)) atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &ipq_hash[hash]); + hlist_add_head(&qp->list, &net->ipq_hash[hash]); INIT_LIST_HEAD(&qp->lru_list); - list_add_tail(&qp->lru_list, &ipq_lru_list); - ip_frag_nqueues++; + list_add_tail(&qp->lru_list, &net->ipq_lru_list); + net->ip_frag_nqueues++; write_unlock(&ipfrag_lock); return qp; } /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) +static struct ipq *ip_frag_create(struct net *net, struct iphdr *iph, u32 user) { struct ipq *qp; - if ((qp = frag_alloc_queue()) == NULL) + if ((qp = frag_alloc_queue(net)) == NULL) goto out_nomem; + qp->net = hold_net(net); qp->protocol = iph->protocol; qp->last_in = 0; qp->id = iph->id; @@ -366,7 +350,8 @@ qp->meat = 0; qp->fragments = NULL; qp->iif = 0; - qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; + qp->peer = net->sysctl_ipfrag_max_dist ? + inet_getpeer(net, iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -375,7 +360,7 @@ spin_lock_init(&qp->lock); atomic_set(&qp->refcnt, 1); - return ip_frag_intern(qp); + return ip_frag_intern(net, qp); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); @@ -385,7 +370,7 @@ /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and create new one, if nothing is found. 
*/ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) +static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) { __be16 id = iph->id; __be32 saddr = iph->saddr; @@ -396,8 +381,8 @@ struct hlist_node *n; read_lock(&ipfrag_lock); - hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { + hash = ipqhashfn(net, id, saddr, daddr, protocol); + hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) { if (qp->id == id && qp->saddr == saddr && qp->daddr == daddr && @@ -410,14 +395,14 @@ } read_unlock(&ipfrag_lock); - return ip_frag_create(iph, user); + return ip_frag_create(net, iph, user); } /* Is the fragment too far ahead to be part of ipq? */ static inline int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; - unsigned int max = sysctl_ipfrag_max_dist; + unsigned int max = qp->net->sysctl_ipfrag_max_dist; unsigned int start, end; int rc; @@ -442,7 +427,7 @@ { struct sk_buff *fp; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + if (!mod_timer(&qp->timer, jiffies + qp->net->sysctl_ipfrag_time)) { atomic_inc(&qp->refcnt); return -ETIMEDOUT; } @@ -450,7 +435,7 @@ fp = qp->fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp, NULL); + frag_kfree_skb(qp->net, fp, NULL); fp = xp; } while (fp); @@ -466,6 +451,7 @@ /* Add new segment to existing queue. 
*/ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { + struct net *net = qp->net; struct sk_buff *prev, *next; int flags, offset; int ihl, end; @@ -576,7 +562,7 @@ qp->fragments = next; qp->meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(net, free_it, NULL); } } @@ -594,12 +580,12 @@ skb->dev = NULL; qp->stamp = skb->tstamp; qp->meat += skb->len; - atomic_add(skb->truesize, &ip_frag_mem); + atomic_add(skb->truesize, &net->ip_frag_mem); if (offset == 0) qp->last_in |= FIRST_IN; write_lock(&ipfrag_lock); - list_move_tail(&qp->lru_list, &ipq_lru_list); + list_move_tail(&qp->lru_list, &net->ipq_lru_list); write_unlock(&ipfrag_lock); return; @@ -613,6 +599,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) { + struct net *net = qp->net; struct iphdr *iph; struct sk_buff *fp, *head = qp->fragments; int len; @@ -654,12 +641,12 @@ head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip_frag_mem); + atomic_add(clone->truesize, &net->ip_frag_mem); } skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip_frag_mem); + atomic_sub(head->truesize, &net->ip_frag_mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -669,7 +656,7 @@ else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip_frag_mem); + atomic_sub(fp->truesize, &net->ip_frag_mem); } head->next = NULL; @@ -700,19 +687,20 @@ /* Process an incoming IP datagram fragment. */ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) { + struct net *net = skb->dev->nd_net; struct ipq *qp; struct net_device *dev; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. 
*/ - if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) - ip_evictor(); + if (atomic_read(&net->ip_frag_mem) > net->sysctl_ipfrag_high_thresh) + ip_evictor(net); dev = skb->dev; /* Lookup (or create) queue header */ - if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { + if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { struct sk_buff *ret = NULL; spin_lock(&qp->lock); @@ -733,15 +721,70 @@ return NULL; } -void __init ipfrag_init(void) +static int ipfrag_net_init(struct net *net) { - ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + struct timer_list *secret_timer; + int i; + + /* Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to measurably + * harm machine performance. + */ + net->sysctl_ipfrag_high_thresh = 256*1024; + net->sysctl_ipfrag_low_thresh = 192*1024; + net->sysctl_ipfrag_max_dist = 64; + + /* Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. 
+ */ + net->sysctl_ipfrag_time = IP_FRAG_TIME; + + net->sysctl_ipfrag_secret_interval = 10 * 60 * HZ; + + net->ipq_hash = kzalloc(sizeof(*net->ipq_hash)*IPQ_HASHSZ, GFP_KERNEL); + if (!net->ipq_hash) + return -ENOMEM; + + for (i = 0; i < IPQ_HASHSZ; i++) + INIT_HLIST_HEAD(&net->ipq_hash[i]); + INIT_LIST_HEAD(&net->ipq_lru_list); + net->ip_frag_nqueues = 0; + atomic_set(&net->ip_frag_mem, 0); + + + net->ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ (jiffies ^ (jiffies >> 6))); - init_timer(&ipfrag_secret_timer); - ipfrag_secret_timer.function = ipfrag_secret_rebuild; - ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; - add_timer(&ipfrag_secret_timer); + secret_timer = &net->ipfrag_secret_timer; + init_timer(secret_timer); + secret_timer->function = ipfrag_secret_rebuild; + secret_timer->expires = jiffies + net->sysctl_ipfrag_secret_interval; + secret_timer->data = (unsigned long)net; + add_timer(secret_timer); + + return 0; +} + +static void ipfrag_net_exit(struct net *net) +{ + del_timer(&net->ipfrag_secret_timer); + + net->sysctl_ipfrag_low_thresh = 0; + while (atomic_read(&net->ip_frag_mem)) + ip_evictor(net); + + kfree(net->ipq_hash); +} + +static struct pernet_operations ipfrag_net_ops = { + .init = ipfrag_net_init, + .exit = ipfrag_net_exit, +}; + +void ipfrag_init(void) +{ + register_pernet_subsys(&ipfrag_net_ops); } EXPORT_SYMBOL(ip_defrag); diff -Nurb linux-2.6.22-570/net/ipv4/ip_gre.c linux-2.6.22-591/net/ipv4/ip_gre.c --- linux-2.6.22-570/net/ipv4/ip_gre.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ip_gre.c 2007-12-21 15:36:15.000000000 -0500 @@ -262,7 +262,7 @@ int i; for (i=1; i<100; i++) { sprintf(name, "gre%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -397,6 +397,9 @@ struct flowi fl; struct rtable *rt; + if (skb->dev->nd_net != &init_net) + return; + if (p[1] != htons(ETH_P_IP)) return; @@ -475,6 +478,7 @@ /* Try 
to guess incoming interface */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fl4_dst = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_GRE; @@ -559,6 +563,10 @@ struct ip_tunnel *tunnel; int offset = 4; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -740,7 +748,8 @@ } { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, @@ -1095,7 +1104,8 @@ struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr)) { - struct flowi fl = { .oif = t->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = t->parms.link, .nl_u = { .ip4_u = { .daddr = t->parms.iph.daddr, .saddr = t->parms.iph.saddr, @@ -1118,7 +1128,7 @@ { struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr) && t->mlink) { - struct in_device *in_dev = inetdev_by_index(t->mlink); + struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink); if (in_dev) { ip_mc_dec_group(in_dev, t->parms.iph.daddr); in_dev_put(in_dev); @@ -1168,7 +1178,8 @@ /* Guess output device to choose reasonable mtu and hard_header_len */ if (iph->daddr) { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, @@ -1195,7 +1206,7 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len; diff -Nurb linux-2.6.22-570/net/ipv4/ip_input.c linux-2.6.22-591/net/ipv4/ip_input.c --- linux-2.6.22-570/net/ipv4/ip_input.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ip_input.c 2007-12-21 15:36:15.000000000 -0500 @@ -280,6 +280,10 @@ struct iphdr *iph; struct net_device *dev = skb->dev; + + if (skb->dev->nd_net 
!= &init_net) + goto drop; + /* It looks as overkill, because not all IP options require packet mangling. But it is the easiest for now, especially taking diff -Nurb linux-2.6.22-570/net/ipv4/ip_options.c linux-2.6.22-591/net/ipv4/ip_options.c --- linux-2.6.22-570/net/ipv4/ip_options.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/ip_options.c 2007-12-21 15:36:15.000000000 -0500 @@ -151,7 +151,7 @@ __be32 addr; memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(addr) != RTN_LOCAL) { + if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { dopt->ts_needtime = 1; soffset += 8; } @@ -400,7 +400,7 @@ { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); - if (inet_addr_type(addr) == RTN_UNICAST) + if (inet_addr_type(&init_net, addr) == RTN_UNICAST) break; if (skb) timeptr = (__be32*)&optptr[optptr[2]+3]; diff -Nurb linux-2.6.22-570/net/ipv4/ip_output.c linux-2.6.22-591/net/ipv4/ip_output.c --- linux-2.6.22-570/net/ipv4/ip_output.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ip_output.c 2007-12-21 15:36:15.000000000 -0500 @@ -83,8 +83,6 @@ #include #include -int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; - /* Generate a checksum for an outgoing IP datagram. 
*/ __inline__ void ip_send_check(struct iphdr *iph) { @@ -317,7 +315,8 @@ daddr = opt->faddr; { - struct flowi fl = { .oif = sk->sk_bound_dev_if, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = sk->sk_bound_dev_if, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = inet->saddr, @@ -837,7 +836,7 @@ */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_ALL_CSUM && + rt->u.dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; @@ -1352,7 +1351,8 @@ } { - struct flowi fl = { .oif = arg->bound_dev_if, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = arg->bound_dev_if, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = rt->rt_spec_dst, diff -Nurb linux-2.6.22-570/net/ipv4/ip_sockglue.c linux-2.6.22-591/net/ipv4/ip_sockglue.c --- linux-2.6.22-570/net/ipv4/ip_sockglue.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ip_sockglue.c 2007-12-21 15:36:15.000000000 -0500 @@ -411,6 +411,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { + struct net *net = sk->sk_net; struct inet_sock *inet = inet_sk(sk); int val=0,err; @@ -596,13 +597,13 @@ err = 0; break; } - dev = ip_dev_find(mreq.imr_address.s_addr); + dev = ip_dev_find(net, mreq.imr_address.s_addr); if (dev) { mreq.imr_ifindex = dev->ifindex; dev_put(dev); } } else - dev = __dev_get_by_index(mreq.imr_ifindex); + dev = __dev_get_by_index(net, mreq.imr_ifindex); err = -EADDRNOTAVAIL; @@ -956,6 +957,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { + struct net *net = sk->sk_net; struct inet_sock *inet = inet_sk(sk); int val; int len; @@ -1023,7 +1025,7 @@ break; case IP_TTL: val = (inet->uc_ttl == -1 ? 
- sysctl_ip_default_ttl : + net->sysctl_ip_default_ttl : inet->uc_ttl); break; case IP_HDRINCL: diff -Nurb linux-2.6.22-570/net/ipv4/ipcomp.c linux-2.6.22-591/net/ipv4/ipcomp.c --- linux-2.6.22-570/net/ipv4/ipcomp.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/ipcomp.c 2007-12-21 15:36:15.000000000 -0500 @@ -175,6 +175,9 @@ struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; + if (skb->dev->nd_net != &init_net) + return; + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; @@ -486,3 +489,4 @@ MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); MODULE_AUTHOR("James Morris "); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff -Nurb linux-2.6.22-570/net/ipv4/ipconfig.c linux-2.6.22-591/net/ipv4/ipconfig.c --- linux-2.6.22-570/net/ipv4/ipconfig.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipconfig.c 2007-12-21 15:36:15.000000000 -0500 @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -184,16 +185,18 @@ struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; + struct net_device *lo; last = &ic_first_dev; rtnl_lock(); /* bring loopback device up first */ - if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); + lo = &init_net.loopback_dev; + if (dev_change_flags(lo, lo->flags | IFF_UP) < 0) + printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name); - for_each_netdev(dev) { - if (dev == &loopback_dev) + for_each_netdev(&init_net, dev) { + if (dev == lo) continue; if (user_dev_name[0] ? 
!strcmp(dev->name, user_dev_name) : (!(dev->flags & IFF_LOOPBACK) && @@ -283,7 +286,7 @@ mm_segment_t oldfs = get_fs(); set_fs(get_ds()); - res = devinet_ioctl(cmd, (struct ifreq __user *) arg); + res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); set_fs(oldfs); return res; } @@ -294,7 +297,7 @@ mm_segment_t oldfs = get_fs(); set_fs(get_ds()); - res = ip_rt_ioctl(cmd, (void __user *) arg); + res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg); set_fs(oldfs); return res; } @@ -425,6 +428,9 @@ unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -834,6 +840,9 @@ struct ic_device *d; int len, ext_len; + if (dev->nd_net != &init_net) + goto drop; + /* Perform verifications before taking the lock. */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1253,7 +1262,7 @@ __be32 addr; #ifdef CONFIG_PROC_FS - proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); + proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff -Nurb linux-2.6.22-570/net/ipv4/ipip.c linux-2.6.22-591/net/ipv4/ipip.c --- linux-2.6.22-570/net/ipv4/ipip.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipip.c 2007-12-21 15:36:15.000000000 -0500 @@ -225,7 +225,7 @@ int i; for (i=1; i<100; i++) { sprintf(name, "tunl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -403,6 +403,7 @@ /* Try to guess incoming interface */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fl4_daddr = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_IPIP; @@ -542,7 +543,8 @@ } { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, @@ -806,7 +808,8 @@ 
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); if (iph->daddr) { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, @@ -821,7 +824,7 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff -Nurb linux-2.6.22-570/net/ipv4/ipmr.c linux-2.6.22-591/net/ipv4/ipmr.c --- linux-2.6.22-570/net/ipv4/ipmr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipmr.c 2007-12-21 15:36:15.000000000 -0500 @@ -62,6 +62,7 @@ #include #include #include +#include #include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) @@ -124,7 +125,7 @@ { struct net_device *dev; - dev = __dev_get_by_name("tunl0"); + dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { int err; @@ -148,7 +149,7 @@ dev = NULL; - if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { + if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); @@ -320,7 +321,7 @@ e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, NETLINK_CB(skb).pid); + rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -422,7 +423,7 @@ return -ENOBUFS; break; case 0: - dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr); + dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); if (!dev) return -EADDRNOTAVAIL; dev_put(dev); @@ -532,7 +533,7 @@ memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, NETLINK_CB(skb).pid); + rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } @@ -848,7 +849,7 @@ { rtnl_lock(); if (sk == mroute_socket) { - IPV4_DEVCONF_ALL(MC_FORWARDING)--; + IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; write_lock_bh(&mrt_lock); 
mroute_socket=NULL; @@ -897,7 +898,7 @@ mroute_socket=sk; write_unlock_bh(&mrt_lock); - IPV4_DEVCONF_ALL(MC_FORWARDING)++; + IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; } rtnl_unlock(); return ret; @@ -1082,13 +1083,18 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; struct vif_device *v; int ct; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; for (ct=0;ctdev==ptr) + if (v->dev==dev) vif_delete(ct); } return NOTIFY_DONE; @@ -1171,7 +1177,8 @@ #endif if (vif->flags&VIFF_TUNNEL) { - struct flowi fl = { .oif = vif->link, + struct flowi fl = { .fl_net = &init_net, + .oif = vif->link, .nl_u = { .ip4_u = { .daddr = vif->remote, .saddr = vif->local, @@ -1181,7 +1188,8 @@ goto out_free; encap = sizeof(struct iphdr); } else { - struct flowi fl = { .oif = vif->link, + struct flowi fl = { .fl_net = &init_net, + .oif = vif->link, .nl_u = { .ip4_u = { .daddr = iph->daddr, .tos = RT_TOS(iph->tos) } }, @@ -1498,6 +1506,10 @@ struct iphdr *encap; struct net_device *reg_dev = NULL; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -1922,7 +1934,7 @@ ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); + proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); + proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); #endif } diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_app.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_app.c 2007-12-21 15:36:15.000000000 -0500 @@ -32,6 +32,7 @@ #include #include #include +#include 
#include @@ -616,12 +617,12 @@ int ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops); + proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } void ip_vs_app_cleanup(void) { - proc_net_remove("ip_vs_app"); + proc_net_remove(&init_net, "ip_vs_app"); } diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_conn.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_conn.c 2007-12-21 15:36:15.000000000 -0500 @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -922,7 +923,7 @@ rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); @@ -938,6 +939,6 @@ /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove("ip_vs_conn"); + proc_net_remove(&init_net, "ip_vs_conn"); vfree(ip_vs_conn_tab); } diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_core.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_core.c 2007-12-21 15:36:15.000000000 -0500 @@ -460,7 +460,7 @@ and the destination is RTN_UNICAST (and not local), then create a cache_bypass connection entry */ if (sysctl_ip_vs_cache_bypass && svc->fwmark - && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { + && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { int ret, cs; struct ip_vs_conn *cp; @@ -530,6 +530,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if 
(!((*pskb)->ipvs_property)) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ @@ -734,6 +738,10 @@ struct ip_vs_conn *cp; int ihl; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + EnterFunction(11); if (skb->ipvs_property) @@ -818,7 +826,7 @@ * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + if (ip_route_me_harder(&init_net, pskb, RTN_LOCAL) != 0) goto drop; skb = *pskb; @@ -956,12 +964,16 @@ int ret, restart; int ihl; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* * Big tappo: only PACKET_HOST (neither loopback nor mcasts) * ... don't know why 1st test DOES NOT include 2nd (?) */ if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev == &loopback_dev || skb->sk)) { + || skb->dev == &init_net.loopback_dev || skb->sk)) { IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, ip_hdr(skb)->protocol, @@ -1062,6 +1074,10 @@ { int r; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_ctl.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_ctl.c 2007-12-21 15:36:15.000000000 -0500 @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -679,7 +680,7 @@ conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ - if (inet_addr_type(udest->addr) == RTN_LOCAL) { + if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | IP_VS_CONN_F_LOCALNODE; } @@ -731,7 +732,7 @@ EnterFunction(2); - atype 
= inet_addr_type(udest->addr); + atype = inet_addr_type(&init_net, udest->addr); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; @@ -1932,6 +1933,9 @@ struct ip_vs_service *svc; struct ip_vs_dest_user *udest; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2196,6 +2200,9 @@ unsigned char arg[128]; int ret = 0; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2356,8 +2363,8 @@ return ret; } - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); sysctl_header = register_sysctl_table(vs_root_table); @@ -2390,8 +2397,8 @@ cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); - proc_net_remove("ip_vs_stats"); - proc_net_remove("ip_vs"); + proc_net_remove(&init_net, "ip_vs_stats"); + proc_net_remove(&init_net, "ip_vs"); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_lblcr.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_lblcr.c 2007-12-21 15:36:15.000000000 -0500 @@ -843,7 +843,7 @@ INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_table(lblcr_root_table); #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); + proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); #endif return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); } @@ -852,7 +852,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_remove("ip_vs_lblcr"); + proc_net_remove(&init_net, "ip_vs_lblcr"); #endif 
unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_sync.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_sync.c 2007-12-21 15:36:15.000000000 -0500 @@ -387,7 +387,7 @@ struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -412,7 +412,7 @@ int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - @@ -423,7 +423,7 @@ IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) return -ENODEV; sync_recv_mesg_maxlen = dev->mtu - @@ -451,7 +451,7 @@ memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -472,7 +472,7 @@ __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_xmit.c --- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/ipv4/ipvs/ip_vs_xmit.c 2007-12-21 15:36:15.000000000 -0500 @@ -70,6 +70,7 @@ if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos, 0))) { struct flowi fl = { + .fl_net = &init_net, .oif = 0, .nl_u = { .ip4_u = { @@ -93,6 +94,7 @@ spin_unlock(&dest->dst_lock); } else { struct flowi fl = { + .fl_net = &init_net, .oif = 0, .nl_u = { .ip4_u = { @@ -160,6 +162,7 @@ u8 tos = iph->tos; int mtu; struct flowi fl = { + .fl_net = &init_net, .oif = 0, .nl_u = { .ip4_u = { diff -Nurb linux-2.6.22-570/net/ipv4/multipath.c linux-2.6.22-591/net/ipv4/multipath.c --- linux-2.6.22-570/net/ipv4/multipath.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/multipath.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,55 +0,0 @@ -/* multipath.c: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#include -#include -#include -#include - -#include - -static DEFINE_SPINLOCK(alg_table_lock); -struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; - -int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - int err; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX || - !ops->mp_alg_select_route) - return -EINVAL; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot != NULL) { - err = -EBUSY; - } else { - *slot = ops; - err = 0; - } - spin_unlock(&alg_table_lock); - - return err; -} -EXPORT_SYMBOL(multipath_alg_register); - -void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX) - return; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot == ops) - *slot = NULL; - spin_unlock(&alg_table_lock); - - synchronize_net(); -} -EXPORT_SYMBOL(multipath_alg_unregister); diff -Nurb linux-2.6.22-570/net/ipv4/multipath_drr.c linux-2.6.22-591/net/ipv4/multipath_drr.c --- 
linux-2.6.22-570/net/ipv4/multipath_drr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/multipath_drr.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,249 +0,0 @@ -/* - * Device round robin policy for multipath. - * - * - * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct multipath_device { - int ifi; /* interface index of device */ - atomic_t usecount; - int allocated; -}; - -#define MULTIPATH_MAX_DEVICECANDIDATES 10 - -static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; -static DEFINE_SPINLOCK(state_lock); - -static int inline __multipath_findslot(void) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated == 0) - return i; - } - return -1; -} - -static int inline __multipath_finddev(int ifindex) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated != 0 && - state[i].ifi == ifindex) - return i; - } - return -1; -} - -static int drr_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - int devidx; - - switch (event) { - case NETDEV_UNREGISTER: - case NETDEV_DOWN: - spin_lock_bh(&state_lock); - - devidx = __multipath_finddev(dev->ifindex); - if (devidx != -1) { - state[devidx].allocated = 0; - state[devidx].ifi = 0; - 
atomic_set(&state[devidx].usecount, 0); - } - - spin_unlock_bh(&state_lock); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block drr_dev_notifier = { - .notifier_call = drr_dev_event, -}; - - -static void drr_safe_inc(atomic_t *usecount) -{ - int n; - - atomic_inc(usecount); - - n = atomic_read(usecount); - if (n <= 0) { - int i; - - spin_lock_bh(&state_lock); - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) - atomic_set(&state[i].usecount, 0); - - spin_unlock_bh(&state_lock); - } -} - -static void drr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *cur_min; - int min_usecount = -1; - int devidx = -1; - int cur_min_devidx = -1; - - /* 1. make sure all alt. nexthops have the same GC related data */ - /* 2. determine the new candidate to be returned */ - result = NULL; - cur_min = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - int nh_ifidx = nh->u.dst.dev->ifindex; - - nh->u.dst.lastuse = jiffies; - nh->u.dst.__use++; - if (result != NULL) - continue; - - /* search for the output interface */ - - /* this is not SMP safe, only add/remove are - * SMP safe as wrong usecount updates have no big - * impact - */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add the interface to the array - * SMP safe - */ - spin_lock_bh(&state_lock); - - /* due to SMP: search again */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add entry for device */ - devidx = __multipath_findslot(); - if (devidx == -1) { - /* unlikely but possible */ - continue; - } - - state[devidx].allocated = 1; - state[devidx].ifi = nh_ifidx; - atomic_set(&state[devidx].usecount, 0); - min_usecount = 0; - } - - spin_unlock_bh(&state_lock); - } - - if (min_usecount == 0) { - /* if the device has not been used it is - * the primary target - */ - 
drr_safe_inc(&state[devidx].usecount); - result = nh; - } else { - int count = - atomic_read(&state[devidx].usecount); - - if (min_usecount == -1 || - count < min_usecount) { - cur_min = nh; - cur_min_devidx = devidx; - min_usecount = count; - } - } - } - } - - if (!result) { - if (cur_min) { - drr_safe_inc(&state[cur_min_devidx].usecount); - result = cur_min; - } else { - result = first; - } - } - - *rp = result; -} - -static struct ip_mp_alg_ops drr_ops = { - .mp_alg_select_route = drr_select_route, -}; - -static int __init drr_init(void) -{ - int err = register_netdevice_notifier(&drr_dev_notifier); - - if (err) - return err; - - err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); - if (err) - goto fail; - - return 0; - -fail: - unregister_netdevice_notifier(&drr_dev_notifier); - return err; -} - -static void __exit drr_exit(void) -{ - unregister_netdevice_notifier(&drr_dev_notifier); - multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR); -} - -module_init(drr_init); -module_exit(drr_exit); -MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/net/ipv4/multipath_random.c linux-2.6.22-591/net/ipv4/multipath_random.c --- linux-2.6.22-570/net/ipv4/multipath_random.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/multipath_random.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,114 +0,0 @@ -/* - * Random policy for multipath. - * - * - * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MULTIPATH_MAX_CANDIDATES 40 - -static void random_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - unsigned char candidate_count = 0; - - /* count all candidate */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) - ++candidate_count; - } - - /* choose a random candidate */ - decision = first; - if (candidate_count > 1) { - unsigned char i = 0; - unsigned char candidate_no = (unsigned char) - (random32() % candidate_count); - - /* find chosen candidate and adjust GC data for all candidates - * to ensure they stay in cache - */ - for (rt = first; rt; rt = rt->u.dst.rt_next) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - rt->u.dst.lastuse = jiffies; - - if (i == candidate_no) - decision = rt; - - if (i >= candidate_count) - break; - - i++; - } - } - } - - decision->u.dst.__use++; - *rp = decision; -} - -static struct ip_mp_alg_ops random_ops = { - .mp_alg_select_route = random_select_route, -}; - -static int __init random_init(void) -{ - return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM); -} - -static void __exit random_exit(void) -{ - multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM); -} - -module_init(random_init); -module_exit(random_exit); -MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/net/ipv4/multipath_rr.c linux-2.6.22-591/net/ipv4/multipath_rr.c --- linux-2.6.22-570/net/ipv4/multipath_rr.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/ipv4/multipath_rr.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,95 +0,0 @@ -/* - * Round robin policy for multipath. - * - * - * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void rr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *min_use_cand = NULL; - int min_use = -1; - - /* 1. make sure all alt. nexthops have the same GC related data - * 2. 
determine the new candidate to be returned - */ - result = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - nh->u.dst.lastuse = jiffies; - - if (min_use == -1 || nh->u.dst.__use < min_use) { - min_use = nh->u.dst.__use; - min_use_cand = nh; - } - } - } - result = min_use_cand; - if (!result) - result = first; - - result->u.dst.__use++; - *rp = result; -} - -static struct ip_mp_alg_ops rr_ops = { - .mp_alg_select_route = rr_select_route, -}; - -static int __init rr_init(void) -{ - return multipath_alg_register(&rr_ops, IP_MP_ALG_RR); -} - -static void __exit rr_exit(void) -{ - multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR); -} - -module_init(rr_init); -module_exit(rr_exit); -MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/net/ipv4/multipath_wrandom.c linux-2.6.22-591/net/ipv4/multipath_wrandom.c --- linux-2.6.22-570/net/ipv4/multipath_wrandom.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/multipath_wrandom.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,329 +0,0 @@ -/* - * Weighted random policy for multipath. - * - * - * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ - * - * Authors: Einar Lueck - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MULTIPATH_STATE_SIZE 15 - -struct multipath_candidate { - struct multipath_candidate *next; - int power; - struct rtable *rt; -}; - -struct multipath_dest { - struct list_head list; - - const struct fib_nh *nh_info; - __be32 netmask; - __be32 network; - unsigned char prefixlen; - - struct rcu_head rcu; -}; - -struct multipath_bucket { - struct list_head head; - spinlock_t lock; -}; - -struct multipath_route { - struct list_head list; - - int oif; - __be32 gw; - struct list_head dests; - - struct rcu_head rcu; -}; - -/* state: primarily weight per route information */ -static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; - -static unsigned char __multipath_lookup_weight(const struct flowi *fl, - const struct rtable *rt) -{ - const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; - struct multipath_route *r; - struct multipath_route *target_route = NULL; - struct multipath_dest *d; - int weight = 1; - - /* lookup the weight information for a certain route */ - rcu_read_lock(); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == rt->rt_gateway && - r->oif == rt->idev->dev->ifindex) { - target_route = r; - break; - } - } - - if (!target_route) { - /* this should not happen... 
but we are prepared */ - printk( KERN_CRIT"%s: missing state for gateway: %u and " \ - "device %d\n", __FUNCTION__, rt->rt_gateway, - rt->idev->dev->ifindex); - goto out; - } - - /* find state entry for destination */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - __be32 targetnetwork = fl->fl4_dst & - inet_make_mask(d->prefixlen); - - if ((targetnetwork & d->netmask) == d->network) { - weight = d->nh_info->nh_weight; - goto out; - } - } - -out: - rcu_read_unlock(); - return weight; -} - -static void wrandom_init_state(void) -{ - int i; - - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - INIT_LIST_HEAD(&state[i].head); - spin_lock_init(&state[i].lock); - } -} - -static void wrandom_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - struct multipath_candidate *first_mpc = NULL; - struct multipath_candidate *mpc, *last_mpc = NULL; - int power = 0; - int last_power; - int selector; - const size_t size_mpc = sizeof(struct multipath_candidate); - - /* collect all candidates and identify their weights */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - struct multipath_candidate* mpc = - (struct multipath_candidate*) - kmalloc(size_mpc, GFP_ATOMIC); - - if (!mpc) - return; - - power += __multipath_lookup_weight(flp, rt) * 10000; - - mpc->power = power; - mpc->rt = rt; - mpc->next = NULL; - - if (!first_mpc) - first_mpc = mpc; - else - last_mpc->next = mpc; - - last_mpc = mpc; - } - } - - /* choose a weighted random candidate */ - decision = first; - selector = random32() % power; - last_power = 0; - - /* select candidate, adjust GC data and cleanup local state */ - decision = first; - last_mpc = NULL; - for (mpc = first_mpc; mpc; mpc = mpc->next) { - mpc->rt->u.dst.lastuse = jiffies; - if (last_power <= selector && selector < mpc->power) - decision = 
mpc->rt; - - last_power = mpc->power; - kfree(last_mpc); - last_mpc = mpc; - } - - /* concurrent __multipath_flush may lead to !last_mpc */ - kfree(last_mpc); - - decision->u.dst.__use++; - *rp = decision; -} - -static void wrandom_set_nhinfo(__be32 network, - __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ - const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; - struct multipath_route *r, *target_route = NULL; - struct multipath_dest *d, *target_dest = NULL; - - /* store the weight information for a certain route */ - spin_lock_bh(&state[state_idx].lock); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { - target_route = r; - break; - } - } - - if (!target_route) { - const size_t size_rt = sizeof(struct multipath_route); - target_route = (struct multipath_route *) - kmalloc(size_rt, GFP_ATOMIC); - - target_route->gw = nh->nh_gw; - target_route->oif = nh->nh_oif; - memset(&target_route->rcu, 0, sizeof(struct rcu_head)); - INIT_LIST_HEAD(&target_route->dests); - - list_add_rcu(&target_route->list, &state[state_idx].head); - } - - /* find state entry for destination or add one if necessary */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - if (d->nh_info == nh) { - target_dest = d; - break; - } - } - - if (!target_dest) { - const size_t size_dst = sizeof(struct multipath_dest); - target_dest = (struct multipath_dest*) - kmalloc(size_dst, GFP_ATOMIC); - - target_dest->nh_info = nh; - target_dest->network = network; - target_dest->netmask = netmask; - target_dest->prefixlen = prefixlen; - memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); - - list_add_rcu(&target_dest->list, &target_route->dests); - } - /* else: we already stored this info for another destination => - * we are finished - */ - - spin_unlock_bh(&state[state_idx].lock); -} - -static void __multipath_free(struct rcu_head *head) -{ - 
struct multipath_route *rt = container_of(head, struct multipath_route, - rcu); - kfree(rt); -} - -static void __multipath_free_dst(struct rcu_head *head) -{ - struct multipath_dest *dst = container_of(head, - struct multipath_dest, - rcu); - kfree(dst); -} - -static void wrandom_flush(void) -{ - int i; - - /* defere delete to all entries */ - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - struct multipath_route *r; - - spin_lock_bh(&state[i].lock); - list_for_each_entry_rcu(r, &state[i].head, list) { - struct multipath_dest *d; - list_for_each_entry_rcu(d, &r->dests, list) { - list_del_rcu(&d->list); - call_rcu(&d->rcu, - __multipath_free_dst); - } - list_del_rcu(&r->list); - call_rcu(&r->rcu, - __multipath_free); - } - - spin_unlock_bh(&state[i].lock); - } -} - -static struct ip_mp_alg_ops wrandom_ops = { - .mp_alg_select_route = wrandom_select_route, - .mp_alg_flush = wrandom_flush, - .mp_alg_set_nhinfo = wrandom_set_nhinfo, -}; - -static int __init wrandom_init(void) -{ - wrandom_init_state(); - - return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -static void __exit wrandom_exit(void) -{ - multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -module_init(wrandom_init); -module_exit(wrandom_exit); -MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c linux-2.6.22-591/net/ipv4/netfilter/arp_tables.c --- linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/arp_tables.c 2007-12-21 15:36:15.000000000 -0500 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -773,7 +774,7 @@ int ret; struct arpt_table *t; - t = xt_find_table_lock(NF_ARP, entries->name); + t = xt_find_table_lock(&init_net, NF_ARP, entries->name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -843,7 +844,7 @@ duprintf("arp_tables: Translated table\n"); - t = 
try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name), + t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, tmp.name), "arptable_%s", tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -936,7 +937,7 @@ goto free; } - t = xt_find_table_lock(NF_ARP, tmp.name); + t = xt_find_table_lock(&init_net, NF_ARP, tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -971,6 +972,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -995,6 +999,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1016,7 +1023,7 @@ } name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -1116,7 +1123,7 @@ return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); + ret = xt_register_table(&init_net, table, &bootstrap, newinfo); if (ret != 0) { xt_free_table_info(newinfo); return ret; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c linux-2.6.22-591/net/ipv4/netfilter/arptable_filter.c --- linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/arptable_filter.c 2007-12-21 15:36:15.000000000 -0500 @@ -61,6 +61,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return arpt_do_table(pskb, hook, in, out, &packet_filter); } diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c linux-2.6.22-591/net/ipv4/netfilter/ip_queue.c --- linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ip_queue.c 2007-12-21 
15:36:15.000000000 -0500 @@ -26,6 +26,7 @@ #include #include #include +#include #define IPQ_QMAX_DEFAULT 1024 #define IPQ_PROC_FS_NAME "ip_queue" @@ -556,6 +557,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -575,7 +579,7 @@ if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -667,14 +671,14 @@ struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - NULL, THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, + ipq_rcv_sk, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -695,8 +699,7 @@ cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); - + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); @@ -715,7 +718,7 @@ unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c linux-2.6.22-591/net/ipv4/netfilter/ip_tables.c --- linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ip_tables.c 2007-12-21 
15:36:15.000000000 -0500 @@ -1039,7 +1039,7 @@ } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1059,7 +1059,7 @@ if (compat) xt_compat_lock(AF_INET); #endif - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (t && !IS_ERR(t)) { struct ipt_getinfo info; @@ -1099,7 +1099,7 @@ } static int -get_entries(struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) { int ret; struct ipt_get_entries get; @@ -1119,7 +1119,7 @@ return -EINVAL; } - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -1142,7 +1142,7 @@ } static int -__do_replace(const char *name, unsigned int valid_hooks, +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { @@ -1159,7 +1159,7 @@ goto out; } - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; @@ -1211,7 +1211,7 @@ } static int -do_replace(void __user *user, unsigned int len) +do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; @@ -1252,7 +1252,7 @@ duprintf("ip_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1289,7 +1289,7 @@ } static int -do_add_counters(void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { unsigned int i; struct xt_counters_info tmp; @@ -1341,7 +1341,7 @@ goto free; } - t = xt_find_table_lock(AF_INET, name); + t = xt_find_table_lock(net, AF_INET, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1745,7 +1745,7 @@ } static int -compat_do_replace(void __user *user, unsigned int len) +compat_do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct compat_ipt_replace tmp; @@ -1786,7 +1786,7 @@ duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1811,11 +1811,11 @@ switch (cmd) { case IPT_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sk->sk_net, user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sk->sk_net, user, len, 1); break; default: @@ -1904,7 +1904,7 @@ } static int -compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len) { int ret; struct compat_ipt_get_entries get; @@ -1928,7 +1928,7 @@ } xt_compat_lock(AF_INET); - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { 
struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1966,10 +1966,10 @@ switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sk->sk_net, user, len, 1); break; case IPT_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sk->sk_net, user, len); break; default: ret = do_ipt_get_ctl(sk, cmd, user, len); @@ -1988,11 +1988,11 @@ switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sk->sk_net, user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sk->sk_net, user, len, 0); break; default: @@ -2013,11 +2013,11 @@ switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sk->sk_net, user, len, 0); break; case IPT_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sk->sk_net, user, len); break; case IPT_SO_GET_REVISION_MATCH: @@ -2054,7 +2054,7 @@ return ret; } -int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) +int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -2082,7 +2082,7 @@ return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); + ret = xt_register_table(net, table, &bootstrap, newinfo); if (ret != 0) { xt_free_table_info(newinfo); return ret; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c linux-2.6.22-591/net/ipv4/netfilter/ipt_CLUSTERIP.c --- linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-12-21 15:36:15.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #define CLUSTERIP_VERSION "0.8" @@ -427,7 +428,7 @@ return 0; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", 
e->ip.iniface); return 0; @@ -523,6 +524,10 @@ struct arp_payload *payload; struct clusterip_config *c; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || arp->ar_pro != htons(ETH_P_IP) @@ -735,7 +740,7 @@ goto cleanup_target; #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); + clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); if (!clusterip_procdir) { printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); ret = -ENOMEM; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.22-591/net/ipv4/netfilter/ipt_MASQUERADE.c --- linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-12-21 15:36:15.000000000 -0500 @@ -131,6 +131,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { /* Device was downed. 
Search entire table for conntracks which were associated with that device, diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.22-591/net/ipv4/netfilter/ipt_REJECT.c --- linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ipt_REJECT.c 2007-12-21 15:36:15.000000000 -0500 @@ -137,7 +137,7 @@ ) addr_type = RTN_LOCAL; - if (ip_route_me_harder(&nskb, addr_type)) + if (ip_route_me_harder(&init_net, &nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.22-591/net/ipv4/netfilter/ipt_ULOG.c --- linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ipt_ULOG.c 2007-12-21 15:36:15.000000000 -0500 @@ -419,7 +419,8 @@ for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c linux-2.6.22-591/net/ipv4/netfilter/ipt_addrtype.c --- linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ipt_addrtype.c 2007-12-21 15:36:15.000000000 -0500 @@ -24,7 +24,7 @@ static inline int match_type(__be32 addr, u_int16_t mask) { - return !!(mask & (1 << inet_addr_type(addr))); + return !!(mask & (1 << inet_addr_type(&init_net, addr))); } static int match(const struct sk_buff *skb, diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c linux-2.6.22-591/net/ipv4/netfilter/ipt_recent.c --- linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/ipt_recent.c 2007-12-21 15:36:15.000000000 -0500 @@ -24,6 +24,7 @@ #include 
#include #include +#include #include #include @@ -485,7 +486,7 @@ #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", proc_net); + proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); if (proc_dir == NULL) { xt_unregister_match(&recent_match); err = -ENOMEM; @@ -499,7 +500,7 @@ BUG_ON(!list_empty(&tables)); xt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_recent", proc_net); + remove_proc_entry("ipt_recent", init_net.proc_net); #endif } diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c linux-2.6.22-591/net/ipv4/netfilter/iptable_filter.c --- linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/iptable_filter.c 2007-12-21 15:36:15.000000000 -0500 @@ -26,7 +26,7 @@ struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata = { +} initial_table = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -51,7 +51,7 @@ .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static struct xt_table ip_packet_filter_dflt = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -67,7 +67,9 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter); + struct net *net = (in?in:out)->nd_net; + + return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter); } static unsigned int @@ -77,6 +79,8 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net *net = (in?in:out)->nd_net; + /* root is playing with raw sockets. 
*/ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { @@ -86,7 +90,7 @@ return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter); } static struct nf_hook_ops ipt_ops[] = { @@ -117,6 +121,30 @@ static int forward = NF_ACCEPT; module_param(forward, bool, 0000); +static int iptable_filter_net_init(struct net *net) +{ + /* Allocate the table */ + net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt, + sizeof(*net->ip_packet_filter), + GFP_KERNEL); + if (!net->ip_packet_filter) + return -ENOMEM; + + /* Register table */ + return ipt_register_table(net, net->ip_packet_filter, &initial_table.repl); +} + +static void iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table(net->ip_packet_filter); + kfree(net->ip_packet_filter); +} + +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .exit = iptable_filter_net_exit, +}; + static int __init iptable_filter_init(void) { int ret; @@ -130,7 +158,7 @@ initial_table.entries[1].target.verdict = -forward - 1; /* Register table */ - ret = ipt_register_table(&packet_filter, &initial_table.repl); + ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; @@ -142,14 +170,14 @@ return ret; cleanup_table: - ipt_unregister_table(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); return ret; } static void __exit iptable_filter_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); } module_init(iptable_filter_init); diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c linux-2.6.22-591/net/ipv4/netfilter/iptable_mangle.c --- linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/iptable_mangle.c 2007-12-21 15:36:15.000000000 -0500 @@ 
-80,6 +80,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } @@ -96,6 +100,10 @@ __be32 saddr, daddr; u_int32_t mark; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { @@ -121,7 +129,7 @@ iph->daddr != daddr || (*pskb)->mark != mark || iph->tos != tos) - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC)) ret = NF_DROP; } @@ -171,7 +179,7 @@ int ret; /* Register table */ - ret = ipt_register_table(&packet_mangler, &initial_table.repl); + ret = ipt_register_table(&init_net, &packet_mangler, &initial_table.repl); if (ret < 0) return ret; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c linux-2.6.22-591/net/ipv4/netfilter/iptable_raw.c --- linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/iptable_raw.c 2007-12-21 15:36:15.000000000 -0500 @@ -52,6 +52,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ipt_do_table(pskb, hook, in, out, &packet_raw); } @@ -96,7 +100,7 @@ int ret; /* Register table */ - ret = ipt_register_table(&packet_raw, &initial_table.repl); + ret = ipt_register_table(&init_net, &packet_raw, &initial_table.repl); if (ret < 0) return ret; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c --- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-12-21 15:36:15.000000000 -0500 @@ -120,6 +120,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(pskb); } @@ -135,6 +139,10 @@ struct nf_conn_help *help; struct nf_conntrack_helper *helper; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) @@ -157,6 +165,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if ((*pskb)->nfct) @@ -180,6 +192,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return nf_conntrack_in(PF_INET, hooknum, pskb); } @@ -189,6 +205,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. 
*/ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { @@ -325,6 +345,9 @@ struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + NF_CT_TUPLE_U_BLANK(&tuple); tuple.src.u3.ip = inet->rcv_saddr; tuple.src.u.tcp.port = inet->sport; diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c --- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-12-21 15:36:15.000000000 -0500 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -378,16 +379,16 @@ { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto err3; @@ -397,16 +398,16 @@ return 0; err3: - proc_net_remove("ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack_expect"); err2: - proc_net_remove("ip_conntrack"); + proc_net_remove(&init_net, "ip_conntrack"); err1: return -ENOMEM; } void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", proc_net_stat); - proc_net_remove("ip_conntrack_expect"); - proc_net_remove("ip_conntrack"); + remove_proc_entry("ip_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack"); } diff -Nurb 
linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_helper.c --- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_helper.c 2007-12-21 15:36:12.000000000 -0500 @@ -178,7 +178,7 @@ datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_ALL_CSUM) { + (*pskb)->dev->features & NETIF_F_V4_CSUM) { (*pskb)->ip_summed = CHECKSUM_PARTIAL; (*pskb)->csum_start = skb_headroom(*pskb) + skb_network_offset(*pskb) + @@ -265,7 +265,7 @@ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_ALL_CSUM) { + (*pskb)->dev->features & NETIF_F_V4_CSUM) { (*pskb)->ip_summed = CHECKSUM_PARTIAL; (*pskb)->csum_start = skb_headroom(*pskb) + skb_network_offset(*pskb) + diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_rule.c --- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_rule.c 2007-12-21 15:36:15.000000000 -0500 @@ -98,7 +98,10 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) { static int warned = 0; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = dstip } } + }; struct rtable *rt; if (ip_route_output_key(&rt, &fl) != 0) @@ -252,7 +255,7 @@ { int ret; - ret = ipt_register_table(&nat_table, &nat_initial_table.repl); + ret = ipt_register_table(&init_net, &nat_table, &nat_initial_table.repl); if (ret != 0) return ret; ret = xt_register_target(&ipt_snat_reg); diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_standalone.c --- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_standalone.c 2007-12-21 15:36:15.000000000 -0500 @@ -83,6 +83,10 @@ /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. */ NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); @@ -172,6 +176,10 @@ unsigned int ret; __be32 daddr = ip_hdr(*pskb)->daddr; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(*pskb)->daddr) { @@ -194,6 +202,10 @@ #endif unsigned int ret; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) @@ -227,6 +239,10 @@ enum ip_conntrack_info ctinfo; unsigned int ret; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. 
*/ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) @@ -239,7 +255,7 @@ if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC)) ret = NF_DROP; } #ifdef CONFIG_XFRM @@ -262,6 +278,10 @@ struct nf_conn *ct; enum ip_conntrack_info ctinfo; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + ct = nf_ct_get(*pskb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { DEBUGP("nf_nat_standalone: adjusting sequence number\n"); diff -Nurb linux-2.6.22-570/net/ipv4/netfilter.c linux-2.6.22-591/net/ipv4/netfilter.c --- linux-2.6.22-570/net/ipv4/netfilter.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/netfilter.c 2007-12-21 15:36:15.000000000 -0500 @@ -8,7 +8,7 @@ #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) +int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type) { const struct iphdr *iph = ip_hdr(*pskb); struct rtable *rt; @@ -17,7 +17,8 @@ unsigned int hh_len; unsigned int type; - type = inet_addr_type(iph->saddr); + fl.fl_net = net; + type = inet_addr_type(net, iph->saddr); if (addr_type == RTN_UNSPEC) addr_type = type; @@ -155,12 +156,13 @@ const struct ip_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP_LOCAL_OUT) { + struct net *net = (info->indev?info->indev:info->outdev)->nd_net; const struct iphdr *iph = ip_hdr(*pskb); if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb, RTN_UNSPEC); + return ip_route_me_harder(net, pskb, RTN_UNSPEC); } return 0; } diff -Nurb linux-2.6.22-570/net/ipv4/proc.c linux-2.6.22-591/net/ipv4/proc.c --- linux-2.6.22-570/net/ipv4/proc.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -44,6 +44,7 @@ #include #include #include +#include static int fold_prot_inuse(struct proto *proto) { @@ -69,8 +70,9 @@ seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, - atomic_read(&ip_frag_mem)); + seq_printf(seq, "FRAG: inuse %d memory %d\n", + init_net.ip_frag_nqueues, + atomic_read(&init_net.ip_frag_mem)); return 0; } @@ -260,7 +262,8 @@ seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); + IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2, + init_net.sysctl_ip_default_ttl); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", @@ -380,20 +383,20 @@ { int rc = 0; - if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) goto out_snmp; - if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops)) goto out_sockstat; out: return rc; out_sockstat: - proc_net_remove("snmp"); + proc_net_remove(&init_net, "snmp"); out_snmp: - proc_net_remove("netstat"); + proc_net_remove(&init_net, "netstat"); out_netstat: rc = -ENOMEM; goto out; diff -Nurb linux-2.6.22-570/net/ipv4/raw.c linux-2.6.22-591/net/ipv4/raw.c --- linux-2.6.22-570/net/ipv4/raw.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/raw.c 2007-12-21 15:36:15.000000000 -0500 @@ -73,6 +73,7 @@ #include #include #include +#include #include #include 
#include @@ -101,7 +102,7 @@ write_unlock_bh(&raw_v4_lock); } -struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int tag) { @@ -110,6 +111,9 @@ sk_for_each_from(sk, node) { struct inet_sock *inet = inet_sk(sk); + if (sk->sk_net != net) + continue; + if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) && @@ -152,6 +156,7 @@ */ int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { + struct net *net = skb->dev->nd_net; struct sock *sk; struct hlist_head *head; int delivered = 0; @@ -160,7 +165,7 @@ head = &raw_v4_htable[hash]; if (hlist_empty(head)) goto out; - sk = __raw_v4_lookup(__sk_head(head), iph->protocol, + sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex, skb->skb_tag); @@ -173,7 +178,7 @@ if (clone) raw_rcv(sk, clone); } - sk = __raw_v4_lookup(sk_next(sk), iph->protocol, + sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex, skb->skb_tag); } @@ -484,7 +489,8 @@ } { - struct flowi fl = { .oif = ipc.oif, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = ipc.oif, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -574,7 +580,7 @@ if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; v4_map_sock_addr(inet, addr, &nsa); - chk_addr_ret = inet_addr_type(nsa.saddr); + chk_addr_ret = inet_addr_type(sk->sk_net, nsa.saddr); ret = -EADDRNOTAVAIL; if (nsa.saddr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -798,6 +804,7 @@ #ifdef CONFIG_PROC_FS struct raw_iter_state { + struct net *net; int bucket; }; @@ -811,11 +818,14 @@ for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; - sk_for_each(sk, node, &raw_v4_htable[state->bucket]) 
+ sk_for_each(sk, node, &raw_v4_htable[state->bucket]) { + if (sk->sk_net != state->net) + continue; if (sk->sk_family == PF_INET && nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) goto found; } + } sk = NULL; found: return sk; @@ -830,7 +840,7 @@ try_again: ; } while (sk && (sk->sk_family != PF_INET || - !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT) || (sk->sk_net != state->net))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); @@ -933,6 +943,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -940,23 +951,46 @@ goto out; } +static int raw_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct raw_iter_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + static const struct file_operations raw_seq_fops = { .owner = THIS_MODULE, .open = raw_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = raw_seq_release, }; -int __init raw_proc_init(void) +static int raw_proc_net_init(struct net *net) { - if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) + if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) return -ENOMEM; return 0; } +static void raw_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "raw"); +} + +static struct pernet_operations raw_proc_net_ops = { + .init = raw_proc_net_init, + .exit = raw_proc_net_exit, +}; + +int __init raw_proc_init(void) +{ + return register_pernet_subsys(&raw_proc_net_ops); +} + void __init raw_proc_exit(void) { - proc_net_remove("raw"); + unregister_pernet_subsys(&raw_proc_net_ops); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/ipv4/route.c linux-2.6.22-591/net/ipv4/route.c --- linux-2.6.22-570/net/ipv4/route.c 2007-07-08 19:32:17.000000000 -0400 +++ 
linux-2.6.22-591/net/ipv4/route.c 2007-12-21 15:36:15.000000000 -0500 @@ -101,8 +101,8 @@ #include #include #include -#include #include +#include #include #ifdef CONFIG_SYSCTL #include @@ -266,6 +266,7 @@ #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { + struct net *net; int bucket; }; @@ -334,6 +335,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) { + struct rt_cache_iter_state *st = seq->private; if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" @@ -343,6 +345,9 @@ struct rtable *r = v; char temp[256]; + if (r->fl.fl_net != st->net) + return 0; + sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X", r->u.dst.dev ? r->u.dst.dev->name : "*", @@ -385,6 +390,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -392,12 +398,20 @@ goto out; } +static int rt_cache_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct rt_cache_iter_state *st = seq->private; + put_net(st->net); + return seq_release_private(inode, file); +} + static const struct file_operations rt_cache_seq_fops = { .owner = THIS_MODULE, .open = rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = rt_cache_seq_release, }; @@ -495,13 +509,11 @@ static __inline__ void rt_free(struct rtable *rt) { - multipath_remove(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } static __inline__ void rt_drop(struct rtable *rt) { - multipath_remove(rt); ip_rt_put(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } @@ -565,61 +577,16 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + return (((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | 
(fl1->mark ^ fl2->mark) | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + (fl1->iif ^ fl2->iif)) == 0) && + fl1->fl_net == fl2->fl_net; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, - struct rtable *expentry, - int *removed_count) -{ - int passedexpired = 0; - struct rtable **nextstep = NULL; - struct rtable **rthp = chain_head; - struct rtable *rth; - - if (removed_count) - *removed_count = 0; - - while ((rth = *rthp) != NULL) { - if (rth == expentry) - passedexpired = 1; - - if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && - compare_keys(&(*rthp)->fl, &expentry->fl)) { - if (*rthp == expentry) { - *rthp = rth->u.dst.rt_next; - continue; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - if (removed_count) - ++(*removed_count); - } - } else { - if (!((*rthp)->u.dst.flags & DST_BALANCED) && - passedexpired && !nextstep) - nextstep = &rth->u.dst.rt_next; - - rthp = &rth->u.dst.rt_next; - } - } - - rt_free(expentry); - if (removed_count) - ++(*removed_count); - - return nextstep; -} -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - - /* This runs via a timer and thus is always in BH context. */ static void rt_check_expire(unsigned long dummy) { @@ -658,23 +625,9 @@ } /* Cleanup aged off entries. */ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries if necessary */ - if (rth->u.dst.flags & DST_BALANCED) { - rthp = rt_remove_balanced_route( - &rt_hash_table[i].chain, - rth, NULL); - if (!rthp) - break; - } else { *rthp = rth->u.dst.rt_next; rt_free(rth); } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - *rthp = rth->u.dst.rt_next; - rt_free(rth); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - } spin_unlock(rt_hash_lock_addr(i)); /* Fallback loop breaker. 
*/ @@ -721,9 +674,6 @@ if (delay < 0) delay = ip_rt_min_delay; - /* flush existing multipath state*/ - multipath_flush(); - spin_lock_bh(&rt_flush_lock); if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { @@ -842,31 +792,10 @@ rthp = &rth->u.dst.rt_next; continue; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries - * if necessary - */ - if (rth->u.dst.flags & DST_BALANCED) { - int r; - - rthp = rt_remove_balanced_route( - &rt_hash_table[k].chain, - rth, - &r); - goal -= r; - if (!rthp) - break; - } else { *rthp = rth->u.dst.rt_next; rt_free(rth); goal--; } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - *rthp = rth->u.dst.rt_next; - rt_free(rth); - goal--; -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - } spin_unlock_bh(rt_hash_lock_addr(k)); if (goal <= 0) break; @@ -939,12 +868,7 @@ spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (!(rth->u.dst.flags & DST_BALANCED) && - compare_keys(&rth->fl, &rt->fl)) { -#else if (compare_keys(&rth->fl, &rt->fl)) { -#endif /* Put it first */ *rthp = rth->u.dst.rt_next; /* @@ -1055,7 +979,7 @@ static DEFINE_SPINLOCK(rt_peer_lock); struct inet_peer *peer; - peer = inet_getpeer(rt->rt_dst, create); + peer = inet_getpeer(rt->fl.fl_net, rt->rt_dst, create); spin_lock_bh(&rt_peer_lock); if (rt->peer == NULL) { @@ -1148,7 +1072,7 @@ if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) goto reject_redirect; } else { - if (inet_addr_type(new_gw) != RTN_UNICAST) + if (inet_addr_type(dev->nd_net, new_gw) != RTN_UNICAST) goto reject_redirect; } @@ -1189,6 +1113,7 @@ /* Copy all the information. 
*/ *rt = *rth; + hold_net(rt->fl.fl_net); INIT_RCU_HEAD(&rt->u.dst.rcu_head); rt->u.dst.__use = 1; atomic_set(&rt->u.dst.__refcnt, 1); @@ -1407,7 +1332,7 @@ __be32 daddr = iph->daddr; unsigned short est_mtu = 0; - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) return 0; for (i = 0; i < 2; i++) { @@ -1489,6 +1414,7 @@ rt->idev = NULL; in_dev_put(idev); } + release_net(rt->fl.fl_net); } static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1496,8 +1422,9 @@ { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != &loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct net *net = dev->nd_net; + if (dev != &net->loopback_dev && idev && idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(&net->loopback_dev); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1584,7 +1511,7 @@ rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; + rt->u.dst.metrics[RTAX_HOPLIMIT-1] = init_net.sysctl_ip_default_ttl; if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) @@ -1605,6 +1532,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { + struct net *net = dev->nd_net; unsigned hash; struct rtable *rth; __be32 spec_dst; @@ -1638,6 +1566,7 @@ rth->u.dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; + rth->fl.fl_net = hold_net(net); rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1649,7 +1578,7 @@ #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &net->loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); 
rth->fl.oif = 0; @@ -1774,14 +1703,11 @@ atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; -#endif if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; + rth->fl.fl_net = hold_net(in_dev->dev->nd_net); rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1812,7 +1738,7 @@ return err; } -static inline int ip_mkroute_input_def(struct sk_buff *skb, +static inline int ip_mkroute_input(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, @@ -1837,63 +1763,6 @@ return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } -static inline int ip_mkroute_input(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL, *rtres; - unsigned char hop, hopcount; - int err = -EINVAL; - unsigned int hash; - - if (res->fi) - hopcount = res->fi->fib_nhs; - else - hopcount = 1; - - /* distinguish between multipath and singlepath */ - if (hopcount < 2) - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, - saddr, tos); - - /* add all alternatives to the routing cache */ - for (hop = 0; hop < hopcount; hop++) { - res->nh_sel = hop; - - /* put reference to previous result */ - if (hop) - ip_rt_put(rtres); - - /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, - &rth); - if (err) - return err; - - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); - err = rt_intern_hash(hash, rth, &rtres); - if (err) - return err; - - /* forward hop information to multipath impl. 
*/ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - } - skb->dst = &rtres->u.dst; - return err; -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -} - - /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -1907,9 +1776,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { + struct net *net = dev->nd_net; struct fib_result res; struct in_device *in_dev = in_dev_get(dev); - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = net, + .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, .tos = tos, @@ -1967,7 +1838,7 @@ if (res.type == RTN_LOCAL) { int result; result = fib_validate_source(saddr, daddr, tos, - loopback_dev.ifindex, + net->loopback_dev.ifindex, dev, &spec_dst, &itag); if (result < 0) goto martian_source; @@ -2023,6 +1894,7 @@ rth->u.dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; + rth->fl.fl_net = hold_net(net); rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2034,7 +1906,7 @@ #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &net->loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->rt_gateway = daddr; @@ -2092,6 +1964,7 @@ struct rtable * rth; unsigned hash; int iif = dev->ifindex; + struct net *net = dev->nd_net; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif); @@ -2104,7 +1977,8 @@ rth->fl.iif == iif && rth->fl.oif == 0 && rth->fl.mark == skb->mark && - rth->fl.fl4_tos == tos) { + rth->fl.fl4_tos == tos && + rth->fl.fl_net == net) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2211,18 +2085,12 @@ atomic_set(&rth->u.dst.__refcnt, 
1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi) { - rth->rt_multipath_alg = res->fi->fib_mp_alg; - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; - } -#endif if (IN_DEV_CONF_GET(in_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; + rth->fl.fl_net = hold_net(oldflp->fl_net); rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; rth->fl.fl4_src = oldflp->fl4_src; @@ -2277,7 +2145,7 @@ return err; } -static inline int ip_mkroute_output_def(struct rtable **rp, +static inline int ip_mkroute_output(struct rtable **rp, struct fib_result* res, const struct flowi *fl, const struct flowi *oldflp, @@ -2295,68 +2163,6 @@ return err; } -static inline int ip_mkroute_output(struct rtable** rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - unsigned char hop; - unsigned hash; - int err = -EINVAL; - struct rtable *rth = NULL; - - if (res->fi && res->fi->fib_nhs > 1) { - unsigned char hopcount = res->fi->fib_nhs; - - for (hop = 0; hop < hopcount; hop++) { - struct net_device *dev2nexthop; - - res->nh_sel = hop; - - /* hold a work reference to the output device */ - dev2nexthop = FIB_RES_DEV(*res); - dev_hold(dev2nexthop); - - /* put reference to previous result */ - if (hop) - ip_rt_put(*rp); - - err = __mkroute_output(&rth, res, fl, oldflp, - dev2nexthop, flags); - - if (err != 0) - goto cleanup; - - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, - oldflp->oif); - err = rt_intern_hash(hash, rth, rp); - - /* forward hop information to multipath impl. 
*/ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - cleanup: - /* release work reference to output device */ - dev_put(dev2nexthop); - - if (err != 0) - return err; - } - return err; - } else { - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, - flags); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags); -#endif -} - /* * Major route resolver routine. */ @@ -2364,7 +2170,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .nl_u = { .ip4_u = + struct net *net = oldflp->fl_net; + struct flowi fl = { .fl_net = net, + .nl_u = { .ip4_u = { .daddr = oldflp->fl4_dst, .saddr = oldflp->fl4_src, .tos = tos & IPTOS_RT_MASK, @@ -2373,7 +2181,7 @@ RT_SCOPE_UNIVERSE), } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = net->loopback_dev.ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; @@ -2395,7 +2203,7 @@ goto out; /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(oldflp->fl4_src); + dev_out = ip_dev_find(net, oldflp->fl4_src); if (dev_out == NULL) goto out; @@ -2434,7 +2242,7 @@ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(net, oldflp->oif); err = -ENODEV; if (dev_out == NULL) goto out; @@ -2467,9 +2275,9 @@ fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &net->loopback_dev; dev_hold(dev_out); - fl.oif = loopback_dev.ifindex; + fl.oif = net->loopback_dev.ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2514,7 +2322,7 @@ fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &net->loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -2568,19 +2376,9 @@ rth->fl.iif == 0 && 
rth->fl.oif == flp->oif && rth->fl.mark == flp->mark && + rth->fl.fl_net == flp->fl_net && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { - - /* check for multipath routes and choose one if - * necessary - */ - if (multipath_select_route(flp, rth, rp)) { - dst_hold(&(*rp)->u.dst); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - return 0; - } - rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2729,10 +2527,6 @@ if (rt->u.dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) - NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); -#endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) @@ -2759,7 +2553,7 @@ __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && - IPV4_DEVCONF_ALL(MC_FORWARDING)) { + IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { @@ -2790,6 +2584,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; @@ -2828,7 +2623,7 @@ if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; @@ -2845,6 +2640,7 @@ err = -rt->u.dst.error; } else { struct flowi fl = { + .fl_net = net, .nl_u = { .ip4_u = { .daddr = dst, @@ -2869,7 +2665,7 @@ if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; @@ -3182,6 +2978,48 @@ } __setup("rhash_entries=", set_rhash_entries); + +static void ip_rt_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS +# ifdef CONFIG_NET_CLS_ROUTE + proc_net_remove(net, "rt_acct"); +# endif + 
remove_proc_entry("rt_cache", net->proc_net_stat); + proc_net_remove(net, "rt_cache"); +#endif + rt_run_flush(0); +} + +static int ip_rt_net_init(struct net *net) +{ + int error = -ENOMEM; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *rtstat_pde; + if (!proc_net_fops_create(net, "rt_cache", S_IRUGO, &rt_cache_seq_fops)) + goto out; + if (!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, + net->proc_net_stat))) + goto out; + rtstat_pde->proc_fops = &rt_cpu_seq_fops; +# ifdef CONFIG_NET_CLS_ROUTE + if (!create_proc_read_entry("rt_acct", 0, net->proc_net, + ip_rt_acct_read, NULL)) + goto out; +# endif +#endif + error = 0; +out: + if (error) + ip_rt_net_exit(net); + return error; +} + +struct pernet_operations ip_rt_net_ops = { + .init = ip_rt_net_init, + .exit = ip_rt_net_exit, +}; + int __init ip_rt_init(void) { int rc = 0; @@ -3245,20 +3083,7 @@ ip_rt_secret_interval; add_timer(&rt_secret_timer); -#ifdef CONFIG_PROC_FS - { - struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ - if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || - !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, - proc_net_stat))) { - return -ENOMEM; - } - rtstat_pde->proc_fops = &rt_cpu_seq_fops; - } -#ifdef CONFIG_NET_CLS_ROUTE - create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); -#endif -#endif + register_pernet_subsys(&ip_rt_net_ops); #ifdef CONFIG_XFRM xfrm_init(); xfrm4_init(); diff -Nurb linux-2.6.22-570/net/ipv4/syncookies.c linux-2.6.22-591/net/ipv4/syncookies.c --- linux-2.6.22-570/net/ipv4/syncookies.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/syncookies.c 2007-12-21 15:36:15.000000000 -0500 @@ -253,7 +253,8 @@ * no easy way to do this. */ { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = ((opt && opt->srr) ? 
opt->faddr : ireq->rmt_addr), diff -Nurb linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c linux-2.6.22-591/net/ipv4/sysctl_net_ipv4.c --- linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c 2007-12-21 15:36:02.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/sysctl_net_ipv4.c 2007-12-21 15:36:15.000000000 -0500 @@ -29,21 +29,21 @@ static int ip_local_port_range_max[] = { 65535, 65535 }; #endif -struct ipv4_config ipv4_config; - #ifdef CONFIG_SYSCTL static int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int val = IPV4_DEVCONF_ALL(FORWARDING); + struct net *net = ctl->extra2; + int *valp = ctl->data; + int old = *valp; int ret; ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && IPV4_DEVCONF_ALL(FORWARDING) != val) - inet_forward_change(); + if (write && *valp != old) + inet_forward_change(net); return ret; } @@ -53,6 +53,7 @@ void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { + struct net *net = table->extra2; int *valp = table->data; int new; @@ -85,7 +86,7 @@ } *valp = new; - inet_forward_change(); + inet_forward_change(net); return 1; } @@ -188,22 +189,6 @@ ctl_table ipv4_table[] = { { - .ctl_name = NET_IPV4_TCP_TIMESTAMPS, - .procname = "tcp_timestamps", - .data = &sysctl_tcp_timestamps, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, - .procname = "tcp_window_scaling", - .data = &sysctl_tcp_window_scaling, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_IPV4_TCP_SACK, .procname = "tcp_sack", .data = &sysctl_tcp_sack, @@ -220,40 +205,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_FORWARD, - .procname = "ip_forward", - .data = &IPV4_DEVCONF_ALL(FORWARDING), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_sysctl_forward, - .strategy = &ipv4_sysctl_forward_strategy - }, - { - .ctl_name = 
NET_IPV4_DEFAULT_TTL, - .procname = "ip_default_ttl", - .data = &sysctl_ip_default_ttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_NO_PMTU_DISC, - .procname = "ip_no_pmtu_disc", - .data = &ipv4_config.no_pmtu_disc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_NONLOCAL_BIND, - .procname = "ip_nonlocal_bind", - .data = &sysctl_ip_nonlocal_bind, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_IPV4_TCP_SYN_RETRIES, .procname = "tcp_syn_retries", .data = &sysctl_tcp_syn_retries, @@ -286,39 +237,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, - .procname = "ipfrag_high_thresh", - .data = &sysctl_ipfrag_high_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, - .procname = "ipfrag_low_thresh", - .data = &sysctl_ipfrag_low_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_DYNADDR, - .procname = "ip_dynaddr", - .data = &sysctl_ip_dynaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_IPFRAG_TIME, - .procname = "ipfrag_time", - .data = &sysctl_ipfrag_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, .procname = "tcp_keepalive_time", .data = &sysctl_tcp_keepalive_time, @@ -422,17 +340,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, - .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = 
ip_local_port_range_min, - .extra2 = ip_local_port_range_max - }, - { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, .procname = "icmp_echo_ignore_all", .data = &sysctl_icmp_echo_ignore_all, @@ -534,50 +441,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, - .procname = "inet_peer_threshold", - .data = &inet_peer_threshold, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_INET_PEER_MINTTL, - .procname = "inet_peer_minttl", - .data = &inet_peer_minttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_INET_PEER_MAXTTL, - .procname = "inet_peer_maxttl", - .data = &inet_peer_maxttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, - .procname = "inet_peer_gc_mintime", - .data = &inet_peer_gc_mintime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, - .procname = "inet_peer_gc_maxtime", - .data = &inet_peer_gc_maxtime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { .ctl_name = NET_TCP_ORPHAN_RETRIES, .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, @@ -706,24 +569,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, - .procname = "ipfrag_secret_interval", - .data = &sysctl_ipfrag_secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, - .procname = "ipfrag_max_dist", - .data = &sysctl_ipfrag_max_dist, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &zero - }, - { 
.ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", .data = &sysctl_tcp_nometrics_save, @@ -865,6 +710,181 @@ { .ctl_name = 0 } }; -#endif /* CONFIG_SYSCTL */ +struct ctl_table multi_ipv4_table[] = { + { + /* .data is filled in by devinet_net_init. + * As a consequence this table entry must be the first + * entry in multi_ipv4_table. + */ + .ctl_name = NET_IPV4_FORWARD, + .procname = "ip_forward", + .data = NULL, + .extra2 = &init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ipv4_sysctl_forward, + .strategy = &ipv4_sysctl_forward_strategy + }, + { + .ctl_name = NET_IPV4_DEFAULT_TTL, + .procname = "ip_default_ttl", + .data = &init_net.sysctl_ip_default_ttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ipv4_doint_and_flush, + .strategy = &ipv4_doint_and_flush_strategy, + }, + { + .ctl_name = NET_IPV4_NO_PMTU_DISC, + .procname = "ip_no_pmtu_disc", + .data = &init_net.sysctl_ipv4_no_pmtu_disc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_NONLOCAL_BIND, + .procname = "ip_nonlocal_bind", + .data = &init_net.sysctl_ip_nonlocal_bind, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, + .procname = "ip_local_port_range", + .data = &init_net.sysctl_local_port_range, + .maxlen = sizeof(init_net.sysctl_local_port_range), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = ip_local_port_range_min, + .extra2 = ip_local_port_range_max + }, + { + .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, + .procname = "ipfrag_high_thresh", + .data = &init_net.sysctl_ipfrag_high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, + .procname = "ipfrag_low_thresh", + .data = &init_net.sysctl_ipfrag_low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + 
{ + .ctl_name = NET_IPV4_IPFRAG_TIME, + .procname = "ipfrag_time", + .data = &init_net.sysctl_ipfrag_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, + .procname = "ipfrag_secret_interval", + .data = &init_net.sysctl_ipfrag_secret_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, + .procname = "ipfrag_max_dist", + .data = &init_net.sysctl_ipfrag_max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero + }, + { + .ctl_name = NET_IPV4_DYNADDR, + .procname = "ip_dynaddr", + .data = &init_net.sysctl_ip_dynaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, + .procname = "ip_local_port_range", + .data = &init_net.sysctl_local_port_range, + .maxlen = sizeof(init_net.sysctl_local_port_range), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = ip_local_port_range_min, + .extra2 = ip_local_port_range_max + }, + { + .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, + .procname = "inet_peer_threshold", + .data = &init_net.inet_peer_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_INET_PEER_MINTTL, + .procname = "inet_peer_minttl", + .data = &init_net.inet_peer_minttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_INET_PEER_MAXTTL, + .procname = "inet_peer_maxttl", + .data = &init_net.inet_peer_maxttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, + .procname = 
"inet_peer_gc_mintime", + .data = &init_net.inet_peer_gc_mintime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, + .procname = "inet_peer_gc_maxtime", + .data = &init_net.inet_peer_gc_maxtime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_TCP_TIMESTAMPS, + .procname = "tcp_timestamps", + .data = &init_net.sysctl_tcp_timestamps, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + + }, + { + .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, + .procname = "tcp_window_scaling", + .data = &init_net.sysctl_tcp_window_scaling, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; -EXPORT_SYMBOL(ipv4_config); +#endif /* CONFIG_SYSCTL */ diff -Nurb linux-2.6.22-570/net/ipv4/tcp.c linux-2.6.22-591/net/ipv4/tcp.c --- linux-2.6.22-570/net/ipv4/tcp.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/tcp.c 2007-12-21 15:36:15.000000000 -0500 @@ -2409,6 +2409,23 @@ } __setup("thash_entries=", set_thash_entries); +static int tcp_net_init(struct net *net) +{ + /* + * This array holds the first and last local port number. 
+ */ + net->sysctl_local_port_range[0] = 32768; + net->sysctl_local_port_range[1] = 61000; + + net->sysctl_tcp_timestamps = 1; + net->sysctl_tcp_window_scaling = 1; + return 0; +} + +static struct pernet_operations tcp_net_ops = { + .init = tcp_net_init, +}; + void __init tcp_init(void) { struct sk_buff *skb = NULL; @@ -2502,6 +2519,8 @@ sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share); + register_pernet_subsys(&tcp_net_ops); + printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); diff -Nurb linux-2.6.22-570/net/ipv4/tcp_input.c linux-2.6.22-591/net/ipv4/tcp_input.c --- linux-2.6.22-570/net/ipv4/tcp_input.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/tcp_input.c 2007-12-21 15:36:15.000000000 -0500 @@ -72,8 +72,6 @@ #include #include -int sysctl_tcp_timestamps __read_mostly = 1; -int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; @@ -2922,7 +2920,7 @@ break; case TCPOPT_WINDOW: if (opsize==TCPOLEN_WINDOW && th->syn && !estab) - if (sysctl_tcp_window_scaling) { + if (init_net.sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *) ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -2938,7 +2936,7 @@ case TCPOPT_TIMESTAMP: if (opsize==TCPOLEN_TIMESTAMP) { if ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps)) { + (!estab && init_net.sysctl_tcp_timestamps)) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c linux-2.6.22-591/net/ipv4/tcp_ipv4.c --- linux-2.6.22-570/net/ipv4/tcp_ipv4.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/tcp_ipv4.c 2007-12-21 15:36:15.000000000 -0500 @@ -71,6 +71,7 @@ #include #include #include +#include 
#include #include @@ -353,6 +354,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) { + struct net *net = skb->dev->nd_net; struct iphdr *iph = (struct iphdr *)skb->data; struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); struct tcp_sock *tp; @@ -369,7 +371,7 @@ } sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, inet_iif(skb)); + th->source, inet_iif(skb), net); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1499,7 +1501,8 @@ return tcp_check_req(sk, skb, req, prev); nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + iph->daddr, th->dest, inet_iif(skb), + sk->sk_net); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1618,6 +1621,7 @@ int tcp_v4_rcv(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; const struct iphdr *iph; struct tcphdr *th; struct sock *sk; @@ -1657,7 +1661,7 @@ TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + iph->daddr, th->dest, inet_iif(skb), net); if (!sk) goto no_tcp_socket; @@ -1732,7 +1736,7 @@ case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, iph->daddr, th->dest, - inet_iif(skb)); + inet_iif(skb), net); if (sk2) { inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); inet_twsk_put(inet_twsk(sk)); @@ -1766,7 +1770,7 @@ int release_it = 0; if (!rt || rt->rt_dst != inet->daddr) { - peer = inet_getpeer(inet->daddr, 1); + peer = inet_getpeer(sk->sk_net, inet->daddr, 1); release_it = 1; } else { if (!rt->peer) @@ -1791,7 +1795,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer(tw->tw_net, tw->tw_daddr, 1); if (peer) { const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -1980,7 +1984,8 @@ if (req->sk && !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) continue; - if 
(req->rsk_ops->family == st->family) { + if ((req->rsk_ops->family == st->family) && + (req->sk->sk_net == st->net)) { cur = req; goto out; } @@ -2004,6 +2009,8 @@ } get_sk: sk_for_each_from(sk, node) { + if (sk->sk_net != st->net) + continue; vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", sk, sk->sk_nid, nx_current_nid()); if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) @@ -2054,11 +2061,10 @@ struct hlist_node *node; struct inet_timewait_sock *tw; - /* We can reschedule _before_ having picked the target: */ - cond_resched_softirq(); - - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + if (sk->sk_net != st->net) + continue; vxdprintk(VXD_CBIT(net, 6), "sk,egf: %p [#%d] (from %d)", sk, sk->sk_nid, nx_current_nid()); @@ -2072,6 +2078,8 @@ st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, &tcp_hashinfo.ehash[st->bucket].twchain) { + if (tw->tw_net != st->net) + continue; vxdprintk(VXD_CBIT(net, 6), "tw: %p [#%d] (from %d)", tw, tw->tw_nid, nx_current_nid()); @@ -2082,7 +2090,7 @@ rc = tw; goto out; } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2102,7 +2110,8 @@ tw = cur; tw = tw_next(tw); get_tw: - while (tw && (tw->tw_family != st->family || + while (tw && ((tw->tw_net != st->net) || + (tw->tw_family != st->family) || !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) { tw = tw_next(tw); } @@ -2110,14 +2119,11 @@ cur = tw; goto out; } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; - /* We can reschedule between buckets: */ - cond_resched_softirq(); - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk = 
sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; @@ -2130,6 +2136,8 @@ vxdprintk(VXD_CBIT(net, 6), "sk,egn: %p [#%d] (from %d)", sk, sk->sk_nid, nx_current_nid()); + if (sk->sk_net != st->net) + continue; if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) continue; if (sk->sk_family == st->family) @@ -2167,7 +2175,6 @@ if (!rc) { inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2200,7 +2207,6 @@ rc = listening_get_next(seq, v); if (!rc) { inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2232,8 +2238,7 @@ case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); - local_bh_enable(); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); break; } } @@ -2262,6 +2267,7 @@ goto out_kfree; seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -2269,20 +2275,30 @@ goto out; } -int tcp_proc_register(struct tcp_seq_afinfo *afinfo) +static int tcp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct tcp_iter_state *st = seq->private; + put_net(st->net); + return seq_release_private(inode, file); +} + +int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) { int rc = 0; struct proc_dir_entry *p; if (!afinfo) return -EINVAL; + if (net == &init_net) { afinfo->seq_fops->owner = afinfo->owner; afinfo->seq_fops->open = tcp_seq_open; afinfo->seq_fops->read = seq_read; afinfo->seq_fops->llseek = seq_lseek; - afinfo->seq_fops->release = seq_release_private; + afinfo->seq_fops->release = tcp_seq_release; + } - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -2290,11 +2306,12 @@ 
return rc; } -void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) +void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(net, afinfo->name); + if (net == &init_net) memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -2439,14 +2456,29 @@ .seq_fops = &tcp4_seq_fops, }; +static int tcp4_proc_net_init(struct net *net) +{ + return tcp_proc_register(net, &tcp4_seq_afinfo); +} + +static void tcp4_proc_net_exit(struct net *net) +{ + tcp_proc_unregister(net, &tcp4_seq_afinfo); +} + +static struct pernet_operations tcp4_proc_net_ops = { + .init = tcp4_proc_net_init, + .exit = tcp4_proc_net_exit, +}; + int __init tcp4_proc_init(void) { - return tcp_proc_register(&tcp4_seq_afinfo); + return register_pernet_subsys(&tcp4_proc_net_ops); } void tcp4_proc_exit(void) { - tcp_proc_unregister(&tcp4_seq_afinfo); + unregister_pernet_subsys(&tcp4_proc_net_ops); } #endif /* CONFIG_PROC_FS */ @@ -2508,6 +2540,5 @@ EXPORT_SYMBOL(tcp_proc_register); EXPORT_SYMBOL(tcp_proc_unregister); #endif -EXPORT_SYMBOL(sysctl_local_port_range); EXPORT_SYMBOL(sysctl_tcp_low_latency); diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig linux-2.6.22-591/net/ipv4/tcp_ipv4.c.orig --- linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/tcp_ipv4.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,2483 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Implementation of the Transmission Control Protocol(TCP). 
- * - * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ - * - * IPv4 specific functions - * - * - * code split from: - * linux/ipv4/tcp.c - * linux/ipv4/tcp_input.c - * linux/ipv4/tcp_output.c - * - * See tcp.c for author information - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * Changes: - * David S. Miller : New socket lookup architecture. - * This code is dedicated to John Dyson. - * David S. Miller : Change semantics of established hash, - * half is devoted to TIME_WAIT sockets - * and the rest go in the other half. - * Andi Kleen : Add support for syncookies and fixed - * some bugs: ip options weren't passed to - * the TCP layer, missed a check for an - * ACK bit. - * Andi Kleen : Implemented fast path mtu discovery. - * Fixed many serious bugs in the - * request_sock handling and moved - * most of it into the af independent code. - * Added tail drop and some other bugfixes. - * Added new listen semantics. - * Mike McLagan : Routing by source - * Juan Jose Ciarlante: ip_dynaddr bits - * Andi Kleen: various fixes. - * Vitaly E. Lavrov : Transparent proxy revived after year - * coma. - * Andi Kleen : Fix new listen. - * Andi Kleen : Fix accept error reporting. - * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which - * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind - * a single port at the same time. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -int sysctl_tcp_tw_reuse __read_mostly; -int sysctl_tcp_low_latency __read_mostly; - -/* Check TCP sequence numbers in ICMP packets. 
*/ -#define ICMP_MIN_LENGTH 8 - -/* Socket used for sending RSTs */ -static struct socket *tcp_socket __read_mostly; - -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); - -#ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, - __be32 addr); -static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - int tcplen); -#endif - -struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), -}; - -static int tcp_v4_get_port(struct sock *sk, unsigned short snum) -{ - return inet_csk_get_port(&tcp_hashinfo, sk, snum, - inet_csk_bind_conflict); -} - -static void tcp_v4_hash(struct sock *sk) -{ - inet_hash(&tcp_hashinfo, sk); -} - -void tcp_unhash(struct sock *sk) -{ - inet_unhash(&tcp_hashinfo, sk); -} - -static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) -{ - return secure_tcp_sequence_number(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); -} - -int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) -{ - const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); - struct tcp_sock *tp = tcp_sk(sk); - - /* With PAWS, it is safe from the viewpoint - of data integrity. Even without PAWS it is safe provided sequence - spaces do not overlap i.e. at data rates <= 80Mbit/sec. - - Actually, the idea is close to VJ's one, only timestamp cache is - held not per host, but per port pair and TW bucket is used as state - holder. - - If TW bucket has been already destroyed we fall back to VJ's scheme - and use initial timestamp retrieved from peer table. 
- */ - if (tcptw->tw_ts_recent_stamp && - (twp == NULL || (sysctl_tcp_tw_reuse && - get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sktw); - return 1; - } - - return 0; -} - -EXPORT_SYMBOL_GPL(tcp_twsk_unique); - -/* This will initiate an outgoing connection. */ -int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) -{ - struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct rtable *rt; - __be32 daddr, nexthop; - int tmp; - int err; - - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - if (usin->sin_family != AF_INET) - return -EAFNOSUPPORT; - - nexthop = daddr = usin->sin_addr.s_addr; - if (inet->opt && inet->opt->srr) { - if (!daddr) - return -EINVAL; - nexthop = inet->opt->faddr; - } - - tmp = ip_route_connect(&rt, nexthop, inet->saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_TCP, - inet->sport, usin->sin_port, sk, 1); - if (tmp < 0) { - if (tmp == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - return tmp; - } - - if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { - ip_rt_put(rt); - return -ENETUNREACH; - } - - if (!inet->opt || !inet->opt->srr) - daddr = rt->rt_dst; - - if (!inet->saddr) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; - - if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { - /* Reset inherited state */ - tp->rx_opt.ts_recent = 0; - tp->rx_opt.ts_recent_stamp = 0; - tp->write_seq = 0; - } - - if (tcp_death_row.sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { - struct inet_peer *peer = rt_get_peer(rt); - /* - * VJ's idea. 
We save last timestamp seen from - * the destination in peer table, when entering state - * TIME-WAIT * and initialize rx_opt.ts_recent from it, - * when trying new connection. - */ - if (peer != NULL && - peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; - } - } - - inet->dport = usin->sin_port; - inet->daddr = daddr; - - inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet->opt) - inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; - - tp->rx_opt.mss_clamp = 536; - - /* Socket identity is still unknown (sport may be zero). - * However we set state to SYN-SENT and not releasing socket - * lock select source port, enter ourselves into the hash tables and - * complete initialization after this. - */ - tcp_set_state(sk, TCP_SYN_SENT); - err = inet_hash_connect(&tcp_death_row, sk); - if (err) - goto failure; - - err = ip_route_newports(&rt, IPPROTO_TCP, - inet->sport, inet->dport, sk); - if (err) - goto failure; - - /* OK, now commit destination to socket. */ - sk->sk_gso_type = SKB_GSO_TCPV4; - sk_setup_caps(sk, &rt->u.dst); - - if (!tp->write_seq) - tp->write_seq = secure_tcp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, - usin->sin_port); - - inet->id = tp->write_seq ^ jiffies; - - err = tcp_connect(sk); - rt = NULL; - if (err) - goto failure; - - return 0; - -failure: - /* - * This unhashes the socket and releases the local port, - * if necessary. - */ - tcp_set_state(sk, TCP_CLOSE); - ip_rt_put(rt); - sk->sk_route_caps = 0; - inet->dport = 0; - return err; -} - -/* - * This routine does path mtu discovery as defined in RFC1191. - */ -static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) -{ - struct dst_entry *dst; - struct inet_sock *inet = inet_sk(sk); - - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). 
- */ - if (sk->sk_state == TCP_LISTEN) - return; - - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) - return; - - dst->ops->update_pmtu(dst, mtu); - - /* Something is about to be wrong... Remember soft error - * for the case, if this connection will not able to recover. - */ - if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; - - mtu = dst_mtu(dst); - - if (inet->pmtudisc != IP_PMTUDISC_DONT && - inet_csk(sk)->icsk_pmtu_cookie > mtu) { - tcp_sync_mss(sk, mtu); - - /* Resend the TCP packet because it's - * clear that the old packet has been - * dropped. This is the new "fast" path mtu - * discovery. - */ - tcp_simple_retransmit(sk); - } /* else let the usual retransmit timer handle it */ -} - -/* - * This routine is called by the ICMP module when it gets some - * sort of error condition. If err < 0 then the socket should - * be closed and the error returned to the user. If err > 0 - * it's just the icmp type << 8 | icmp code. After adjustment - * header points to the first 8 bytes of the tcp header. We need - * to find the appropriate port. - * - * The locking strategy used here is very "optimistic". When - * someone else accesses the socket the ICMP is just dropped - * and for some paths there is no check at all. - * A more general error queue to queue errors for later handling - * is probably better. 
- * - */ - -void tcp_v4_err(struct sk_buff *skb, u32 info) -{ - struct iphdr *iph = (struct iphdr *)skb->data; - struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); - struct tcp_sock *tp; - struct inet_sock *inet; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct sock *sk; - __u32 seq; - int err; - - if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); - return; - } - - sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, inet_iif(skb)); - if (!sk) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); - return; - } - if (sk->sk_state == TCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); - return; - } - - bh_lock_sock(sk); - /* If too many ICMPs get dropped on busy - * servers this needs to be solved differently. - */ - if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); - - if (sk->sk_state == TCP_CLOSE) - goto out; - - tp = tcp_sk(sk); - seq = ntohl(th->seq); - if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - switch (type) { - case ICMP_SOURCE_QUENCH: - /* Just silently ignore these. */ - goto out; - case ICMP_PARAMETERPROB: - err = EPROTO; - break; - case ICMP_DEST_UNREACH: - if (code > NR_ICMP_UNREACH) - goto out; - - if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ - if (!sock_owned_by_user(sk)) - do_pmtu_discovery(sk, iph, info); - goto out; - } - - err = icmp_err_convert[code].errno; - break; - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - default: - goto out; - } - - switch (sk->sk_state) { - struct request_sock *req, **prev; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - req = inet_csk_search_req(sk, &prev, th->dest, - iph->daddr, iph->saddr); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - an established socket here. 
- */ - BUG_TRAP(!req->sk); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - /* - * Still in SYN_RECV, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(sk, req, prev); - goto out; - - case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can f.e. if SYNs crossed. - */ - if (!sock_owned_by_user(sk)) { - sk->sk_err = err; - - sk->sk_error_report(sk); - - tcp_done(sk); - } else { - sk->sk_err_soft = err; - } - goto out; - } - - /* If we've already connected we will keep trying - * until we time out, or the user gives up. - * - * rfc1122 4.2.3.9 allows to consider as hard errors - * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, - * but it is obsoleted by pmtu discovery). - * - * Note, that in modern internet, where routing is unreliable - * and in each dark corner broken firewalls sit, sending random - * errors ordered by their masters even this two messages finally lose - * their original sense (even Linux sends invalid PORT_UNREACHs) - * - * Now we are in compliance with RFCs. - * --ANK (980905) - */ - - inet = inet_sk(sk); - if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; - sk->sk_error_report(sk); - } else { /* Only an error on timeout */ - sk->sk_err_soft = err; - } - -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -/* This routine computes an IPv4 TCP checksum. 
*/ -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) -{ - struct inet_sock *inet = inet_sk(sk); - struct tcphdr *th = tcp_hdr(skb); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(len, inet->saddr, - inet->daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v4_check(len, inet->saddr, inet->daddr, - csum_partial((char *)th, - th->doff << 2, - skb->csum)); - } -} - -int tcp_v4_gso_send_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - return 0; -} - -/* - * This routine will send an RST to the other tcp. - * - * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) - * for reset. - * Answer: if a packet caused RST, it is not for a socket - * existing in our system, if it is matched to a socket, - * it is just duplicate segment or bug in other side's TCP. - * So that we build reply only basing on parameters - * arrived with segment. - * Exception: precedence violation. We do not implement it in any case. - */ - -static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - struct { - struct tcphdr th; -#ifdef CONFIG_TCP_MD5SIG - __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; -#endif - } rep; - struct ip_reply_arg arg; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; -#endif - - /* Never send a reset in response to a reset. */ - if (th->rst) - return; - - if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) - return; - - /* Swap the send and the receive. 
*/ - memset(&rep, 0, sizeof(rep)); - rep.th.dest = th->source; - rep.th.source = th->dest; - rep.th.doff = sizeof(struct tcphdr) / 4; - rep.th.rst = 1; - - if (th->ack) { - rep.th.seq = th->ack_seq; - } else { - rep.th.ack = 1; - rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + - skb->len - (th->doff << 2)); - } - - memset(&arg, 0, sizeof(arg)); - arg.iov[0].iov_base = (unsigned char *)&rep; - arg.iov[0].iov_len = sizeof(rep.th); - -#ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; - if (key) { - rep.opt[0] = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - /* Update length and the length the header thinks exists */ - arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; - rep.th.doff = arg.iov[0].iov_len / 4; - - tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], - key, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); - } -#endif - arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, /* XXX */ - sizeof(struct tcphdr), IPPROTO_TCP, 0); - arg.csumoffset = offsetof(struct tcphdr, check) / 2; - - ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); - - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); -} - -/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states - outside socket context is ugly, certainly. What can I do? 
- */ - -static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, - struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts) -{ - struct tcphdr *th = tcp_hdr(skb); - struct { - struct tcphdr th; - __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) -#ifdef CONFIG_TCP_MD5SIG - + (TCPOLEN_MD5SIG_ALIGNED >> 2) -#endif - ]; - } rep; - struct ip_reply_arg arg; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; - struct tcp_md5sig_key tw_key; -#endif - - memset(&rep.th, 0, sizeof(struct tcphdr)); - memset(&arg, 0, sizeof(arg)); - - arg.iov[0].iov_base = (unsigned char *)&rep; - arg.iov[0].iov_len = sizeof(rep.th); - if (ts) { - rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - rep.opt[1] = htonl(tcp_time_stamp); - rep.opt[2] = htonl(ts); - arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; - } - - /* Swap the send and the receive. */ - rep.th.dest = th->source; - rep.th.source = th->dest; - rep.th.doff = arg.iov[0].iov_len / 4; - rep.th.seq = htonl(seq); - rep.th.ack_seq = htonl(ack); - rep.th.ack = 1; - rep.th.window = htons(win); - -#ifdef CONFIG_TCP_MD5SIG - /* - * The SKB holds an imcoming packet, but may not have a valid ->sk - * pointer. This is especially the case when we're dealing with a - * TIME_WAIT ack, because the sk structure is long gone, and only - * the tcp_timewait_sock remains. So the md5 key is stashed in that - * structure, and we use it in preference. I believe that (twsk || - * skb->sk) holds true, but we program defensively. - */ - if (!twsk && skb->sk) { - key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); - } else if (twsk && twsk->tw_md5_keylen) { - tw_key.key = twsk->tw_md5_key; - tw_key.keylen = twsk->tw_md5_keylen; - key = &tw_key; - } else - key = NULL; - - if (key) { - int offset = (ts) ? 
3 : 0; - - rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; - rep.th.doff = arg.iov[0].iov_len/4; - - tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], - key, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); - } -#endif - arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, /* XXX */ - arg.iov[0].iov_len, IPPROTO_TCP, 0); - arg.csumoffset = offsetof(struct tcphdr, check) / 2; - if (twsk) - arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; - - ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); - - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); -} - -static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) -{ - struct inet_timewait_sock *tw = inet_twsk(sk); - struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - - tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, - tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); - - inet_twsk_put(tw); -} - -static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, - struct request_sock *req) -{ - tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, - tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, - req->ts_recent); -} - -/* - * Send a SYN-ACK after having received an ACK. - * This still operates on a request_sock only, not on a big - * socket. - */ -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) -{ - const struct inet_request_sock *ireq = inet_rsk(req); - int err = -1; - struct sk_buff * skb; - - /* First, grab a route. 
*/ - if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; - - skb = tcp_make_synack(sk, dst, req); - - if (skb) { - struct tcphdr *th = tcp_hdr(skb); - - th->check = tcp_v4_check(skb->len, - ireq->loc_addr, - ireq->rmt_addr, - csum_partial((char *)th, skb->len, - skb->csum)); - - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, - ireq->opt); - err = net_xmit_eval(err); - } - -out: - dst_release(dst); - return err; -} - -/* - * IPv4 request_sock destructor. - */ -static void tcp_v4_reqsk_destructor(struct request_sock *req) -{ - kfree(inet_rsk(req)->opt); -} - -#ifdef CONFIG_SYN_COOKIES -static void syn_flood_warning(struct sk_buff *skb) -{ - static unsigned long warntime; - - if (time_after(jiffies, (warntime + HZ * 60))) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(tcp_hdr(skb)->dest)); - } -} -#endif - -/* - * Save and compile IPv4 options into the request_sock if needed. - */ -static struct ip_options *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) -{ - struct ip_options *opt = &(IPCB(skb)->opt); - struct ip_options *dopt = NULL; - - if (opt && opt->optlen) { - int opt_size = optlength(opt); - dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt) { - if (ip_options_echo(dopt, skb)) { - kfree(dopt); - dopt = NULL; - } - } - } - return dopt; -} - -#ifdef CONFIG_TCP_MD5SIG -/* - * RFC2385 MD5 checksumming requires a mapping of - * IP address->MD5 Key. - * We need to maintain these in the sk structure. - */ - -/* Find the Key structure for an address. 
*/ -static struct tcp_md5sig_key * - tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) -{ - struct tcp_sock *tp = tcp_sk(sk); - int i; - - if (!tp->md5sig_info || !tp->md5sig_info->entries4) - return NULL; - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) - return &tp->md5sig_info->keys4[i].base; - } - return NULL; -} - -struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk) -{ - return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); -} - -EXPORT_SYMBOL(tcp_v4_md5_lookup); - -static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); -} - -/* This can be called on a newly created socket, from other files */ -int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, - u8 *newkey, u8 newkeylen) -{ - /* Add Key to the list */ - struct tcp4_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - struct tcp4_md5sig_key *keys; - - key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); - if (key) { - /* Pre-existing entry - just update that one. 
*/ - kfree(key->base.key); - key->base.key = newkey; - key->base.keylen = newkeylen; - } else { - struct tcp_md5sig_info *md5sig; - - if (!tp->md5sig_info) { - tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), - GFP_ATOMIC); - if (!tp->md5sig_info) { - kfree(newkey); - return -ENOMEM; - } - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - } - if (tcp_alloc_md5sig_pool() == NULL) { - kfree(newkey); - return -ENOMEM; - } - md5sig = tp->md5sig_info; - - if (md5sig->alloced4 == md5sig->entries4) { - keys = kmalloc((sizeof(*keys) * - (md5sig->entries4 + 1)), GFP_ATOMIC); - if (!keys) { - kfree(newkey); - tcp_free_md5sig_pool(); - return -ENOMEM; - } - - if (md5sig->entries4) - memcpy(keys, md5sig->keys4, - sizeof(*keys) * md5sig->entries4); - - /* Free old key list, and reference new one */ - if (md5sig->keys4) - kfree(md5sig->keys4); - md5sig->keys4 = keys; - md5sig->alloced4++; - } - md5sig->entries4++; - md5sig->keys4[md5sig->entries4 - 1].addr = addr; - md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; - md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; - } - return 0; -} - -EXPORT_SYMBOL(tcp_v4_md5_do_add); - -static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, - u8 *newkey, u8 newkeylen) -{ - return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, - newkey, newkeylen); -} - -int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) -{ - struct tcp_sock *tp = tcp_sk(sk); - int i; - - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) { - /* Free the key */ - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4--; - - if (tp->md5sig_info->entries4 == 0) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; - } else if (tp->md5sig_info->entries4 != i) { - /* Need to do some manipulation */ - memcpy(&tp->md5sig_info->keys4[i], - &tp->md5sig_info->keys4[i+1], - (tp->md5sig_info->entries4 - i) * - sizeof(struct tcp4_md5sig_key)); - } - 
tcp_free_md5sig_pool(); - return 0; - } - } - return -ENOENT; -} - -EXPORT_SYMBOL(tcp_v4_md5_do_del); - -static void tcp_v4_clear_md5_list(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - /* Free each key, then the set of key keys, - * the crypto element, and then decrement our - * hold on the last resort crypto. - */ - if (tp->md5sig_info->entries4) { - int i; - for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4 = 0; - tcp_free_md5sig_pool(); - } - if (tp->md5sig_info->keys4) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; - } -} - -static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, - int optlen) -{ - struct tcp_md5sig cmd; - struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; - u8 *newkey; - - if (optlen < sizeof(cmd)) - return -EINVAL; - - if (copy_from_user(&cmd, optval, sizeof(cmd))) - return -EFAULT; - - if (sin->sin_family != AF_INET) - return -EINVAL; - - if (!cmd.tcpm_key || !cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; - return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); - } - - if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) - return -EINVAL; - - if (!tcp_sk(sk)->md5sig_info) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); - - if (!p) - return -EINVAL; - - tp->md5sig_info = p; - sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - } - - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); - if (!newkey) - return -ENOMEM; - return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, - newkey, cmd.tcpm_keylen); -} - -static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - int tcplen) -{ - struct scatterlist sg[4]; - __u16 data_len; - int block = 0; - __sum16 old_checksum; - struct tcp_md5sig_pool *hp; - struct tcp4_pseudohdr *bp; - struct 
hash_desc *desc; - int err; - unsigned int nbytes = 0; - - /* - * Okay, so RFC2385 is turned on for this connection, - * so we need to generate the MD5 hash for the packet now. - */ - - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - - bp = &hp->md5_blk.ip4; - desc = &hp->md5_desc; - - /* - * 1. the TCP pseudo-header (in the order: source IP address, - * destination IP address, zero-padded protocol number, and - * segment length) - */ - bp->saddr = saddr; - bp->daddr = daddr; - bp->pad = 0; - bp->protocol = protocol; - bp->len = htons(tcplen); - sg_set_buf(&sg[block++], bp, sizeof(*bp)); - nbytes += sizeof(*bp); - - /* 2. the TCP header, excluding options, and assuming a - * checksum of zero/ - */ - old_checksum = th->check; - th->check = 0; - sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); - nbytes += sizeof(struct tcphdr); - - /* 3. the TCP segment data (if any) */ - data_len = tcplen - (th->doff << 2); - if (data_len > 0) { - unsigned char *data = (unsigned char *)th + (th->doff << 2); - sg_set_buf(&sg[block++], data, data_len); - nbytes += data_len; - } - - /* 4. 
an independently-specified key or password, known to both - * TCPs and presumably connection-specific - */ - sg_set_buf(&sg[block++], key->key, key->keylen); - nbytes += key->keylen; - - /* Now store the Hash into the packet */ - err = crypto_hash_init(desc); - if (err) - goto clear_hash; - err = crypto_hash_update(desc, sg, nbytes); - if (err) - goto clear_hash; - err = crypto_hash_final(desc, md5_hash); - if (err) - goto clear_hash; - - /* Reset header, and free up the crypto */ - tcp_put_md5sig_pool(); - th->check = old_checksum; - -out: - return 0; -clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: - memset(md5_hash, 0, 16); - goto out; -} - -int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, - struct dst_entry *dst, - struct request_sock *req, - struct tcphdr *th, int protocol, - int tcplen) -{ - __be32 saddr, daddr; - - if (sk) { - saddr = inet_sk(sk)->saddr; - daddr = inet_sk(sk)->daddr; - } else { - struct rtable *rt = (struct rtable *)dst; - BUG_ON(!rt); - saddr = rt->rt_src; - daddr = rt->rt_dst; - } - return tcp_v4_do_calc_md5_hash(md5_hash, key, - saddr, daddr, - th, protocol, tcplen); -} - -EXPORT_SYMBOL(tcp_v4_calc_md5_hash); - -static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) -{ - /* - * This gets called for each TCP segment that arrives - * so we want to be efficient. - * We have 3 drop cases: - * o No MD5 hash and one expected. - * o MD5 hash and we're not expecting one. - * o MD5 hash and its wrong. 
- */ - __u8 *hash_location = NULL; - struct tcp_md5sig_key *hash_expected; - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff << 2) - sizeof(struct tcphdr); - int genhash; - unsigned char *ptr; - unsigned char newhash[16]; - - hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); - - /* - * If the TCP option length is less than the TCP_MD5SIG - * option length, then we can shortcut - */ - if (length < TCPOLEN_MD5SIG) { - if (hash_expected) - return 1; - else - return 0; - } - - /* Okay, we can't shortcut - we have to grub through the options */ - ptr = (unsigned char *)(th + 1); - while (length > 0) { - int opcode = *ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - goto done_opts; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2) - goto done_opts; - if (opsize > length) - goto done_opts; - - if (opcode == TCPOPT_MD5SIG) { - hash_location = ptr; - goto done_opts; - } - } - ptr += opsize-2; - length -= opsize; - } -done_opts: - /* We've parsed the options - do we have a hash? */ - if (!hash_expected && !hash_location) - return 0; - - if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); - return 1; - } - - if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); - return 1; - } - - /* Okay, so this is hash_expected and hash_location - - * so we need to calculate the checksum. 
- */ - genhash = tcp_v4_do_calc_md5_hash(newhash, - hash_expected, - iph->saddr, iph->daddr, - th, sk->sk_protocol, - skb->len); - - if (genhash || memcmp(hash_location, newhash, 16) != 0) { - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash failed for " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest), - genhash ? " tcp_v4_calc_md5_hash failed" : ""); - } - return 1; - } - return 0; -} - -#endif - -struct request_sock_ops tcp_request_sock_ops __read_mostly = { - .family = PF_INET, - .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_send_synack, - .send_ack = tcp_v4_reqsk_send_ack, - .destructor = tcp_v4_reqsk_destructor, - .send_reset = tcp_v4_send_reset, -}; - -#ifdef CONFIG_TCP_MD5SIG -static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { - .md5_lookup = tcp_v4_reqsk_md5_lookup, -}; -#endif - -static struct timewait_sock_ops tcp_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - -int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct inet_request_sock *ireq; - struct tcp_options_received tmp_opt; - struct request_sock *req; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; -#ifdef CONFIG_SYN_COOKIES - int want_cookie = 0; -#else -#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ -#endif - - /* Never answer to SYNs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) - goto drop; - - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. 
- */ - if (inet_csk_reqsk_queue_is_full(sk) && !isn) { -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - want_cookie = 1; - } else -#endif - goto drop; - } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) - goto drop; - - req = reqsk_alloc(&tcp_request_sock_ops); - if (!req) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = 536; - tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; - - tcp_parse_options(skb, &tmp_opt, 0); - - if (want_cookie) { - tcp_clear_options(&tmp_opt); - tmp_opt.saw_tstamp = 0; - } - - if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { - /* Some OSes (unknown ones, but I see them on web server, which - * contains information interesting only for windows' - * users) do not send their stamp in SYN. It is easy case. - * We simply do not advertise TS support. - */ - tmp_opt.saw_tstamp = 0; - tmp_opt.tstamp_ok = 0; - } - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - - tcp_openreq_init(req, &tmp_opt, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->opt = tcp_v4_save_options(sk, skb); - if (!want_cookie) - TCP_ECN_create_request(req, tcp_hdr(skb)); - - if (want_cookie) { -#ifdef CONFIG_SYN_COOKIES - syn_flood_warning(skb); -#endif - isn = cookie_v4_init_sequence(sk, skb, &req->mss); - } else if (!isn) { - struct inet_peer *peer = NULL; - - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. 
- * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, req)) != NULL && - (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->v4daddr == saddr) { - if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > - TCP_PAWS_WINDOW) { - NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); - dst_release(dst); - goto drop_and_free; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst_metric(dst, RTAX_RTT))) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " - "request from %u.%u.%u.%u/%u\n", - NIPQUAD(saddr), - ntohs(tcp_hdr(skb)->source)); - dst_release(dst); - goto drop_and_free; - } - - isn = tcp_v4_init_sequence(skb); - } - tcp_rsk(req)->snt_isn = isn; - - if (tcp_v4_send_synack(sk, req, dst)) - goto drop_and_free; - - if (want_cookie) { - reqsk_free(req); - } else { - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - return 0; - -drop_and_free: - reqsk_free(req); -drop: - return 0; -} - - -/* - * The three way handshake has completed - we got a valid synack - - * now create the new socket. 
- */ -struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) -{ - struct inet_request_sock *ireq; - struct inet_sock *newinet; - struct tcp_sock *newtp; - struct sock *newsk; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; -#endif - - if (sk_acceptq_is_full(sk)) - goto exit_overflow; - - if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) - goto exit; - - newsk = tcp_create_openreq_child(sk, req, skb); - if (!newsk) - goto exit; - - newsk->sk_gso_type = SKB_GSO_TCPV4; - sk_setup_caps(newsk, dst); - - newtp = tcp_sk(newsk); - newinet = inet_sk(newsk); - ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; - newinet->opt = ireq->opt; - ireq->opt = NULL; - newinet->mc_index = inet_iif(skb); - newinet->mc_ttl = ip_hdr(skb)->ttl; - inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newinet->opt) - inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; - newinet->id = newtp->write_seq ^ jiffies; - - tcp_mtup_init(newsk); - tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); - tcp_initialize_rcv_mss(newsk); - -#ifdef CONFIG_TCP_MD5SIG - /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { - /* - * We're using one, so create a matching key - * on the newsk structure. If we fail to get - * memory, then we end up not copying the key - * across. Shucks. 
- */ - char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); - if (newkey != NULL) - tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, - newkey, key->keylen); - } -#endif - - __inet_hash(&tcp_hashinfo, newsk, 0); - __inet_inherit_port(&tcp_hashinfo, sk, newsk); - - return newsk; - -exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); -exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); - dst_release(dst); - return NULL; -} - -static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - const struct iphdr *iph = ip_hdr(skb); - struct sock *nsk; - struct request_sock **prev; - /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, - iph->saddr, iph->daddr); - if (req) - return tcp_check_req(sk, skb, req, prev); - - nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); - - if (nsk) { - if (nsk->sk_state != TCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - inet_twsk_put(inet_twsk(nsk)); - return NULL; - } - -#ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) - sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); -#endif - return sk; -} - -static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v4_check(skb->len, iph->saddr, - iph->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, IPPROTO_TCP, 0); - - if (skb->len <= 76) { - return __skb_checksum_complete(skb); - } - return 0; -} - - -/* The socket must have it's spinlock held when we get - * here. - * - * We have a potential double-lock case here, so even when - * doing backlog processing we use the BH locking scheme. - * This is because we cannot sleep with the original spinlock - * held. 
- */ -int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct sock *rsk; -#ifdef CONFIG_TCP_MD5SIG - /* - * We really want to reject the packet as early as possible - * if: - * o We're expecting an MD5'd packet and this is no MD5 tcp option - * o There is an MD5 option and we're not expecting one - */ - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard; -#endif - - if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { - rsk = sk; - goto reset; - } - TCP_CHECK_TIMER(sk); - return 0; - } - - if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) - goto csum_err; - - if (sk->sk_state == TCP_LISTEN) { - struct sock *nsk = tcp_v4_hnd_req(sk, skb); - if (!nsk) - goto discard; - - if (nsk != sk) { - if (tcp_child_process(sk, nsk, skb)) { - rsk = nsk; - goto reset; - } - return 0; - } - } - - TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { - rsk = sk; - goto reset; - } - TCP_CHECK_TIMER(sk); - return 0; - -reset: - tcp_v4_send_reset(rsk, skb); -discard: - kfree_skb(skb); - /* Be careful here. If this function gets more complicated and - * gcc suffers from register pressure on the x86, sk (in %ebx) - * might be destroyed here. This current version compiles correctly, - * but you have been warned. - */ - return 0; - -csum_err: - TCP_INC_STATS_BH(TCP_MIB_INERRS); - goto discard; -} - -/* - * From tcp_input.c - */ - -int tcp_v4_rcv(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - struct sock *sk; - int ret; - - if (skb->pkt_type != PACKET_HOST) - goto discard_it; - - /* Count it even if it's bad */ - TCP_INC_STATS_BH(TCP_MIB_INSEGS); - - if (!pskb_may_pull(skb, sizeof(struct tcphdr))) - goto discard_it; - - th = tcp_hdr(skb); - - if (th->doff < sizeof(struct tcphdr) / 4) - goto bad_packet; - if (!pskb_may_pull(skb, th->doff * 4)) - goto discard_it; - - /* An explanation is required here, I think. 
- * Packet length and doff are validated by header prediction, - * provided case of th->doff==0 is eliminated. - * So, we defer the checks. */ - if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) - goto bad_packet; - - th = tcp_hdr(skb); - iph = ip_hdr(skb); - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff * 4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = iph->tos; - TCP_SKB_CB(skb)->sacked = 0; - - sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); - if (!sk) - goto no_tcp_socket; - -process: - if (sk->sk_state == TCP_TIME_WAIT) - goto do_time_wait; - - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto discard_and_relse; - nf_reset(skb); - - if (sk_filter(sk, skb)) - goto discard_and_relse; - - skb->dev = NULL; - - bh_lock_sock_nested(sk); - ret = 0; - if (!sock_owned_by_user(sk)) { -#ifdef CONFIG_NET_DMA - struct tcp_sock *tp = tcp_sk(sk); - if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = get_softnet_dma(); - if (tp->ucopy.dma_chan) - ret = tcp_v4_do_rcv(sk, skb); - else -#endif - { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); - } - } else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - - sock_put(sk); - - return ret; - -no_tcp_socket: - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard_it; - - if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { -bad_packet: - TCP_INC_STATS_BH(TCP_MIB_INERRS); - } else { - tcp_v4_send_reset(NULL, skb); - } - -discard_it: - /* Discard frame. 
*/ - kfree_skb(skb); - return 0; - -discard_and_relse: - sock_put(sk); - goto discard_it; - -do_time_wait: - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - inet_twsk_put(inet_twsk(sk)); - goto discard_it; - } - - if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); - inet_twsk_put(inet_twsk(sk)); - goto discard_it; - } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { - case TCP_TW_SYN: { - struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, - iph->daddr, th->dest, - inet_iif(skb)); - if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); - inet_twsk_put(inet_twsk(sk)); - sk = sk2; - goto process; - } - /* Fall through to ACK */ - } - case TCP_TW_ACK: - tcp_v4_timewait_ack(sk, skb); - break; - case TCP_TW_RST: - goto no_tcp_socket; - case TCP_TW_SUCCESS:; - } - goto discard_it; -} - -/* VJ's idea. Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. 
- */ - -int tcp_v4_remember_stamp(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_get(sk); - struct inet_peer *peer = NULL; - int release_it = 0; - - if (!rt || rt->rt_dst != inet->daddr) { - peer = inet_getpeer(inet->daddr, 1); - release_it = 1; - } else { - if (!rt->peer) - rt_bind_peer(rt, 1); - peer = rt->peer; - } - - if (peer) { - if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && - peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; - peer->tcp_ts = tp->rx_opt.ts_recent; - } - if (release_it) - inet_putpeer(peer); - return 1; - } - - return 0; -} - -int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) -{ - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); - - if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && - peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; - peer->tcp_ts = tcptw->tw_ts_recent; - } - inet_putpeer(peer); - return 1; - } - - return 0; -} - -struct inet_connection_sock_af_ops ipv4_specific = { - .queue_xmit = ip_queue_xmit, - .send_check = tcp_v4_send_check, - .rebuild_header = inet_sk_rebuild_header, - .conn_request = tcp_v4_conn_request, - .syn_recv_sock = tcp_v4_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, - .net_header_len = sizeof(struct iphdr), - .setsockopt = ip_setsockopt, - .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ip_setsockopt, - .compat_getsockopt = compat_ip_getsockopt, -#endif -}; - -#ifdef CONFIG_TCP_MD5SIG -static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { - .md5_lookup 
= tcp_v4_md5_lookup, - .calc_md5_hash = tcp_v4_calc_md5_hash, - .md5_add = tcp_v4_md5_add_func, - .md5_parse = tcp_v4_parse_md5_keys, -}; -#endif - -/* NOTE: A lot of things set to zero explicitly by call to - * sk_alloc() so need not be done here. - */ -static int tcp_v4_init_sock(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - skb_queue_head_init(&tp->out_of_order_queue); - tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); - - icsk->icsk_rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; - - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - tp->snd_cwnd = 2; - - /* See draft-stevens-tcpca-spec-01 for discussion of the - * initialization of these values. - */ - tp->snd_ssthresh = 0x7fffffff; /* Infinity */ - tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; - - tp->reordering = sysctl_tcp_reordering; - icsk->icsk_ca_ops = &tcp_init_congestion_ops; - - sk->sk_state = TCP_CLOSE; - - sk->sk_write_space = sk_stream_write_space; - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - - icsk->icsk_af_ops = &ipv4_specific; - icsk->icsk_sync_mss = tcp_sync_mss; -#ifdef CONFIG_TCP_MD5SIG - tp->af_specific = &tcp_sock_ipv4_specific; -#endif - - sk->sk_sndbuf = sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - - atomic_inc(&tcp_sockets_allocated); - - return 0; -} - -int tcp_v4_destroy_sock(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_clear_xmit_timers(sk); - - tcp_cleanup_congestion_control(sk); - - /* Cleanup up the write buffer. */ - tcp_write_queue_purge(sk); - - /* Cleans up our, hopefully empty, out_of_order_queue. 
*/ - __skb_queue_purge(&tp->out_of_order_queue); - -#ifdef CONFIG_TCP_MD5SIG - /* Clean up the MD5 key list, if any */ - if (tp->md5sig_info) { - tcp_v4_clear_md5_list(sk); - kfree(tp->md5sig_info); - tp->md5sig_info = NULL; - } -#endif - -#ifdef CONFIG_NET_DMA - /* Cleans up our sk_async_wait_queue */ - __skb_queue_purge(&sk->sk_async_wait_queue); -#endif - - /* Clean prequeue, it must be empty really */ - __skb_queue_purge(&tp->ucopy.prequeue); - - /* Clean up a referenced TCP bind bucket. */ - if (inet_csk(sk)->icsk_bind_hash) - inet_put_port(&tcp_hashinfo, sk); - - /* - * If sendmsg cached page exists, toss it. - */ - if (sk->sk_sndmsg_page) { - __free_page(sk->sk_sndmsg_page); - sk->sk_sndmsg_page = NULL; - } - - atomic_dec(&tcp_sockets_allocated); - - return 0; -} - -EXPORT_SYMBOL(tcp_v4_destroy_sock); - -#ifdef CONFIG_PROC_FS -/* Proc filesystem TCP sock list dumping. */ - -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) -{ - return hlist_empty(head) ? NULL : - list_entry(head->first, struct inet_timewait_sock, tw_node); -} - -static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) -{ - return tw->tw_node.next ? 
- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; -} - -static void *listening_get_next(struct seq_file *seq, void *cur) -{ - struct inet_connection_sock *icsk; - struct hlist_node *node; - struct sock *sk = cur; - struct tcp_iter_state* st = seq->private; - - if (!sk) { - st->bucket = 0; - sk = sk_head(&tcp_hashinfo.listening_hash[0]); - goto get_sk; - } - - ++st->num; - - if (st->state == TCP_SEQ_STATE_OPENREQ) { - struct request_sock *req = cur; - - icsk = inet_csk(st->syn_wait_sk); - req = req->dl_next; - while (1) { - while (req) { - if (req->rsk_ops->family == st->family) { - cur = req; - goto out; - } - req = req->dl_next; - } - if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) - break; -get_req: - req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; - } - sk = sk_next(st->syn_wait_sk); - st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - } else { - icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - if (reqsk_queue_len(&icsk->icsk_accept_queue)) - goto start_req; - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - sk = sk_next(sk); - } -get_sk: - sk_for_each_from(sk, node) { - if (sk->sk_family == st->family) { - cur = sk; - goto out; - } - icsk = inet_csk(sk); - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - if (reqsk_queue_len(&icsk->icsk_accept_queue)) { -start_req: - st->uid = sock_i_uid(sk); - st->syn_wait_sk = sk; - st->state = TCP_SEQ_STATE_OPENREQ; - st->sbucket = 0; - goto get_req; - } - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - } - if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); - goto get_sk; - } - cur = NULL; -out: - return cur; -} - -static void *listening_get_idx(struct seq_file *seq, loff_t *pos) -{ - void *rc = listening_get_next(seq, NULL); - - while (rc && *pos) { - rc = listening_get_next(seq, rc); - --*pos; - } - return rc; -} - 
-static void *established_get_first(struct seq_file *seq) -{ - struct tcp_iter_state* st = seq->private; - void *rc = NULL; - - for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { - struct sock *sk; - struct hlist_node *node; - struct inet_timewait_sock *tw; - - /* We can reschedule _before_ having picked the target: */ - cond_resched_softirq(); - - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); - sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if (sk->sk_family != st->family) { - continue; - } - rc = sk; - goto out; - } - st->state = TCP_SEQ_STATE_TIME_WAIT; - inet_twsk_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket].twchain) { - if (tw->tw_family != st->family) { - continue; - } - rc = tw; - goto out; - } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); - st->state = TCP_SEQ_STATE_ESTABLISHED; - } -out: - return rc; -} - -static void *established_get_next(struct seq_file *seq, void *cur) -{ - struct sock *sk = cur; - struct inet_timewait_sock *tw; - struct hlist_node *node; - struct tcp_iter_state* st = seq->private; - - ++st->num; - - if (st->state == TCP_SEQ_STATE_TIME_WAIT) { - tw = cur; - tw = tw_next(tw); -get_tw: - while (tw && tw->tw_family != st->family) { - tw = tw_next(tw); - } - if (tw) { - cur = tw; - goto out; - } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); - st->state = TCP_SEQ_STATE_ESTABLISHED; - - /* We can reschedule between buckets: */ - cond_resched_softirq(); - - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); - } else { - cur = NULL; - goto out; - } - } else - sk = sk_next(sk); - - sk_for_each_from(sk, node) { - if (sk->sk_family == st->family) - goto found; - } - - st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); - goto get_tw; -found: - cur = sk; -out: - return cur; -} - -static void *established_get_idx(struct seq_file *seq, loff_t 
pos) -{ - void *rc = established_get_first(seq); - - while (rc && pos) { - rc = established_get_next(seq, rc); - --pos; - } - return rc; -} - -static void *tcp_get_idx(struct seq_file *seq, loff_t pos) -{ - void *rc; - struct tcp_iter_state* st = seq->private; - - inet_listen_lock(&tcp_hashinfo); - st->state = TCP_SEQ_STATE_LISTENING; - rc = listening_get_idx(seq, &pos); - - if (!rc) { - inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); - st->state = TCP_SEQ_STATE_ESTABLISHED; - rc = established_get_idx(seq, pos); - } - - return rc; -} - -static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct tcp_iter_state* st = seq->private; - st->state = TCP_SEQ_STATE_LISTENING; - st->num = 0; - return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - void *rc = NULL; - struct tcp_iter_state* st; - - if (v == SEQ_START_TOKEN) { - rc = tcp_get_idx(seq, 0); - goto out; - } - st = seq->private; - - switch (st->state) { - case TCP_SEQ_STATE_OPENREQ: - case TCP_SEQ_STATE_LISTENING: - rc = listening_get_next(seq, v); - if (!rc) { - inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); - st->state = TCP_SEQ_STATE_ESTABLISHED; - rc = established_get_first(seq); - } - break; - case TCP_SEQ_STATE_ESTABLISHED: - case TCP_SEQ_STATE_TIME_WAIT: - rc = established_get_next(seq, v); - break; - } -out: - ++*pos; - return rc; -} - -static void tcp_seq_stop(struct seq_file *seq, void *v) -{ - struct tcp_iter_state* st = seq->private; - - switch (st->state) { - case TCP_SEQ_STATE_OPENREQ: - if (v) { - struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - } - case TCP_SEQ_STATE_LISTENING: - if (v != SEQ_START_TOKEN) - inet_listen_unlock(&tcp_hashinfo); - break; - case TCP_SEQ_STATE_TIME_WAIT: - case TCP_SEQ_STATE_ESTABLISHED: - if (v) - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); - local_bh_enable(); - break; - } 
-} - -static int tcp_seq_open(struct inode *inode, struct file *file) -{ - struct tcp_seq_afinfo *afinfo = PDE(inode)->data; - struct seq_file *seq; - struct tcp_iter_state *s; - int rc; - - if (unlikely(afinfo == NULL)) - return -EINVAL; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - s->family = afinfo->family; - s->seq_ops.start = tcp_seq_start; - s->seq_ops.next = tcp_seq_next; - s->seq_ops.show = afinfo->seq_show; - s->seq_ops.stop = tcp_seq_stop; - - rc = seq_open(file, &s->seq_ops); - if (rc) - goto out_kfree; - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; -} - -int tcp_proc_register(struct tcp_seq_afinfo *afinfo) -{ - int rc = 0; - struct proc_dir_entry *p; - - if (!afinfo) - return -EINVAL; - afinfo->seq_fops->owner = afinfo->owner; - afinfo->seq_fops->open = tcp_seq_open; - afinfo->seq_fops->read = seq_read; - afinfo->seq_fops->llseek = seq_lseek; - afinfo->seq_fops->release = seq_release_private; - - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); - if (p) - p->data = afinfo; - else - rc = -ENOMEM; - return rc; -} - -void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) -{ - if (!afinfo) - return; - proc_net_remove(afinfo->name); - memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); -} - -static void get_openreq4(struct sock *sk, struct request_sock *req, - char *tmpbuf, int i, int uid) -{ - const struct inet_request_sock *ireq = inet_rsk(req); - int ttd = req->expires - jiffies; - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", - i, - ireq->loc_addr, - ntohs(inet_sk(sk)->sport), - ireq->rmt_addr, - ntohs(ireq->rmt_port), - TCP_SYN_RECV, - 0, 0, /* could print option size, but that is af dependent. 
*/ - 1, /* timers active (only the expire timer) */ - jiffies_to_clock_t(ttd), - req->retrans, - uid, - 0, /* non standard timer */ - 0, /* open_requests have no inode */ - atomic_read(&sk->sk_refcnt), - req); -} - -static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i) -{ - int timer_active; - unsigned long timer_expires; - struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); - - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { - timer_active = 1; - timer_expires = icsk->icsk_timeout; - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { - timer_active = 4; - timer_expires = icsk->icsk_timeout; - } else if (timer_pending(&sk->sk_timer)) { - timer_active = 2; - timer_expires = sk->sk_timer.expires; - } else { - timer_active = 0; - timer_expires = jiffies; - } - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d", - i, src, srcp, dest, destp, sk->sk_state, - tp->write_seq - tp->snd_una, - sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : - (tp->rcv_nxt - tp->copied_seq), - timer_active, - jiffies_to_clock_t(timer_expires - jiffies), - icsk->icsk_retransmits, - sock_i_uid(sk), - icsk->icsk_probes_out, - sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, - tp->snd_cwnd, - tp->snd_ssthresh >= 0xFFFF ? 
-1 : tp->snd_ssthresh); -} - -static void get_timewait4_sock(struct inet_timewait_sock *tw, - char *tmpbuf, int i) -{ - __be32 dest, src; - __u16 destp, srcp; - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; - - dest = tw->tw_daddr; - src = tw->tw_rcv_saddr; - destp = ntohs(tw->tw_dport); - srcp = ntohs(tw->tw_sport); - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p", - i, src, srcp, dest, destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); -} - -#define TMPSZ 150 - -static int tcp4_seq_show(struct seq_file *seq, void *v) -{ - struct tcp_iter_state* st; - char tmpbuf[TMPSZ + 1]; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-*s\n", TMPSZ - 1, - " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); - goto out; - } - st = seq->private; - - switch (st->state) { - case TCP_SEQ_STATE_LISTENING: - case TCP_SEQ_STATE_ESTABLISHED: - get_tcp4_sock(v, tmpbuf, st->num); - break; - case TCP_SEQ_STATE_OPENREQ: - get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid); - break; - case TCP_SEQ_STATE_TIME_WAIT: - get_timewait4_sock(v, tmpbuf, st->num); - break; - } - seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); -out: - return 0; -} - -static struct file_operations tcp4_seq_fops; -static struct tcp_seq_afinfo tcp4_seq_afinfo = { - .owner = THIS_MODULE, - .name = "tcp", - .family = AF_INET, - .seq_show = tcp4_seq_show, - .seq_fops = &tcp4_seq_fops, -}; - -int __init tcp4_proc_init(void) -{ - return tcp_proc_register(&tcp4_seq_afinfo); -} - -void tcp4_proc_exit(void) -{ - tcp_proc_unregister(&tcp4_seq_afinfo); -} -#endif /* CONFIG_PROC_FS */ - -struct proto tcp_prot = { - .name = "TCP", - .owner = THIS_MODULE, - .close = tcp_close, - .connect = tcp_v4_connect, - .disconnect = tcp_disconnect, - .accept = inet_csk_accept, - .ioctl = tcp_ioctl, - .init = tcp_v4_init_sock, - .destroy = tcp_v4_destroy_sock, - 
.shutdown = tcp_shutdown, - .setsockopt = tcp_setsockopt, - .getsockopt = tcp_getsockopt, - .recvmsg = tcp_recvmsg, - .backlog_rcv = tcp_v4_do_rcv, - .hash = tcp_v4_hash, - .unhash = tcp_unhash, - .get_port = tcp_v4_get_port, - .enter_memory_pressure = tcp_enter_memory_pressure, - .sockets_allocated = &tcp_sockets_allocated, - .orphan_count = &tcp_orphan_count, - .memory_allocated = &tcp_memory_allocated, - .memory_pressure = &tcp_memory_pressure, - .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, - .max_header = MAX_TCP_HEADER, - .obj_size = sizeof(struct tcp_sock), - .twsk_prot = &tcp_timewait_sock_ops, - .rsk_prot = &tcp_request_sock_ops, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_tcp_setsockopt, - .compat_getsockopt = compat_tcp_getsockopt, -#endif -}; - -void __init tcp_v4_init(struct net_proto_family *ops) -{ - if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, - IPPROTO_TCP) < 0) - panic("Failed to create the TCP control socket.\n"); -} - -EXPORT_SYMBOL(ipv4_specific); -EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_prot); -EXPORT_SYMBOL(tcp_unhash); -EXPORT_SYMBOL(tcp_v4_conn_request); -EXPORT_SYMBOL(tcp_v4_connect); -EXPORT_SYMBOL(tcp_v4_do_rcv); -EXPORT_SYMBOL(tcp_v4_remember_stamp); -EXPORT_SYMBOL(tcp_v4_send_check); -EXPORT_SYMBOL(tcp_v4_syn_recv_sock); - -#ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(tcp_proc_register); -EXPORT_SYMBOL(tcp_proc_unregister); -#endif -EXPORT_SYMBOL(sysctl_local_port_range); -EXPORT_SYMBOL(sysctl_tcp_low_latency); - diff -Nurb linux-2.6.22-570/net/ipv4/tcp_output.c linux-2.6.22-591/net/ipv4/tcp_output.c --- linux-2.6.22-570/net/ipv4/tcp_output.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/tcp_output.c 2007-12-21 15:36:15.000000000 -0500 @@ -432,11 +432,11 @@ sysctl_flags = 0; if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; - if (sysctl_tcp_timestamps) { + if 
(sk->sk_net->sysctl_tcp_timestamps) { tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; sysctl_flags |= SYSCTL_FLAG_TSTAMPS; } - if (sysctl_tcp_window_scaling) { + if (sk->sk_net->sysctl_tcp_window_scaling) { tcp_header_size += TCPOLEN_WSCALE_ALIGNED; sysctl_flags |= SYSCTL_FLAG_WSCALE; } @@ -2215,7 +2215,7 @@ * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + (sk->sk_net->sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2238,7 +2238,7 @@ tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + sk->sk_net->sysctl_tcp_window_scaling, &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff -Nurb linux-2.6.22-570/net/ipv4/tcp_probe.c linux-2.6.22-591/net/ipv4/tcp_probe.c --- linux-2.6.22-570/net/ipv4/tcp_probe.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/tcp_probe.c 2007-12-21 15:36:15.000000000 -0500 @@ -172,7 +172,7 @@ if (IS_ERR(tcpw.fifo)) return PTR_ERR(tcpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_probe); @@ -182,7 +182,7 @@ pr_info("TCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(tcpw.fifo); return ret; @@ -192,7 +192,7 @@ static __exit void tcpprobe_exit(void) { kfifo_free(tcpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&tcp_probe); } diff -Nurb linux-2.6.22-570/net/ipv4/tunnel4.c linux-2.6.22-591/net/ipv4/tunnel4.c --- linux-2.6.22-570/net/ipv4/tunnel4.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/tunnel4.c 2007-12-21 15:36:15.000000000 -0500 @@ 
-75,6 +75,10 @@ { struct xfrm_tunnel *handler; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; @@ -113,6 +117,9 @@ { struct xfrm_tunnel *handler; + if (skb->dev->nd_net != &init_net) + return; + for (handler = tunnel4_handlers; handler; handler = handler->next) if (!handler->err_handler(skb, info)) break; diff -Nurb linux-2.6.22-570/net/ipv4/udp.c linux-2.6.22-591/net/ipv4/udp.c --- linux-2.6.22-570/net/ipv4/udp.c 2007-12-21 15:36:02.000000000 -0500 +++ linux-2.6.22-591/net/ipv4/udp.c 2007-12-21 15:36:15.000000000 -0500 @@ -101,6 +101,7 @@ #include #include #include +#include #include "udp_impl.h" /* @@ -112,16 +113,17 @@ struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static int udp_port_rover; - -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) { + if (sk->sk_net != net) + continue; if (sk->sk_hash == num) return 1; + } return 0; } @@ -148,9 +150,9 @@ if (snum == 0) { int best_size_so_far, best, result, i; - if (*port_rover > sysctl_local_port_range[1] || - *port_rover < sysctl_local_port_range[0]) - *port_rover = sysctl_local_port_range[0]; + if (*port_rover > sk->sk_net->sysctl_local_port_range[1] || + *port_rover < sk->sk_net->sysctl_local_port_range[0]) + *port_rover = sk->sk_net->sysctl_local_port_range[0]; best_size_so_far = 32767; best = result = *port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { @@ -158,9 +160,9 @@ head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & + if (result > 
sk->sk_net->sysctl_local_port_range[1]) + result = sk->sk_net->sysctl_local_port_range[0] + + ((result - sk->sk_net->sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); goto gotit; } @@ -177,11 +179,11 @@ result = best; for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & + if (result > sk->sk_net->sysctl_local_port_range[1]) + result = sk->sk_net->sysctl_local_port_range[0] + + ((result - sk->sk_net->sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (! __udp_lib_lport_inuse(result, udptable)) + if (! __udp_lib_lport_inuse(sk->sk_net, result, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) @@ -194,6 +196,7 @@ sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && sk2 != sk && + sk->sk_net == sk2->sk_net && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -216,7 +219,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*scmp)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); + return __udp_lib_get_port(sk, snum, udp_hash, &sk->sk_net->udp_port_rover, scmp); } extern int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2); @@ -229,7 +232,8 @@ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, +static struct sock *__udp4_lib_lookup(struct net *net, + __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct hlist_head udptable[]) { @@ -243,6 +247,9 @@ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); + if (sk->sk_net != net) + continue; + if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 
1 : 0); @@ -299,6 +306,9 @@ sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); + if (s->sk_net != sk->sk_net) + continue; + if (s->sk_hash != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || @@ -328,6 +338,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) { + struct net *net = skb->dev->nd_net; struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); @@ -337,7 +348,7 @@ int harderr; int err; - sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, + sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable ); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); @@ -623,7 +634,8 @@ rt = (struct rtable*)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = ipc.oif, .nl_u = { .ip4_u = { .daddr = faddr, .saddr = saddr, @@ -1288,6 +1300,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], int proto) { + struct net *net = skb->dev->nd_net; struct sock *sk; struct udphdr *uh = udp_hdr(skb); unsigned short ulen; @@ -1322,7 +1335,7 @@ udp_ping_of_death(skb, uh, saddr); #endif - sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, + sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest, skb->dev->ifindex, udptable ); if (sk != NULL) { @@ -1651,7 +1664,7 @@ sk = sk_next(sk); try_again: ; - } while (sk && (sk->sk_family != state->family || + } while (sk && ((sk->sk_net != state->net) || sk->sk_family != state->family || !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { @@ -1717,6 +1730,7 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -1724,21 +1738,31 @@ goto out; } +static int udp_seq_release(struct inode *inode, struct file 
*file) +{ + struct seq_file *seq = file->private_data; + struct udp_iter_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + /* ------------------------------------------------------------------------ */ -int udp_proc_register(struct udp_seq_afinfo *afinfo) +int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) { struct proc_dir_entry *p; int rc = 0; if (!afinfo) return -EINVAL; + if (net == &init_net) { afinfo->seq_fops->owner = afinfo->owner; afinfo->seq_fops->open = udp_seq_open; afinfo->seq_fops->read = seq_read; afinfo->seq_fops->llseek = seq_lseek; - afinfo->seq_fops->release = seq_release_private; + afinfo->seq_fops->release = udp_seq_release; + } - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -1746,11 +1770,12 @@ return rc; } -void udp_proc_unregister(struct udp_seq_afinfo *afinfo) +void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(net, afinfo->name); + if (net == &init_net) memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -1803,14 +1828,30 @@ .seq_fops = &udp4_seq_fops, }; + +static int udp4_proc_net_init(struct net *net) +{ + return udp_proc_register(net, &udp4_seq_afinfo); +} + +static void udp4_proc_net_exit(struct net *net) +{ + udp_proc_unregister(net, &udp4_seq_afinfo); +} + +static struct pernet_operations udp4_proc_net_ops = { + .init = udp4_proc_net_init, + .exit = udp4_proc_net_exit, +}; + int __init udp4_proc_init(void) { - return udp_proc_register(&udp4_seq_afinfo); + return register_pernet_subsys(&udp4_proc_net_ops); } void udp4_proc_exit(void) { - udp_proc_unregister(&udp4_seq_afinfo); + unregister_pernet_subsys(&udp4_proc_net_ops); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/ipv4/udplite.c linux-2.6.22-591/net/ipv4/udplite.c 
--- linux-2.6.22-570/net/ipv4/udplite.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/udplite.c 2007-12-21 15:36:15.000000000 -0500 @@ -31,11 +31,18 @@ static int udplite_rcv(struct sk_buff *skb) { + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); } static void udplite_err(struct sk_buff *skb, u32 info) { + if (skb->dev->nd_net != &init_net) + return; + return __udp4_lib_err(skb, info, udplite_hash); } @@ -103,7 +110,7 @@ inet_register_protosw(&udplite4_protosw); #ifdef CONFIG_PROC_FS - if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ + if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) /* udplite4_proc_init() */ printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__); #endif return; diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_input.c linux-2.6.22-591/net/ipv4/xfrm4_input.c --- linux-2.6.22-570/net/ipv4/xfrm4_input.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/xfrm4_input.c 2007-12-21 15:36:15.000000000 -0500 @@ -18,6 +18,10 @@ int xfrm4_rcv(struct sk_buff *skb) { + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } return xfrm4_rcv_encap(skb, 0); } diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_policy.c linux-2.6.22-591/net/ipv4/xfrm4_policy.c --- linux-2.6.22-570/net/ipv4/xfrm4_policy.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/xfrm4_policy.c 2007-12-21 15:36:15.000000000 -0500 @@ -25,6 +25,7 @@ { struct rtable *rt; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip4_u = { .daddr = daddr->a4, @@ -73,6 +74,7 @@ struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip4_u = { .saddr = fl->fl4_src, @@ -213,6 +215,7 @@ u8 *xprth = skb_network_header(skb) + iph->ihl * 4; memset(fl, 0, sizeof(struct flowi)); + fl->fl_net = &init_net; if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 
switch (iph->protocol) { case IPPROTO_UDP: @@ -306,7 +309,7 @@ xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct in_device *loopback_idev = in_dev_get(&init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_state.c linux-2.6.22-591/net/ipv4/xfrm4_state.c --- linux-2.6.22-570/net/ipv4/xfrm4_state.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/xfrm4_state.c 2007-12-21 15:36:15.000000000 -0500 @@ -16,7 +16,7 @@ static int xfrm4_init_flags(struct xfrm_state *x) { - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) x->props.flags |= XFRM_STATE_NOPMTUDISC; return 0; } diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c linux-2.6.22-591/net/ipv4/xfrm4_tunnel.c --- linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv4/xfrm4_tunnel.c 2007-12-21 15:36:12.000000000 -0500 @@ -109,3 +109,4 @@ module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP); diff -Nurb linux-2.6.22-570/net/ipv6/Kconfig linux-2.6.22-591/net/ipv6/Kconfig --- linux-2.6.22-570/net/ipv6/Kconfig 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/Kconfig 2007-12-21 15:36:12.000000000 -0500 @@ -109,7 +109,7 @@ If unsure, say Y. 
config IPV6_MIP6 - bool "IPv6: Mobility (EXPERIMENTAL)" + tristate "IPv6: Mobility (EXPERIMENTAL)" depends on IPV6 && EXPERIMENTAL select XFRM ---help--- diff -Nurb linux-2.6.22-570/net/ipv6/Makefile linux-2.6.22-591/net/ipv6/Makefile --- linux-2.6.22-570/net/ipv6/Makefile 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/Makefile 2007-12-21 15:36:12.000000000 -0500 @@ -14,7 +14,6 @@ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o -ipv6-$(CONFIG_IPV6_MIP6) += mip6.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-objs += $(ipv6-y) @@ -28,6 +27,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o +obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c linux-2.6.22-591/net/ipv6/addrconf.c --- linux-2.6.22-570/net/ipv6/addrconf.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/addrconf.c 2007-12-21 15:36:15.000000000 -0500 @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -457,7 +458,7 @@ struct inet6_dev *idev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -920,7 +921,7 @@ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -1047,7 +1048,7 @@ } /* Rule 4: Prefer home address */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) if (hiscore.rule < 4) { if (ifa_result->flags & IFA_F_HOMEADDRESS) hiscore.attrs |= IPV6_SADDR_SCORE_HOA; @@ -1882,7 +1883,7 @@ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(ireq.ifr6_ifindex); + dev = 
__dev_get_by_index(&init_net, ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1913,7 +1914,7 @@ if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(p.name)) == NULL) + if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) goto err_exit; err = dev_open(dev); } @@ -1943,7 +1944,7 @@ if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -1994,7 +1995,7 @@ struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2089,7 +2090,7 @@ return; } - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2245,12 +2246,12 @@ /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(idev->dev->iflink))) { + (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for_each_netdev(link_dev) { + for_each_netdev(&init_net, link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -2282,6 +2283,9 @@ struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { @@ -2419,7 +2423,7 @@ ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == &init_net.loopback_dev && how == 1) how = 0; rt6_ifdown(dev); @@ -2850,18 +2854,18 @@ int __init if6_proc_init(void) { - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(&init_net, "if_inet6", 
S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } void if6_proc_exit(void) { - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct in6_addr *addr) { @@ -3017,11 +3021,15 @@ static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3074,6 +3082,7 @@ static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3083,6 +3092,9 @@ u8 ifa_flags; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3103,7 +3115,7 @@ valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3292,7 +3304,7 @@ s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -3367,26 +3379,42 @@ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; + + if (net != &init_net) + return 0; + return 
inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3395,6 +3423,9 @@ struct sk_buff *skb; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3407,7 +3438,7 @@ ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3427,7 +3458,7 @@ kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); errout: @@ -3450,10 +3481,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3612,19 +3643,22 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, err; int s_idx = cb->args[0]; struct net_device *dev; struct inet6_dev *idev; struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; + if (net != &init_net) + return 0; /* FIXME: maybe disable ipv6 on non v6 guests? 
if (skb->sk && skb->sk->sk_vx_info) return skb->len; */ read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (!v6_dev_in_nx_info(dev, nxi)) @@ -3661,10 +3695,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3730,10 +3764,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4244,16 +4278,16 @@ * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) + if (!ipv6_add_dev(&init_net.loopback_dev)) err = -ENOMEM; rtnl_unlock(); if (err) return err; - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); + ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); #endif register_netdevice_notifier(&ipv6_dev_notf); @@ -4304,12 +4338,12 @@ * clean dev list. */ - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((idev = __in6_dev_get(dev)) == NULL) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(&loopback_dev, 2); + addrconf_ifdown(&init_net.loopback_dev, 2); /* * Check hash table. 
@@ -4335,6 +4369,6 @@ rtnl_unlock(); #ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); #endif } diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c.orig linux-2.6.22-591/net/ipv6/addrconf.c.orig --- linux-2.6.22-570/net/ipv6/addrconf.c.orig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/addrconf.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,4301 +0,0 @@ -/* - * IPv6 Address [auto]configuration - * Linux INET6 implementation - * - * Authors: - * Pedro Roque - * Alexey Kuznetsov - * - * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * Changes: - * - * Janos Farkas : delete timer on ifdown - * - * Andi Kleen : kill double kfree on module - * unload. - * Maciej W. Rozycki : FDDI support - * sekiya@USAGI : Don't send too many RS - * packets. - * yoshfuji@USAGI : Fixed interval between DAD - * packets. - * YOSHIFUJI Hideaki @USAGI : improved accuracy of - * address validation timer. - * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041) - * support. - * Yuji SEKIYA @USAGI : Don't assign a same IPv6 - * address on a same interface. - * YOSHIFUJI Hideaki @USAGI : ARCnet support - * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to - * seq_file. - * YOSHIFUJI Hideaki @USAGI : improved source address - * selection; consider scope, - * status etc. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_SYSCTL -#include -#endif -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_IPV6_PRIVACY -#include -#endif - -#include -#include - -#include -#include - -/* Set to 3 to get tracing... */ -#define ACONF_DEBUG 2 - -#if ACONF_DEBUG >= 3 -#define ADBG(x) printk x -#else -#define ADBG(x) -#endif - -#define INFINITY_LIFE_TIME 0xFFFFFFFF -#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) - -#ifdef CONFIG_SYSCTL -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); -static void addrconf_sysctl_unregister(struct ipv6_devconf *p); -#endif - -#ifdef CONFIG_IPV6_PRIVACY -static int __ipv6_regen_rndid(struct inet6_dev *idev); -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); -static void ipv6_regen_rndid(unsigned long data); - -static int desync_factor = MAX_DESYNC_FACTOR * HZ; -#endif - -static int ipv6_count_addresses(struct inet6_dev *idev); - -/* - * Configured unicast address hash table - */ -static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_RWLOCK(addrconf_hash_lock); - -static void addrconf_verify(unsigned long); - -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); - -static void addrconf_join_anycast(struct inet6_ifaddr *ifp); -static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); - -static int addrconf_ifdown(struct net_device *dev, int how); - -static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); -static void addrconf_dad_timer(unsigned long data); -static void addrconf_dad_completed(struct inet6_ifaddr *ifp); -static void addrconf_dad_run(struct inet6_dev *idev); -static void 
addrconf_rs_timer(unsigned long data); -static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); -static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); - -static void inet6_prefix_notify(int event, struct inet6_dev *idev, - struct prefix_info *pinfo); -static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); - -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - -struct ipv6_devconf ipv6_devconf __read_mostly = { - .forwarding = 0, - .hop_limit = IPV6_DEFAULT_HOPLIMIT, - .mtu6 = IPV6_MIN_MTU, - .accept_ra = 1, - .accept_redirects = 1, - .autoconf = 1, - .force_mld_version = 0, - .dad_transmits = 1, - .rtr_solicits = MAX_RTR_SOLICITATIONS, - .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, - .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY - .use_tempaddr = 0, - .temp_valid_lft = TEMP_VALID_LIFETIME, - .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, - .regen_max_retry = REGEN_MAX_RETRY, - .max_desync_factor = MAX_DESYNC_FACTOR, -#endif - .max_addresses = IPV6_MAX_ADDRESSES, - .accept_ra_defrtr = 1, - .accept_ra_pinfo = 1, -#ifdef CONFIG_IPV6_ROUTER_PREF - .accept_ra_rtr_pref = 1, - .rtr_probe_interval = 60 * HZ, -#ifdef CONFIG_IPV6_ROUTE_INFO - .accept_ra_rt_info_max_plen = 0, -#endif -#endif - .proxy_ndp = 0, - .accept_source_route = 0, /* we do not accept RH0 by default. 
*/ -}; - -static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { - .forwarding = 0, - .hop_limit = IPV6_DEFAULT_HOPLIMIT, - .mtu6 = IPV6_MIN_MTU, - .accept_ra = 1, - .accept_redirects = 1, - .autoconf = 1, - .dad_transmits = 1, - .rtr_solicits = MAX_RTR_SOLICITATIONS, - .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, - .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY - .use_tempaddr = 0, - .temp_valid_lft = TEMP_VALID_LIFETIME, - .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, - .regen_max_retry = REGEN_MAX_RETRY, - .max_desync_factor = MAX_DESYNC_FACTOR, -#endif - .max_addresses = IPV6_MAX_ADDRESSES, - .accept_ra_defrtr = 1, - .accept_ra_pinfo = 1, -#ifdef CONFIG_IPV6_ROUTER_PREF - .accept_ra_rtr_pref = 1, - .rtr_probe_interval = 60 * HZ, -#ifdef CONFIG_IPV6_ROUTE_INFO - .accept_ra_rt_info_max_plen = 0, -#endif -#endif - .proxy_ndp = 0, - .accept_source_route = 0, /* we do not accept RH0 by default. */ -}; - -/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; - -/* Check if a valid qdisc is available */ -static inline int addrconf_qdisc_ok(struct net_device *dev) -{ - return (dev->qdisc != &noop_qdisc); -} - -static void addrconf_del_timer(struct inet6_ifaddr *ifp) -{ - if (del_timer(&ifp->timer)) - __in6_ifa_put(ifp); -} - -enum addrconf_timer_t -{ - AC_NONE, - AC_DAD, - AC_RS, -}; - -static void addrconf_mod_timer(struct inet6_ifaddr *ifp, - enum addrconf_timer_t what, - unsigned long when) -{ - if (!del_timer(&ifp->timer)) - in6_ifa_hold(ifp); - - switch (what) { - case AC_DAD: - ifp->timer.function = addrconf_dad_timer; - break; - case AC_RS: - ifp->timer.function = addrconf_rs_timer; - break; - default:; - } - ifp->timer.expires = jiffies + when; - add_timer(&ifp->timer); -} - -static int snmp6_alloc_dev(struct inet6_dev *idev) -{ - int err = -ENOMEM; - - if (!idev || !idev->dev) - return 
-EINVAL; - - if (snmp_mib_init((void **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) - goto err_ip; - if (snmp_mib_init((void **)idev->stats.icmpv6, - sizeof(struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) - goto err_icmp; - - return 0; - -err_icmp: - snmp_mib_free((void **)idev->stats.ipv6); -err_ip: - return err; -} - -static int snmp6_free_dev(struct inet6_dev *idev) -{ - snmp_mib_free((void **)idev->stats.icmpv6); - snmp_mib_free((void **)idev->stats.ipv6); - return 0; -} - -/* Nobody refers to this device, we may destroy it. */ - -static void in6_dev_finish_destroy_rcu(struct rcu_head *head) -{ - struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); - kfree(idev); -} - -void in6_dev_finish_destroy(struct inet6_dev *idev) -{ - struct net_device *dev = idev->dev; - BUG_TRAP(idev->addr_list==NULL); - BUG_TRAP(idev->mc_list==NULL); -#ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL"); -#endif - dev_put(dev); - if (!idev->dead) { - printk("Freeing alive inet6 device %p\n", idev); - return; - } - snmp6_free_dev(idev); - call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); -} - -EXPORT_SYMBOL(in6_dev_finish_destroy); - -static struct inet6_dev * ipv6_add_dev(struct net_device *dev) -{ - struct inet6_dev *ndev; - struct in6_addr maddr; - - ASSERT_RTNL(); - - if (dev->mtu < IPV6_MIN_MTU) - return NULL; - - ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - - if (ndev == NULL) - return NULL; - - rwlock_init(&ndev->lock); - ndev->dev = dev; - memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); - ndev->cnf.mtu6 = dev->mtu; - ndev->cnf.sysctl = NULL; - ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); - if (ndev->nd_parms == NULL) { - kfree(ndev); - return NULL; - } - /* We refer to the device */ - dev_hold(dev); - - if (snmp6_alloc_dev(ndev) < 0) { - ADBG((KERN_WARNING - "%s(): cannot allocate memory for statistics; dev=%s.\n", - __FUNCTION__, 
dev->name)); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); - return NULL; - } - - if (snmp6_register_dev(ndev) < 0) { - ADBG((KERN_WARNING - "%s(): cannot create /proc/net/dev_snmp6/%s\n", - __FUNCTION__, dev->name)); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); - return NULL; - } - - /* One reference from device. We must do this before - * we invoke __ipv6_regen_rndid(). - */ - in6_dev_hold(ndev); - -#ifdef CONFIG_IPV6_PRIVACY - init_timer(&ndev->regen_timer); - ndev->regen_timer.function = ipv6_regen_rndid; - ndev->regen_timer.data = (unsigned long) ndev; - if ((dev->flags&IFF_LOOPBACK) || - dev->type == ARPHRD_TUNNEL || -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) - dev->type == ARPHRD_SIT || -#endif - dev->type == ARPHRD_NONE) { - printk(KERN_INFO - "%s: Disabled Privacy Extensions\n", - dev->name); - ndev->cnf.use_tempaddr = -1; - } else { - in6_dev_hold(ndev); - ipv6_regen_rndid((unsigned long) ndev); - } -#endif - - if (netif_running(dev) && addrconf_qdisc_ok(dev)) - ndev->if_flags |= IF_READY; - - ipv6_mc_init_dev(ndev); - ndev->tstamp = jiffies; -#ifdef CONFIG_SYSCTL - neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - NULL); - addrconf_sysctl_register(ndev, &ndev->cnf); -#endif - /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, ndev); - - /* Join all-node multicast group */ - ipv6_addr_all_nodes(&maddr); - ipv6_dev_mc_inc(dev, &maddr); - - return ndev; -} - -static struct inet6_dev * ipv6_find_idev(struct net_device *dev) -{ - struct inet6_dev *idev; - - ASSERT_RTNL(); - - if ((idev = __in6_dev_get(dev)) == NULL) { - if ((idev = ipv6_add_dev(dev)) == NULL) - return NULL; - } - - if (dev->flags&IFF_UP) - ipv6_mc_up(idev); - return idev; -} - -#ifdef CONFIG_SYSCTL -static void dev_forward_change(struct inet6_dev *idev) -{ - struct net_device *dev; - struct 
inet6_ifaddr *ifa; - struct in6_addr addr; - - if (!idev) - return; - dev = idev->dev; - if (dev && (dev->flags & IFF_MULTICAST)) { - ipv6_addr_all_routers(&addr); - - if (idev->cnf.forwarding) - ipv6_dev_mc_inc(dev, &addr); - else - ipv6_dev_mc_dec(dev, &addr); - } - for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { - if (ifa->flags&IFA_F_TENTATIVE) - continue; - if (idev->cnf.forwarding) - addrconf_join_anycast(ifa); - else - addrconf_leave_anycast(ifa); - } -} - - -static void addrconf_forward_change(void) -{ - struct net_device *dev; - struct inet6_dev *idev; - - read_lock(&dev_base_lock); - for_each_netdev(dev) { - rcu_read_lock(); - idev = __in6_dev_get(dev); - if (idev) { - int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); - idev->cnf.forwarding = ipv6_devconf.forwarding; - if (changed) - dev_forward_change(idev); - } - rcu_read_unlock(); - } - read_unlock(&dev_base_lock); -} -#endif - -/* Nobody refers to this ifaddr, destroy it */ - -void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) -{ - BUG_TRAP(ifp->if_next==NULL); - BUG_TRAP(ifp->lst_next==NULL); -#ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); -#endif - - in6_dev_put(ifp->idev); - - if (del_timer(&ifp->timer)) - printk("Timer is still running, when freeing ifa=%p\n", ifp); - - if (!ifp->dead) { - printk("Freeing alive inet6 address %p\n", ifp); - return; - } - dst_release(&ifp->rt->u.dst); - - kfree(ifp); -} - -static void -ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) -{ - struct inet6_ifaddr *ifa, **ifap; - int ifp_scope = ipv6_addr_src_scope(&ifp->addr); - - /* - * Each device address list is sorted in order of scope - - * global before linklocal. 
- */ - for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; - ifap = &ifa->if_next) { - if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) - break; - } - - ifp->if_next = *ifap; - *ifap = ifp; -} - -/* On success it returns ifp with increased reference count */ - -static struct inet6_ifaddr * -ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, - int scope, u32 flags) -{ - struct inet6_ifaddr *ifa = NULL; - struct rt6_info *rt; - int hash; - int err = 0; - - rcu_read_lock_bh(); - if (idev->dead) { - err = -ENODEV; /*XXX*/ - goto out2; - } - - write_lock(&addrconf_hash_lock); - - /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(addr, idev->dev)) { - ADBG(("ipv6_add_addr: already assigned\n")); - err = -EEXIST; - goto out; - } - - ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); - - if (ifa == NULL) { - ADBG(("ipv6_add_addr: malloc failed\n")); - err = -ENOBUFS; - goto out; - } - - rt = addrconf_dst_alloc(idev, addr, 0); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto out; - } - - ipv6_addr_copy(&ifa->addr, addr); - - spin_lock_init(&ifa->lock); - init_timer(&ifa->timer); - ifa->timer.data = (unsigned long) ifa; - ifa->scope = scope; - ifa->prefix_len = pfxlen; - ifa->flags = flags | IFA_F_TENTATIVE; - ifa->cstamp = ifa->tstamp = jiffies; - - ifa->rt = rt; - - /* - * part one of RFC 4429, section 3.3 - * We should not configure an address as - * optimistic if we do not yet know the link - * layer address of our nexhop router - */ - - if (rt->rt6i_nexthop == NULL) - ifa->flags &= ~IFA_F_OPTIMISTIC; - - ifa->idev = idev; - in6_dev_hold(idev); - /* For caller */ - in6_ifa_hold(ifa); - - /* Add to big hash table */ - hash = ipv6_addr_hash(addr); - - ifa->lst_next = inet6_addr_lst[hash]; - inet6_addr_lst[hash] = ifa; - in6_ifa_hold(ifa); - write_unlock(&addrconf_hash_lock); - - write_lock(&idev->lock); - /* Add to inet6_dev unicast addr list. 
*/ - ipv6_link_dev_addr(idev, ifa); - -#ifdef CONFIG_IPV6_PRIVACY - if (ifa->flags&IFA_F_TEMPORARY) { - ifa->tmp_next = idev->tempaddr_list; - idev->tempaddr_list = ifa; - in6_ifa_hold(ifa); - } -#endif - - in6_ifa_hold(ifa); - write_unlock(&idev->lock); -out2: - rcu_read_unlock_bh(); - - if (likely(err == 0)) - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); - else { - kfree(ifa); - ifa = ERR_PTR(err); - } - - return ifa; -out: - write_unlock(&addrconf_hash_lock); - goto out2; -} - -/* This function wants to get referenced ifp and releases it before return */ - -static void ipv6_del_addr(struct inet6_ifaddr *ifp) -{ - struct inet6_ifaddr *ifa, **ifap; - struct inet6_dev *idev = ifp->idev; - int hash; - int deleted = 0, onlink = 0; - unsigned long expires = jiffies; - - hash = ipv6_addr_hash(&ifp->addr); - - ifp->dead = 1; - - write_lock_bh(&addrconf_hash_lock); - for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; - ifap = &ifa->lst_next) { - if (ifa == ifp) { - *ifap = ifa->lst_next; - __in6_ifa_put(ifp); - ifa->lst_next = NULL; - break; - } - } - write_unlock_bh(&addrconf_hash_lock); - - write_lock_bh(&idev->lock); -#ifdef CONFIG_IPV6_PRIVACY - if (ifp->flags&IFA_F_TEMPORARY) { - for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; - ifap = &ifa->tmp_next) { - if (ifa == ifp) { - *ifap = ifa->tmp_next; - if (ifp->ifpub) { - in6_ifa_put(ifp->ifpub); - ifp->ifpub = NULL; - } - __in6_ifa_put(ifp); - ifa->tmp_next = NULL; - break; - } - } - } -#endif - - for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { - if (ifa == ifp) { - *ifap = ifa->if_next; - __in6_ifa_put(ifp); - ifa->if_next = NULL; - if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) - break; - deleted = 1; - continue; - } else if (ifp->flags & IFA_F_PERMANENT) { - if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, - ifp->prefix_len)) { - if (ifa->flags & IFA_F_PERMANENT) { - onlink = 1; - if (deleted) - break; - } else { - unsigned long lifetime; - - if (!onlink) - onlink = -1; - - 
spin_lock(&ifa->lock); - lifetime = min_t(unsigned long, - ifa->valid_lft, 0x7fffffffUL/HZ); - if (time_before(expires, - ifa->tstamp + lifetime * HZ)) - expires = ifa->tstamp + lifetime * HZ; - spin_unlock(&ifa->lock); - } - } - } - ifap = &ifa->if_next; - } - write_unlock_bh(&idev->lock); - - ipv6_ifa_notify(RTM_DELADDR, ifp); - - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); - - addrconf_del_timer(ifp); - - /* - * Purge or update corresponding prefix - * - * 1) we don't purge prefix here if address was not permanent. - * prefix is managed by its own lifetime. - * 2) if there're no addresses, delete prefix. - * 3) if there're still other permanent address(es), - * corresponding prefix is still permanent. - * 4) otherwise, update prefix lifetime to the - * longest valid lifetime among the corresponding - * addresses on the device. - * Note: subsequent RA will update lifetime. - * - * --yoshfuji - */ - if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { - struct in6_addr prefix; - struct rt6_info *rt; - - ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); - - if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { - if (onlink == 0) { - ip6_del_rt(rt); - rt = NULL; - } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { - rt->rt6i_expires = expires; - rt->rt6i_flags |= RTF_EXPIRES; - } - } - dst_release(&rt->u.dst); - } - - in6_ifa_put(ifp); -} - -#ifdef CONFIG_IPV6_PRIVACY -static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) -{ - struct inet6_dev *idev = ifp->idev; - struct in6_addr addr, *tmpaddr; - unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; - int tmp_plen; - int ret = 0; - int max_addresses; - u32 addr_flags; - - write_lock(&idev->lock); - if (ift) { - spin_lock_bh(&ift->lock); - memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); - spin_unlock_bh(&ift->lock); - tmpaddr = &addr; - } else { - tmpaddr = NULL; - } 
-retry: - in6_dev_hold(idev); - if (idev->cnf.use_tempaddr <= 0) { - write_unlock(&idev->lock); - printk(KERN_INFO - "ipv6_create_tempaddr(): use_tempaddr is disabled.\n"); - in6_dev_put(idev); - ret = -1; - goto out; - } - spin_lock_bh(&ifp->lock); - if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { - idev->cnf.use_tempaddr = -1; /*XXX*/ - spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); - printk(KERN_WARNING - "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n"); - in6_dev_put(idev); - ret = -1; - goto out; - } - in6_ifa_hold(ifp); - memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { - spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); - printk(KERN_WARNING - "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n"); - in6_ifa_put(ifp); - in6_dev_put(idev); - ret = -1; - goto out; - } - memcpy(&addr.s6_addr[8], idev->rndid, 8); - tmp_valid_lft = min_t(__u32, - ifp->valid_lft, - idev->cnf.temp_valid_lft); - tmp_prefered_lft = min_t(__u32, - ifp->prefered_lft, - idev->cnf.temp_prefered_lft - desync_factor / HZ); - tmp_plen = ifp->prefix_len; - max_addresses = idev->cnf.max_addresses; - tmp_cstamp = ifp->cstamp; - tmp_tstamp = ifp->tstamp; - spin_unlock_bh(&ifp->lock); - - write_unlock(&idev->lock); - - addr_flags = IFA_F_TEMPORARY; - /* set in addrconf_prefix_rcv() */ - if (ifp->flags & IFA_F_OPTIMISTIC) - addr_flags |= IFA_F_OPTIMISTIC; - - ift = !max_addresses || - ipv6_count_addresses(idev) < max_addresses ? 
- ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, - addr_flags) : NULL; - if (!ift || IS_ERR(ift)) { - in6_ifa_put(ifp); - in6_dev_put(idev); - printk(KERN_INFO - "ipv6_create_tempaddr(): retry temporary address regeneration.\n"); - tmpaddr = &addr; - write_lock(&idev->lock); - goto retry; - } - - spin_lock_bh(&ift->lock); - ift->ifpub = ifp; - ift->valid_lft = tmp_valid_lft; - ift->prefered_lft = tmp_prefered_lft; - ift->cstamp = tmp_cstamp; - ift->tstamp = tmp_tstamp; - spin_unlock_bh(&ift->lock); - - addrconf_dad_start(ift, 0); - in6_ifa_put(ift); - in6_dev_put(idev); -out: - return ret; -} -#endif - -/* - * Choose an appropriate source address (RFC3484) - */ -struct ipv6_saddr_score { - int addr_type; - unsigned int attrs; - int matchlen; - int scope; - unsigned int rule; -}; - -#define IPV6_SADDR_SCORE_LOCAL 0x0001 -#define IPV6_SADDR_SCORE_PREFERRED 0x0004 -#define IPV6_SADDR_SCORE_HOA 0x0008 -#define IPV6_SADDR_SCORE_OIF 0x0010 -#define IPV6_SADDR_SCORE_LABEL 0x0020 -#define IPV6_SADDR_SCORE_PRIVACY 0x0040 - -static inline int ipv6_saddr_preferred(int type) -{ - if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| - IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) - return 1; - return 0; -} - -/* static matching label */ -static inline int ipv6_saddr_label(const struct in6_addr *addr, int type) -{ - /* - * prefix (longest match) label - * ----------------------------- - * ::1/128 0 - * ::/0 1 - * 2002::/16 2 - * ::/96 3 - * ::ffff:0:0/96 4 - * fc00::/7 5 - * 2001::/32 6 - */ - if (type & IPV6_ADDR_LOOPBACK) - return 0; - else if (type & IPV6_ADDR_COMPATv4) - return 3; - else if (type & IPV6_ADDR_MAPPED) - return 4; - else if (addr->s6_addr32[0] == htonl(0x20010000)) - return 6; - else if (addr->s6_addr16[0] == htons(0x2002)) - return 2; - else if ((addr->s6_addr[0] & 0xfe) == 0xfc) - return 5; - return 1; -} - -int ipv6_dev_get_saddr(struct net_device *daddr_dev, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - struct 
ipv6_saddr_score hiscore; - struct inet6_ifaddr *ifa_result = NULL; - int daddr_type = __ipv6_addr_type(daddr); - int daddr_scope = __ipv6_addr_src_scope(daddr_type); - u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); - struct net_device *dev; - - memset(&hiscore, 0, sizeof(hiscore)); - - read_lock(&dev_base_lock); - rcu_read_lock(); - - for_each_netdev(dev) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - - /* Rule 0: Candidate Source Address (section 4) - * - multicast and link-local destination address, - * the set of candidate source address MUST only - * include addresses assigned to interfaces - * belonging to the same link as the outgoing - * interface. - * (- For site-local destination addresses, the - * set of candidate source addresses MUST only - * include addresses assigned to interfaces - * belonging to the same site as the outgoing - * interface.) - */ - if ((daddr_type & IPV6_ADDR_MULTICAST || - daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && - daddr_dev && dev != daddr_dev) - continue; - - idev = __in6_dev_get(dev); - if (!idev) - continue; - - read_lock_bh(&idev->lock); - for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { - struct ipv6_saddr_score score; - - score.addr_type = __ipv6_addr_type(&ifa->addr); - - /* Rule 0: - * - Tentative Address (RFC2462 section 5.4) - * - A tentative address is not considered - * "assigned to an interface" in the traditional - * sense, unless it is also flagged as optimistic. - * - Candidate Source Address (section 4) - * - In any case, anycast addresses, multicast - * addresses, and the unspecified address MUST - * NOT be included in a candidate set. 
- */ - if ((ifa->flags & IFA_F_TENTATIVE) && - (!(ifa->flags & IFA_F_OPTIMISTIC))) - continue; - if (unlikely(score.addr_type == IPV6_ADDR_ANY || - score.addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ADDRCONF: unspecified / multicast address" - "assigned as unicast address on %s", - dev->name); - continue; - } - - score.attrs = 0; - score.matchlen = 0; - score.scope = 0; - score.rule = 0; - - if (ifa_result == NULL) { - /* record it if the first available entry */ - goto record_it; - } - - /* Rule 1: Prefer same address */ - if (hiscore.rule < 1) { - if (ipv6_addr_equal(&ifa_result->addr, daddr)) - hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; - hiscore.rule++; - } - if (ipv6_addr_equal(&ifa->addr, daddr)) { - score.attrs |= IPV6_SADDR_SCORE_LOCAL; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { - score.rule = 1; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) - continue; - } - - /* Rule 2: Prefer appropriate scope */ - if (hiscore.rule < 2) { - hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); - hiscore.rule++; - } - score.scope = __ipv6_addr_src_scope(score.addr_type); - if (hiscore.scope < score.scope) { - if (hiscore.scope < daddr_scope) { - score.rule = 2; - goto record_it; - } else - continue; - } else if (score.scope < hiscore.scope) { - if (score.scope < daddr_scope) - break; /* addresses sorted by scope */ - else { - score.rule = 2; - goto record_it; - } - } - - /* Rule 3: Avoid deprecated and optimistic addresses */ - if (hiscore.rule < 3) { - if (ipv6_saddr_preferred(hiscore.addr_type) || - (((ifa_result->flags & - (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) - hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; - hiscore.rule++; - } - if (ipv6_saddr_preferred(score.addr_type) || - (((ifa->flags & - (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { - score.attrs |= IPV6_SADDR_SCORE_PREFERRED; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { - score.rule = 3; - goto record_it; - } - } else { - if 
(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) - continue; - } - - /* Rule 4: Prefer home address */ -#ifdef CONFIG_IPV6_MIP6 - if (hiscore.rule < 4) { - if (ifa_result->flags & IFA_F_HOMEADDRESS) - hiscore.attrs |= IPV6_SADDR_SCORE_HOA; - hiscore.rule++; - } - if (ifa->flags & IFA_F_HOMEADDRESS) { - score.attrs |= IPV6_SADDR_SCORE_HOA; - if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { - score.rule = 4; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) - continue; - } -#else - if (hiscore.rule < 4) - hiscore.rule++; -#endif - - /* Rule 5: Prefer outgoing interface */ - if (hiscore.rule < 5) { - if (daddr_dev == NULL || - daddr_dev == ifa_result->idev->dev) - hiscore.attrs |= IPV6_SADDR_SCORE_OIF; - hiscore.rule++; - } - if (daddr_dev == NULL || - daddr_dev == ifa->idev->dev) { - score.attrs |= IPV6_SADDR_SCORE_OIF; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { - score.rule = 5; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) - continue; - } - - /* Rule 6: Prefer matching label */ - if (hiscore.rule < 6) { - if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) - hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; - hiscore.rule++; - } - if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { - score.attrs |= IPV6_SADDR_SCORE_LABEL; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { - score.rule = 6; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) - continue; - } - -#ifdef CONFIG_IPV6_PRIVACY - /* Rule 7: Prefer public address - * Note: prefer temprary address if use_tempaddr >= 2 - */ - if (hiscore.rule < 7) { - if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ - (ifa_result->idev->cnf.use_tempaddr >= 2)) - hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; - hiscore.rule++; - } - if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ - (ifa->idev->cnf.use_tempaddr >= 2)) { - score.attrs |= IPV6_SADDR_SCORE_PRIVACY; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { - score.rule = 
7; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) - continue; - } -#else - if (hiscore.rule < 7) - hiscore.rule++; -#endif - /* Rule 8: Use longest matching prefix */ - if (hiscore.rule < 8) { - hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); - hiscore.rule++; - } - score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); - if (score.matchlen > hiscore.matchlen) { - score.rule = 8; - goto record_it; - } -#if 0 - else if (score.matchlen < hiscore.matchlen) - continue; -#endif - - /* Final Rule: choose first available one */ - continue; -record_it: - if (ifa_result) - in6_ifa_put(ifa_result); - in6_ifa_hold(ifa); - ifa_result = ifa; - hiscore = score; - } - read_unlock_bh(&idev->lock); - } - rcu_read_unlock(); - read_unlock(&dev_base_lock); - - if (!ifa_result) - return -EADDRNOTAVAIL; - - ipv6_addr_copy(saddr, &ifa_result->addr); - in6_ifa_put(ifa_result); - return 0; -} - - -int ipv6_get_saddr(struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - return ipv6_dev_get_saddr(dst ? 
ip6_dst_idev(dst)->dev : NULL, daddr, saddr); -} - -EXPORT_SYMBOL(ipv6_get_saddr); - -int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, - unsigned char banned_flags) -{ - struct inet6_dev *idev; - int err = -EADDRNOTAVAIL; - - rcu_read_lock(); - if ((idev = __in6_dev_get(dev)) != NULL) { - struct inet6_ifaddr *ifp; - - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { - ipv6_addr_copy(addr, &ifp->addr); - err = 0; - break; - } - } - read_unlock_bh(&idev->lock); - } - rcu_read_unlock(); - return err; -} - -static int ipv6_count_addresses(struct inet6_dev *idev) -{ - int cnt = 0; - struct inet6_ifaddr *ifp; - - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) - cnt++; - read_unlock_bh(&idev->lock); - return cnt; -} - -int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) -{ - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - - read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE)) { - if (dev == NULL || ifp->idev->dev == dev || - !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) - break; - } - } - read_unlock_bh(&addrconf_hash_lock); - return ifp != NULL; -} - -EXPORT_SYMBOL(ipv6_chk_addr); - -static -int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) -{ - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev) - break; - } - } - return ifp != NULL; -} - -struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) -{ - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - - read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) 
{ - if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev || - !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - in6_ifa_hold(ifp); - break; - } - } - } - read_unlock_bh(&addrconf_hash_lock); - - return ifp; -} - -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) -{ - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - if (!sk2_rcv_saddr && !sk_ipv6only) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) - return 1; - - if (addr_type == IPV6_ADDR_MAPPED && - !sk2_ipv6only && - (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr)) - return 1; - - return 0; -} - -/* Gets referenced address, destroys ifaddr */ - -static void addrconf_dad_stop(struct inet6_ifaddr *ifp) -{ - if (ifp->flags&IFA_F_PERMANENT) { - spin_lock_bh(&ifp->lock); - addrconf_del_timer(ifp); - ifp->flags |= IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - in6_ifa_put(ifp); -#ifdef CONFIG_IPV6_PRIVACY - } else if (ifp->flags&IFA_F_TEMPORARY) { - struct inet6_ifaddr *ifpub; - spin_lock_bh(&ifp->lock); - ifpub = ifp->ifpub; - if (ifpub) { - in6_ifa_hold(ifpub); - spin_unlock_bh(&ifp->lock); - ipv6_create_tempaddr(ifpub, ifp); - in6_ifa_put(ifpub); - } else { - spin_unlock_bh(&ifp->lock); - } - ipv6_del_addr(ifp); -#endif - } else - ipv6_del_addr(ifp); -} - -void addrconf_dad_failure(struct inet6_ifaddr *ifp) -{ - 
if (net_ratelimit()) - printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); - addrconf_dad_stop(ifp); -} - -/* Join to solicited addr multicast group. */ - -void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) -{ - struct in6_addr maddr; - - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) - return; - - addrconf_addr_solict_mult(addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); -} - -void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) -{ - struct in6_addr maddr; - - if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) - return; - - addrconf_addr_solict_mult(addr, &maddr); - __ipv6_dev_mc_dec(idev, &maddr); -} - -static void addrconf_join_anycast(struct inet6_ifaddr *ifp) -{ - struct in6_addr addr; - ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); - if (ipv6_addr_any(&addr)) - return; - ipv6_dev_ac_inc(ifp->idev->dev, &addr); -} - -static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) -{ - struct in6_addr addr; - ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); - if (ipv6_addr_any(&addr)) - return; - __ipv6_dev_ac_dec(ifp->idev, &addr); -} - -static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) -{ - if (dev->addr_len != ETH_ALEN) - return -1; - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - - /* - * The zSeries OSA network cards can be shared among various - * OS instances, but the OSA cards have only one MAC address. - * This leads to duplicate address conflicts in conjunction - * with IPv6 if more than one instance uses the same card. - * - * The driver for these cards can deliver a unique 16-bit - * identifier for each instance sharing the same card. It is - * placed instead of 0xFFFE in the interface identifier. The - * "u" bit of the interface identifier is not inverted in this - * case. Hence the resulting interface identifier has local - * scope according to RFC2373. 
- */ - if (dev->dev_id) { - eui[3] = (dev->dev_id >> 8) & 0xFF; - eui[4] = dev->dev_id & 0xFF; - } else { - eui[3] = 0xFF; - eui[4] = 0xFE; - eui[0] ^= 2; - } - return 0; -} - -static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) -{ - /* XXX: inherit EUI-64 from other interface -- yoshfuji */ - if (dev->addr_len != ARCNET_ALEN) - return -1; - memset(eui, 0, 7); - eui[7] = *(u8*)dev->dev_addr; - return 0; -} - -static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) -{ - if (dev->addr_len != INFINIBAND_ALEN) - return -1; - memcpy(eui, dev->dev_addr + 12, 8); - eui[0] |= 2; - return 0; -} - -static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) -{ - switch (dev->type) { - case ARPHRD_ETHER: - case ARPHRD_FDDI: - case ARPHRD_IEEE802_TR: - return addrconf_ifid_eui48(eui, dev); - case ARPHRD_ARCNET: - return addrconf_ifid_arcnet(eui, dev); - case ARPHRD_INFINIBAND: - return addrconf_ifid_infiniband(eui, dev); - } - return -1; -} - -static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) -{ - int err = -1; - struct inet6_ifaddr *ifp; - - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { - memcpy(eui, ifp->addr.s6_addr+8, 8); - err = 0; - break; - } - } - read_unlock_bh(&idev->lock); - return err; -} - -#ifdef CONFIG_IPV6_PRIVACY -/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static int __ipv6_regen_rndid(struct inet6_dev *idev) -{ -regen: - get_random_bytes(idev->rndid, sizeof(idev->rndid)); - idev->rndid[0] &= ~0x02; - - /* - * : - * check if generated address is not inappropriate - * - * - Reserved subnet anycast (RFC 2526) - * 11111101 11....11 1xxxxxxx - * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 - * 00-00-5E-FE-xx-xx-xx-xx - * - value 0 - * - XXX: already assigned to an address on the device - */ - if (idev->rndid[0] == 0xfd && - 
(idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff && - (idev->rndid[7]&0x80)) - goto regen; - if ((idev->rndid[0]|idev->rndid[1]) == 0) { - if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe) - goto regen; - if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) - goto regen; - } - - return 0; -} - -static void ipv6_regen_rndid(unsigned long data) -{ - struct inet6_dev *idev = (struct inet6_dev *) data; - unsigned long expires; - - rcu_read_lock_bh(); - write_lock_bh(&idev->lock); - - if (idev->dead) - goto out; - - if (__ipv6_regen_rndid(idev) < 0) - goto out; - - expires = jiffies + - idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; - if (time_before(expires, jiffies)) { - printk(KERN_WARNING - "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", - idev->dev->name); - goto out; - } - - if (!mod_timer(&idev->regen_timer, expires)) - in6_dev_hold(idev); - -out: - write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); - in6_dev_put(idev); -} - -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { - int ret = 0; - - if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - ret = __ipv6_regen_rndid(idev); - return ret; -} -#endif - -/* - * Add prefix route. - */ - -static void -addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, - unsigned long expires, u32 flags) -{ - struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_expires = expires, - .fc_dst_len = plen, - .fc_flags = RTF_UP | flags, - }; - - ipv6_addr_copy(&cfg.fc_dst, pfx); - - /* Prevent useless cloning on PtP SIT. - This thing is done here expecting that the whole - class of non-broadcast devices need not cloning. 
- */ -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) - if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) - cfg.fc_flags |= RTF_NONEXTHOP; -#endif - - ip6_route_add(&cfg); -} - -/* Create "default" multicast route to the interface */ - -static void addrconf_add_mroute(struct net_device *dev) -{ - struct fib6_config cfg = { - .fc_table = RT6_TABLE_LOCAL, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_dst_len = 8, - .fc_flags = RTF_UP, - }; - - ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - - ip6_route_add(&cfg); -} - -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -static void sit_route_add(struct net_device *dev) -{ - struct fib6_config cfg = { - .fc_table = RT6_TABLE_MAIN, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_dst_len = 96, - .fc_flags = RTF_UP | RTF_NONEXTHOP, - }; - - /* prefix length - 96 bits "::d.d.d.d" */ - ip6_route_add(&cfg); -} -#endif - -static void addrconf_add_lroute(struct net_device *dev) -{ - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - addrconf_prefix_route(&addr, 64, dev, 0, 0); -} - -static struct inet6_dev *addrconf_add_dev(struct net_device *dev) -{ - struct inet6_dev *idev; - - ASSERT_RTNL(); - - if ((idev = ipv6_find_idev(dev)) == NULL) - return NULL; - - /* Add default multicast route */ - addrconf_add_mroute(dev); - - /* Add link local route */ - addrconf_add_lroute(dev); - return idev; -} - -void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) -{ - struct prefix_info *pinfo; - __u32 valid_lft; - __u32 prefered_lft; - int addr_type; - unsigned long rt_expires; - struct inet6_dev *in6_dev; - - pinfo = (struct prefix_info *) opt; - - if (len < sizeof(struct prefix_info)) { - ADBG(("addrconf: prefix option too short\n")); - return; - } - - /* - * Validation checks ([ADDRCONF], page 19) - */ - - addr_type = ipv6_addr_type(&pinfo->prefix); - - if (addr_type & 
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)) - return; - - valid_lft = ntohl(pinfo->valid); - prefered_lft = ntohl(pinfo->prefered); - - if (prefered_lft > valid_lft) { - if (net_ratelimit()) - printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n"); - return; - } - - in6_dev = in6_dev_get(dev); - - if (in6_dev == NULL) { - if (net_ratelimit()) - printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); - return; - } - - /* - * Two things going on here: - * 1) Add routes for on-link prefixes - * 2) Configure prefixes with the auto flag set - */ - - /* Avoid arithmetic overflow. Really, we could - save rt_expires in seconds, likely valid_lft, - but it would require division in fib gc, that it - not good. - */ - if (valid_lft >= 0x7FFFFFFF/HZ) - rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - else - rt_expires = valid_lft * HZ; - - /* - * We convert this (in jiffies) to clock_t later. - * Avoid arithmetic overflow there as well. - * Overflow can happen only if HZ < USER_HZ. 
- */ - if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) - rt_expires = 0x7FFFFFFF / USER_HZ; - - if (pinfo->onlink) { - struct rt6_info *rt; - rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1); - - if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { - if (rt->rt6i_flags&RTF_EXPIRES) { - if (valid_lft == 0) { - ip6_del_rt(rt); - rt = NULL; - } else { - rt->rt6i_expires = jiffies + rt_expires; - } - } - } else if (valid_lft) { - addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); - } - if (rt) - dst_release(&rt->u.dst); - } - - /* Try to figure out our local address for this prefix */ - - if (pinfo->autoconf && in6_dev->cnf.autoconf) { - struct inet6_ifaddr * ifp; - struct in6_addr addr; - int create = 0, update_lft = 0; - - if (pinfo->prefix_len == 64) { - memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && - ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { - in6_dev_put(in6_dev); - return; - } - goto ok; - } - if (net_ratelimit()) - printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", - pinfo->prefix_len); - in6_dev_put(in6_dev); - return; - -ok: - - ifp = ipv6_get_ifaddr(&addr, dev, 1); - - if (ifp == NULL && valid_lft) { - int max_addresses = in6_dev->cnf.max_addresses; - u32 addr_flags = 0; - -#ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (in6_dev->cnf.optimistic_dad && - !ipv6_devconf.forwarding) - addr_flags = IFA_F_OPTIMISTIC; -#endif - - /* Do not allow to create too much of autoconfigured - * addresses; this would be too easy way to crash kernel. 
- */ - if (!max_addresses || - ipv6_count_addresses(in6_dev) < max_addresses) - ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, - addr_type&IPV6_ADDR_SCOPE_MASK, - addr_flags); - - if (!ifp || IS_ERR(ifp)) { - in6_dev_put(in6_dev); - return; - } - - update_lft = create = 1; - ifp->cstamp = jiffies; - addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT); - } - - if (ifp) { - int flags; - unsigned long now; -#ifdef CONFIG_IPV6_PRIVACY - struct inet6_ifaddr *ift; -#endif - u32 stored_lft; - - /* update lifetime (RFC2462 5.5.3 e) */ - spin_lock(&ifp->lock); - now = jiffies; - if (ifp->valid_lft > (now - ifp->tstamp) / HZ) - stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; - else - stored_lft = 0; - if (!update_lft && stored_lft) { - if (valid_lft > MIN_VALID_LIFETIME || - valid_lft > stored_lft) - update_lft = 1; - else if (stored_lft <= MIN_VALID_LIFETIME) { - /* valid_lft <= stored_lft is always true */ - /* XXX: IPsec */ - update_lft = 0; - } else { - valid_lft = MIN_VALID_LIFETIME; - if (valid_lft < prefered_lft) - prefered_lft = valid_lft; - update_lft = 1; - } - } - - if (update_lft) { - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = now; - flags = ifp->flags; - ifp->flags &= ~IFA_F_DEPRECATED; - spin_unlock(&ifp->lock); - - if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify(0, ifp); - } else - spin_unlock(&ifp->lock); - -#ifdef CONFIG_IPV6_PRIVACY - read_lock_bh(&in6_dev->lock); - /* update all temporary addresses in the list */ - for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { - /* - * When adjusting the lifetimes of an existing - * temporary address, only lower the lifetimes. - * Implementations must not increase the - * lifetimes of an existing temporary address - * when processing a Prefix Information Option. 
- */ - spin_lock(&ift->lock); - flags = ift->flags; - if (ift->valid_lft > valid_lft && - ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) - ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; - if (ift->prefered_lft > prefered_lft && - ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) - ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; - spin_unlock(&ift->lock); - if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify(0, ift); - } - - if (create && in6_dev->cnf.use_tempaddr > 0) { - /* - * When a new public address is created as described in [ADDRCONF], - * also create a new temporary address. - */ - read_unlock_bh(&in6_dev->lock); - ipv6_create_tempaddr(ifp, NULL); - } else { - read_unlock_bh(&in6_dev->lock); - } -#endif - in6_ifa_put(ifp); - addrconf_verify(0); - } - } - inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); - in6_dev_put(in6_dev); -} - -/* - * Set destination address. - * Special case for SIT interfaces where we create a new "virtual" - * device. 
- */ -int addrconf_set_dstaddr(void __user *arg) -{ - struct in6_ifreq ireq; - struct net_device *dev; - int err = -EINVAL; - - rtnl_lock(); - - err = -EFAULT; - if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - goto err_exit; - - dev = __dev_get_by_index(ireq.ifr6_ifindex); - - err = -ENODEV; - if (dev == NULL) - goto err_exit; - -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) - if (dev->type == ARPHRD_SIT) { - struct ifreq ifr; - mm_segment_t oldfs; - struct ip_tunnel_parm p; - - err = -EADDRNOTAVAIL; - if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) - goto err_exit; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; - p.iph.saddr = 0; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPV6; - p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (void __user *)&p; - - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - - if (err == 0) { - err = -ENOBUFS; - if ((dev = __dev_get_by_name(p.name)) == NULL) - goto err_exit; - err = dev_open(dev); - } - } -#endif - -err_exit: - rtnl_unlock(); - return err; -} - -/* - * Manual configuration of address on an interface - */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, - __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) -{ - struct inet6_ifaddr *ifp; - struct inet6_dev *idev; - struct net_device *dev; - int scope; - u32 flags = RTF_EXPIRES; - - ASSERT_RTNL(); - - /* check the lifetime */ - if (!valid_lft || prefered_lft > valid_lft) - return -EINVAL; - - if ((dev = __dev_get_by_index(ifindex)) == NULL) - return -ENODEV; - - if ((idev = addrconf_add_dev(dev)) == NULL) - return -ENOBUFS; - - scope = ipv6_addr_scope(pfx); - - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; - - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) 
&& - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; - - ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); - - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = jiffies; - spin_unlock_bh(&ifp->lock); - - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - jiffies_to_clock_t(valid_lft * HZ), flags); - /* - * Note that section 3.1 of RFC 4429 indicates - * that the Optimistic flag should not be set for - * manually configured addresses - */ - addrconf_dad_start(ifp, 0); - in6_ifa_put(ifp); - addrconf_verify(0); - return 0; - } - - return PTR_ERR(ifp); -} - -static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) -{ - struct inet6_ifaddr *ifp; - struct inet6_dev *idev; - struct net_device *dev; - - if ((dev = __dev_get_by_index(ifindex)) == NULL) - return -ENODEV; - - if ((idev = __in6_dev_get(dev)) == NULL) - return -ENXIO; - - read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->prefix_len == plen && - ipv6_addr_equal(pfx, &ifp->addr)) { - in6_ifa_hold(ifp); - read_unlock_bh(&idev->lock); - - ipv6_del_addr(ifp); - - /* If the last address is deleted administratively, - disable IPv6 on this interface. 
- */ - if (idev->addr_list == NULL) - addrconf_ifdown(idev->dev, 1); - return 0; - } - } - read_unlock_bh(&idev->lock); - return -EADDRNOTAVAIL; -} - - -int addrconf_add_ifaddr(void __user *arg) -{ - struct in6_ifreq ireq; - int err; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - return -EFAULT; - - rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, - IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); - rtnl_unlock(); - return err; -} - -int addrconf_del_ifaddr(void __user *arg) -{ - struct in6_ifreq ireq; - int err; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - return -EFAULT; - - rtnl_lock(); - err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); - rtnl_unlock(); - return err; -} - -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -static void sit_add_v4_addrs(struct inet6_dev *idev) -{ - struct inet6_ifaddr * ifp; - struct in6_addr addr; - struct net_device *dev; - int scope; - - ASSERT_RTNL(); - - memset(&addr, 0, sizeof(struct in6_addr)); - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); - - if (idev->dev->flags&IFF_POINTOPOINT) { - addr.s6_addr32[0] = htonl(0xfe800000); - scope = IFA_LINK; - } else { - scope = IPV6_ADDR_COMPATv4; - } - - if (addr.s6_addr32[3]) { - ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - ipv6_ifa_notify(RTM_NEWADDR, ifp); - in6_ifa_put(ifp); - } - return; - } - - for_each_netdev(dev) { - struct in_device * in_dev = __in_dev_get_rtnl(dev); - if (in_dev && (dev->flags & IFF_UP)) { - struct in_ifaddr * ifa; - - int flag = scope; - - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { - int plen; - - addr.s6_addr32[3] = ifa->ifa_local; - - if (ifa->ifa_scope == RT_SCOPE_LINK) - 
continue; - if (ifa->ifa_scope >= RT_SCOPE_HOST) { - if (idev->dev->flags&IFF_POINTOPOINT) - continue; - flag |= IFA_HOST; - } - if (idev->dev->flags&IFF_POINTOPOINT) - plen = 64; - else - plen = 96; - - ifp = ipv6_add_addr(idev, &addr, plen, flag, - IFA_F_PERMANENT); - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - ipv6_ifa_notify(RTM_NEWADDR, ifp); - in6_ifa_put(ifp); - } - } - } - } -} -#endif - -static void init_loopback(struct net_device *dev) -{ - struct inet6_dev *idev; - struct inet6_ifaddr * ifp; - - /* ::1 */ - - ASSERT_RTNL(); - - if ((idev = ipv6_find_idev(dev)) == NULL) { - printk(KERN_DEBUG "init loopback: add_dev failed\n"); - return; - } - - ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT); - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - ipv6_ifa_notify(RTM_NEWADDR, ifp); - in6_ifa_put(ifp); - } -} - -static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) -{ - struct inet6_ifaddr * ifp; - u32 addr_flags = IFA_F_PERMANENT; - -#ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (idev->cnf.optimistic_dad && - !ipv6_devconf.forwarding) - addr_flags |= IFA_F_OPTIMISTIC; -#endif - - - ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); - if (!IS_ERR(ifp)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); - addrconf_dad_start(ifp, 0); - in6_ifa_put(ifp); - } -} - -static void addrconf_dev_config(struct net_device *dev) -{ - struct in6_addr addr; - struct inet6_dev * idev; - - ASSERT_RTNL(); - - if ((dev->type != ARPHRD_ETHER) && - (dev->type != ARPHRD_FDDI) && - (dev->type != ARPHRD_IEEE802_TR) && - (dev->type != ARPHRD_ARCNET) && - (dev->type != ARPHRD_INFINIBAND)) { - /* Alas, we support only Ethernet autoconfiguration. 
*/ - return; - } - - idev = addrconf_add_dev(dev); - if (idev == NULL) - return; - - memset(&addr, 0, sizeof(struct in6_addr)); - addr.s6_addr32[0] = htonl(0xFE800000); - - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) - addrconf_add_linklocal(idev, &addr); -} - -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) -static void addrconf_sit_config(struct net_device *dev) -{ - struct inet6_dev *idev; - - ASSERT_RTNL(); - - /* - * Configure the tunnel with one of our IPv4 - * addresses... we should configure all of - * our v4 addrs in the tunnel - */ - - if ((idev = ipv6_find_idev(dev)) == NULL) { - printk(KERN_DEBUG "init sit: add_dev failed\n"); - return; - } - - sit_add_v4_addrs(idev); - - if (dev->flags&IFF_POINTOPOINT) { - addrconf_add_mroute(dev); - addrconf_add_lroute(dev); - } else - sit_route_add(dev); -} -#endif - -static inline int -ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) -{ - struct in6_addr lladdr; - - if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { - addrconf_add_linklocal(idev, &lladdr); - return 0; - } - return -1; -} - -static void ip6_tnl_add_linklocal(struct inet6_dev *idev) -{ - struct net_device *link_dev; - - /* first try to inherit the link-local address from the link device */ - if (idev->dev->iflink && - (link_dev = __dev_get_by_index(idev->dev->iflink))) { - if (!ipv6_inherit_linklocal(idev, link_dev)) - return; - } - /* then try to inherit it from any device */ - for_each_netdev(link_dev) { - if (!ipv6_inherit_linklocal(idev, link_dev)) - return; - } - printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n"); -} - -/* - * Autoconfigure tunnel with a link-local address so routing protocols, - * DHCPv6, MLD etc. 
can be run over the virtual link - */ - -static void addrconf_ip6_tnl_config(struct net_device *dev) -{ - struct inet6_dev *idev; - - ASSERT_RTNL(); - - if ((idev = addrconf_add_dev(dev)) == NULL) { - printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); - return; - } - ip6_tnl_add_linklocal(idev); -} - -static int addrconf_notify(struct notifier_block *this, unsigned long event, - void * data) -{ - struct net_device *dev = (struct net_device *) data; - struct inet6_dev *idev = __in6_dev_get(dev); - int run_pending = 0; - - switch(event) { - case NETDEV_REGISTER: - if (!idev && dev->mtu >= IPV6_MIN_MTU) { - idev = ipv6_add_dev(dev); - if (!idev) - printk(KERN_WARNING "IPv6: add_dev failed for %s\n", - dev->name); - } - break; - case NETDEV_UP: - case NETDEV_CHANGE: - if (event == NETDEV_UP) { - if (!addrconf_qdisc_ok(dev)) { - /* device is not ready yet. */ - printk(KERN_INFO - "ADDRCONF(NETDEV_UP): %s: " - "link is not ready\n", - dev->name); - break; - } - - if (idev) - idev->if_flags |= IF_READY; - } else { - if (!addrconf_qdisc_ok(dev)) { - /* device is still not ready. */ - break; - } - - if (idev) { - if (idev->if_flags & IF_READY) { - /* device is already configured. */ - break; - } - idev->if_flags |= IF_READY; - } - - printk(KERN_INFO - "ADDRCONF(NETDEV_CHANGE): %s: " - "link becomes ready\n", - dev->name); - - run_pending = 1; - } - - switch(dev->type) { -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) - case ARPHRD_SIT: - addrconf_sit_config(dev); - break; -#endif - case ARPHRD_TUNNEL6: - addrconf_ip6_tnl_config(dev); - break; - case ARPHRD_LOOPBACK: - init_loopback(dev); - break; - - default: - addrconf_dev_config(dev); - break; - } - if (idev) { - if (run_pending) - addrconf_dad_run(idev); - - /* If the MTU changed during the interface down, when the - interface up, the changed MTU must be reflected in the - idev as well as routers. 
- */ - if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { - rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; - } - idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_NEWLINK, idev); - /* If the changed mtu during down is lower than IPV6_MIN_MTU - stop IPv6 on this interface. - */ - if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, event != NETDEV_DOWN); - } - break; - - case NETDEV_CHANGEMTU: - if ( idev && dev->mtu >= IPV6_MIN_MTU) { - rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; - break; - } - - /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ - - case NETDEV_DOWN: - case NETDEV_UNREGISTER: - /* - * Remove all addresses from this interface. - */ - addrconf_ifdown(dev, event != NETDEV_DOWN); - break; - - case NETDEV_CHANGENAME: - if (idev) { - snmp6_unregister_dev(idev); -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); - neigh_sysctl_unregister(idev->nd_parms); - neigh_sysctl_register(dev, idev->nd_parms, - NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - NULL); - addrconf_sysctl_register(idev, &idev->cnf); -#endif - snmp6_register_dev(idev); - } - break; - } - - return NOTIFY_OK; -} - -/* - * addrconf module should be notified of a device going up - */ -static struct notifier_block ipv6_dev_notf = { - .notifier_call = addrconf_notify, - .priority = 0 -}; - -static int addrconf_ifdown(struct net_device *dev, int how) -{ - struct inet6_dev *idev; - struct inet6_ifaddr *ifa, **bifa; - int i; - - ASSERT_RTNL(); - - if (dev == &loopback_dev && how == 1) - how = 0; - - rt6_ifdown(dev); - neigh_ifdown(&nd_tbl, dev); - - idev = __in6_dev_get(dev); - if (idev == NULL) - return -ENODEV; - - /* Step 1: remove reference to ipv6 device from parent device. - Do not dev_put! 
- */ - if (how == 1) { - idev->dead = 1; - - /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, NULL); - - /* Step 1.5: remove snmp6 entry */ - snmp6_unregister_dev(idev); - - } - - /* Step 2: clear hash table */ - for (i=0; iidev == idev) { - *bifa = ifa->lst_next; - ifa->lst_next = NULL; - addrconf_del_timer(ifa); - in6_ifa_put(ifa); - continue; - } - bifa = &ifa->lst_next; - } - write_unlock_bh(&addrconf_hash_lock); - } - - write_lock_bh(&idev->lock); - - /* Step 3: clear flags for stateless addrconf */ - if (how != 1) - idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - - /* Step 4: clear address list */ -#ifdef CONFIG_IPV6_PRIVACY - if (how == 1 && del_timer(&idev->regen_timer)) - in6_dev_put(idev); - - /* clear tempaddr list */ - while ((ifa = idev->tempaddr_list) != NULL) { - idev->tempaddr_list = ifa->tmp_next; - ifa->tmp_next = NULL; - ifa->dead = 1; - write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->lock); - - if (ifa->ifpub) { - in6_ifa_put(ifa->ifpub); - ifa->ifpub = NULL; - } - spin_unlock_bh(&ifa->lock); - in6_ifa_put(ifa); - write_lock_bh(&idev->lock); - } -#endif - while ((ifa = idev->addr_list) != NULL) { - idev->addr_list = ifa->if_next; - ifa->if_next = NULL; - ifa->dead = 1; - addrconf_del_timer(ifa); - write_unlock_bh(&idev->lock); - - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); - in6_ifa_put(ifa); - - write_lock_bh(&idev->lock); - } - write_unlock_bh(&idev->lock); - - /* Step 5: Discard multicast list */ - - if (how == 1) - ipv6_mc_destroy_dev(idev); - else - ipv6_mc_down(idev); - - /* Step 5: netlink notification of this interface */ - idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_DELLINK, idev); - - /* Shot the device (if unregistered) */ - - if (how == 1) { -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); - neigh_sysctl_unregister(idev->nd_parms); -#endif - neigh_parms_release(&nd_tbl, idev->nd_parms); - neigh_ifdown(&nd_tbl, dev); - 
in6_dev_put(idev); - } - return 0; -} - -static void addrconf_rs_timer(unsigned long data) -{ - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; - - if (ifp->idev->cnf.forwarding) - goto out; - - if (ifp->idev->if_flags & IF_RA_RCVD) { - /* - * Announcement received after solicitation - * was sent - */ - goto out; - } - - spin_lock(&ifp->lock); - if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { - struct in6_addr all_routers; - - /* The wait after the last probe can be shorter */ - addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == ifp->idev->cnf.rtr_solicits) ? - ifp->idev->cnf.rtr_solicit_delay : - ifp->idev->cnf.rtr_solicit_interval); - spin_unlock(&ifp->lock); - - ipv6_addr_all_routers(&all_routers); - - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); - } else { - spin_unlock(&ifp->lock); - /* - * Note: we do not support deprecated "all on-link" - * assumption any longer. - */ - printk(KERN_DEBUG "%s: no IPv6 routers present\n", - ifp->idev->dev->name); - } - -out: - in6_ifa_put(ifp); -} - -/* - * Duplicate Address Detection - */ -static void addrconf_dad_kick(struct inet6_ifaddr *ifp) -{ - unsigned long rand_num; - struct inet6_dev *idev = ifp->idev; - - if (ifp->flags & IFA_F_OPTIMISTIC) - rand_num = 0; - else - rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? 
: 1); - - ifp->probes = idev->cnf.dad_transmits; - addrconf_mod_timer(ifp, AC_DAD, rand_num); -} - -static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) -{ - struct inet6_dev *idev = ifp->idev; - struct net_device *dev = idev->dev; - - addrconf_join_solict(dev, &ifp->addr); - - net_srandom(ifp->addr.s6_addr32[3]); - - read_lock_bh(&idev->lock); - if (ifp->dead) - goto out; - spin_lock_bh(&ifp->lock); - - if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - !(ifp->flags&IFA_F_TENTATIVE) || - ifp->flags & IFA_F_NODAD) { - ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); - spin_unlock_bh(&ifp->lock); - read_unlock_bh(&idev->lock); - - addrconf_dad_completed(ifp); - return; - } - - if (!(idev->if_flags & IF_READY)) { - spin_unlock_bh(&ifp->lock); - read_unlock_bh(&idev->lock); - /* - * If the defice is not ready: - * - keep it tentative if it is a permanent address. - * - otherwise, kill it. - */ - in6_ifa_hold(ifp); - addrconf_dad_stop(ifp); - return; - } - - /* - * Optimistic nodes can start receiving - * Frames right away - */ - if(ifp->flags & IFA_F_OPTIMISTIC) - ip6_ins_rt(ifp->rt); - - addrconf_dad_kick(ifp); - spin_unlock_bh(&ifp->lock); -out: - read_unlock_bh(&idev->lock); -} - -static void addrconf_dad_timer(unsigned long data) -{ - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; - struct inet6_dev *idev = ifp->idev; - struct in6_addr unspec; - struct in6_addr mcaddr; - - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); - goto out; - } - spin_lock_bh(&ifp->lock); - if (ifp->probes == 0) { - /* - * DAD was successful - */ - - ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); - spin_unlock_bh(&ifp->lock); - read_unlock_bh(&idev->lock); - - addrconf_dad_completed(ifp); - - goto out; - } - - ifp->probes--; - addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); - spin_unlock_bh(&ifp->lock); - read_unlock_bh(&idev->lock); - - /* send a neighbour solicitation for our addr */ - memset(&unspec, 0, 
sizeof(unspec)); - addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); -out: - in6_ifa_put(ifp); -} - -static void addrconf_dad_completed(struct inet6_ifaddr *ifp) -{ - struct net_device * dev = ifp->idev->dev; - - /* - * Configure the address for reception. Now it is valid. - */ - - ipv6_ifa_notify(RTM_NEWADDR, ifp); - - /* If added prefix is link local and forwarding is off, - start sending router solicitations. - */ - - if (ifp->idev->cnf.forwarding == 0 && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { - struct in6_addr all_routers; - - ipv6_addr_all_routers(&all_routers); - - /* - * If a host as already performed a random delay - * [...] as part of DAD [...] there is no need - * to delay again before sending the first RS - */ - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); - - spin_lock_bh(&ifp->lock); - ifp->probes = 1; - ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); - spin_unlock_bh(&ifp->lock); - } -} - -static void addrconf_dad_run(struct inet6_dev *idev) { - struct inet6_ifaddr *ifp; - - read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { - spin_lock_bh(&ifp->lock); - if (!(ifp->flags & IFA_F_TENTATIVE)) { - spin_unlock_bh(&ifp->lock); - continue; - } - spin_unlock_bh(&ifp->lock); - addrconf_dad_kick(ifp); - } - read_unlock_bh(&idev->lock); -} - -#ifdef CONFIG_PROC_FS -struct if6_iter_state { - int bucket; -}; - -static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) -{ - struct inet6_ifaddr *ifa = NULL; - struct if6_iter_state *state = seq->private; - - for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - ifa = inet6_addr_lst[state->bucket]; - if (ifa) - break; - } - return ifa; -} - -static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) -{ - 
struct if6_iter_state *state = seq->private; - - ifa = ifa->lst_next; -try_again: - if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { - ifa = inet6_addr_lst[state->bucket]; - goto try_again; - } - return ifa; -} - -static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) -{ - struct inet6_ifaddr *ifa = if6_get_first(seq); - - if (ifa) - while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) - --pos; - return pos ? NULL : ifa; -} - -static void *if6_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock_bh(&addrconf_hash_lock); - return if6_get_idx(seq, *pos); -} - -static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct inet6_ifaddr *ifa; - - ifa = if6_get_next(seq, v); - ++*pos; - return ifa; -} - -static void if6_seq_stop(struct seq_file *seq, void *v) -{ - read_unlock_bh(&addrconf_hash_lock); -} - -static int if6_seq_show(struct seq_file *seq, void *v) -{ - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, - NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", - NIP6(ifp->addr), - ifp->idev->dev->ifindex, - ifp->prefix_len, - ifp->scope, - ifp->flags, - ifp->idev->dev->name); - return 0; -} - -static struct seq_operations if6_seq_ops = { - .start = if6_seq_start, - .next = if6_seq_next, - .show = if6_seq_show, - .stop = if6_seq_stop, -}; - -static int if6_seq_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int rc = -ENOMEM; - struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); - - if (!s) - goto out; - - rc = seq_open(file, &if6_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; - seq->private = s; -out: - return rc; -out_kfree: - kfree(s); - goto out; -} - -static const struct file_operations if6_fops = { - .owner = THIS_MODULE, - .open = if6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -int __init if6_proc_init(void) -{ - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) - return -ENOMEM; - return 0; 
-} - -void if6_proc_exit(void) -{ - proc_net_remove("if_inet6"); -} -#endif /* CONFIG_PROC_FS */ - -#ifdef CONFIG_IPV6_MIP6 -/* Check if address is a home address configured on any interface. */ -int ipv6_chk_home_addr(struct in6_addr *addr) -{ - int ret = 0; - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { - if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && - (ifp->flags & IFA_F_HOMEADDRESS)) { - ret = 1; - break; - } - } - read_unlock_bh(&addrconf_hash_lock); - return ret; -} -#endif - -/* - * Periodic address status verification - */ - -static void addrconf_verify(unsigned long foo) -{ - struct inet6_ifaddr *ifp; - unsigned long now, next; - int i; - - spin_lock_bh(&addrconf_verify_lock); - now = jiffies; - next = now + ADDR_CHECK_FREQUENCY; - - del_timer(&addr_chk_timer); - - for (i=0; i < IN6_ADDR_HSIZE; i++) { - -restart: - read_lock(&addrconf_hash_lock); - for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { - unsigned long age; -#ifdef CONFIG_IPV6_PRIVACY - unsigned long regen_advance; -#endif - - if (ifp->flags & IFA_F_PERMANENT) - continue; - - spin_lock(&ifp->lock); - age = (now - ifp->tstamp) / HZ; - -#ifdef CONFIG_IPV6_PRIVACY - regen_advance = ifp->idev->cnf.regen_max_retry * - ifp->idev->cnf.dad_transmits * - ifp->idev->nd_parms->retrans_time / HZ; -#endif - - if (ifp->valid_lft != INFINITY_LIFE_TIME && - age >= ifp->valid_lft) { - spin_unlock(&ifp->lock); - in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); - ipv6_del_addr(ifp); - goto restart; - } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { - spin_unlock(&ifp->lock); - continue; - } else if (age >= ifp->prefered_lft) { - /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ - int deprecate = 0; - - if (!(ifp->flags&IFA_F_DEPRECATED)) { - deprecate = 1; - ifp->flags |= IFA_F_DEPRECATED; - } - - if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) - next = ifp->tstamp + 
ifp->valid_lft * HZ; - - spin_unlock(&ifp->lock); - - if (deprecate) { - in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); - - ipv6_ifa_notify(0, ifp); - in6_ifa_put(ifp); - goto restart; - } -#ifdef CONFIG_IPV6_PRIVACY - } else if ((ifp->flags&IFA_F_TEMPORARY) && - !(ifp->flags&IFA_F_TENTATIVE)) { - if (age >= ifp->prefered_lft - regen_advance) { - struct inet6_ifaddr *ifpub = ifp->ifpub; - if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) - next = ifp->tstamp + ifp->prefered_lft * HZ; - if (!ifp->regen_count && ifpub) { - ifp->regen_count++; - in6_ifa_hold(ifp); - in6_ifa_hold(ifpub); - spin_unlock(&ifp->lock); - read_unlock(&addrconf_hash_lock); - spin_lock(&ifpub->lock); - ifpub->regen_count = 0; - spin_unlock(&ifpub->lock); - ipv6_create_tempaddr(ifpub, ifp); - in6_ifa_put(ifpub); - in6_ifa_put(ifp); - goto restart; - } - } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) - next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; - spin_unlock(&ifp->lock); -#endif - } else { - /* ifp->prefered_lft <= ifp->valid_lft */ - if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) - next = ifp->tstamp + ifp->prefered_lft * HZ; - spin_unlock(&ifp->lock); - } - } - read_unlock(&addrconf_hash_lock); - } - - addr_chk_timer.expires = time_before(next, jiffies + HZ) ? 
jiffies + HZ : next; - add_timer(&addr_chk_timer); - spin_unlock_bh(&addrconf_verify_lock); -} - -static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) -{ - struct in6_addr *pfx = NULL; - - if (addr) - pfx = nla_data(addr); - - if (local) { - if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) - pfx = NULL; - else - pfx = nla_data(local); - } - - return pfx; -} - -static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { - [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, - [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, - [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, -}; - -static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - return err; - - ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); - if (pfx == NULL) - return -EINVAL; - - return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); -} - -static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, - u32 prefered_lft, u32 valid_lft) -{ - u32 flags = RTF_EXPIRES; - - if (!valid_lft || (prefered_lft > valid_lft)) - return -EINVAL; - - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; - - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; - - spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; - ifp->tstamp = jiffies; - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - - spin_unlock_bh(&ifp->lock); - if (!(ifp->flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify(0, ifp); - - 
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - jiffies_to_clock_t(valid_lft * HZ), flags); - addrconf_verify(0); - - return 0; -} - -static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; - struct inet6_ifaddr *ifa; - struct net_device *dev; - u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; - u8 ifa_flags; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - return err; - - ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); - if (pfx == NULL) - return -EINVAL; - - if (tb[IFA_CACHEINFO]) { - struct ifa_cacheinfo *ci; - - ci = nla_data(tb[IFA_CACHEINFO]); - valid_lft = ci->ifa_valid; - preferred_lft = ci->ifa_prefered; - } else { - preferred_lft = INFINITY_LIFE_TIME; - valid_lft = INFINITY_LIFE_TIME; - } - - dev = __dev_get_by_index(ifm->ifa_index); - if (dev == NULL) - return -ENODEV; - - /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); - - ifa = ipv6_get_ifaddr(pfx, dev, 1); - if (ifa == NULL) { - /* - * It would be best to check for !NLM_F_CREATE here but - * userspace alreay relies on not having to provide this. 
- */ - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - ifa_flags, preferred_lft, valid_lft); - } - - if (nlh->nlmsg_flags & NLM_F_EXCL || - !(nlh->nlmsg_flags & NLM_F_REPLACE)) - err = -EEXIST; - else - err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); - - in6_ifa_put(ifa); - - return err; -} - -static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, - u8 scope, int ifindex) -{ - struct ifaddrmsg *ifm; - - ifm = nlmsg_data(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = prefixlen; - ifm->ifa_flags = flags; - ifm->ifa_scope = scope; - ifm->ifa_index = ifindex; -} - -static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, - unsigned long tstamp, u32 preferred, u32 valid) -{ - struct ifa_cacheinfo ci; - - ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.ifa_prefered = preferred; - ci.ifa_valid = valid; - - return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); -} - -static inline int rt_scope(int ifa_scope) -{ - if (ifa_scope & IFA_HOST) - return RT_SCOPE_HOST; - else if (ifa_scope & IFA_LINK) - return RT_SCOPE_LINK; - else if (ifa_scope & IFA_SITE) - return RT_SCOPE_SITE; - else - return RT_SCOPE_UNIVERSE; -} - -static inline int inet6_ifaddr_msgsize(void) -{ - return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) - + nla_total_size(16) /* IFA_ADDRESS */ - + nla_total_size(sizeof(struct ifa_cacheinfo)); -} - -static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) -{ - struct nlmsghdr *nlh; - u32 preferred, valid; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) - return -EMSGSIZE; - - put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), - ifa->idev->dev->ifindex); - - if 
(!(ifa->flags&IFA_F_PERMANENT)) { - preferred = ifa->prefered_lft; - valid = ifa->valid_lft; - if (preferred != INFINITY_LIFE_TIME) { - long tval = (jiffies - ifa->tstamp)/HZ; - preferred -= tval; - if (valid != INFINITY_LIFE_TIME) - valid -= tval; - } - } else { - preferred = INFINITY_LIFE_TIME; - valid = INFINITY_LIFE_TIME; - } - - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - return nlmsg_end(skb, nlh); -} - -static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event, u16 flags) -{ - struct nlmsghdr *nlh; - u8 scope = RT_SCOPE_UNIVERSE; - int ifindex = ifmca->idev->dev->ifindex; - - if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) - scope = RT_SCOPE_SITE; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) - return -EMSGSIZE; - - put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || - put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - return nlmsg_end(skb, nlh); -} - -static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event, unsigned int flags) -{ - struct nlmsghdr *nlh; - u8 scope = RT_SCOPE_UNIVERSE; - int ifindex = ifaca->aca_idev->dev->ifindex; - - if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) - scope = RT_SCOPE_SITE; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) - return -EMSGSIZE; - - put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || - put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } - - 
return nlmsg_end(skb, nlh); -} - -enum addr_type_t -{ - UNICAST_ADDR, - MULTICAST_ADDR, - ANYCAST_ADDR, -}; - -static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, - enum addr_type_t type) -{ - int idx, ip_idx; - int s_idx, s_ip_idx; - int err = 1; - struct net_device *dev; - struct inet6_dev *idev = NULL; - struct inet6_ifaddr *ifa; - struct ifmcaddr6 *ifmca; - struct ifacaddr6 *ifaca; - - s_idx = cb->args[0]; - s_ip_idx = ip_idx = cb->args[1]; - - idx = 0; - for_each_netdev(dev) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - s_ip_idx = 0; - ip_idx = 0; - if ((idev = in6_dev_get(dev)) == NULL) - goto cont; - read_lock_bh(&idev->lock); - switch (type) { - case UNICAST_ADDR: - /* unicast address incl. temp addr */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if ((err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR, - NLM_F_MULTI)) <= 0) - goto done; - } - break; - case MULTICAST_ADDR: - /* multicast address */ - for (ifmca = idev->mc_list; ifmca; - ifmca = ifmca->next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if ((err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETMULTICAST, - NLM_F_MULTI)) <= 0) - goto done; - } - break; - case ANYCAST_ADDR: - /* anycast address */ - for (ifaca = idev->ac_list; ifaca; - ifaca = ifaca->aca_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if ((err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETANYCAST, - NLM_F_MULTI)) <= 0) - goto done; - } - break; - default: - break; - } - read_unlock_bh(&idev->lock); - in6_dev_put(idev); -cont: - idx++; - } -done: - if (err <= 0) { - read_unlock_bh(&idev->lock); - in6_dev_put(idev); - } - cb->args[0] = idx; - cb->args[1] = ip_idx; - return skb->len; -} - -static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) -{ - enum addr_type_t type 
= UNICAST_ADDR; - return inet6_dump_addr(skb, cb, type); -} - -static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) -{ - enum addr_type_t type = MULTICAST_ADDR; - return inet6_dump_addr(skb, cb, type); -} - - -static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) -{ - enum addr_type_t type = ANYCAST_ADDR; - return inet6_dump_addr(skb, cb, type); -} - -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) -{ - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *addr = NULL; - struct net_device *dev = NULL; - struct inet6_ifaddr *ifa; - struct sk_buff *skb; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - goto errout; - - addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); - if (addr == NULL) { - err = -EINVAL; - goto errout; - } - - ifm = nlmsg_data(nlh); - if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); - - if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { - err = -EADDRNOTAVAIL; - goto errout; - } - - if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { - err = -ENOBUFS; - goto errout_ifa; - } - - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, RTM_NEWADDR, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout_ifa; - } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -errout_ifa: - in6_ifa_put(ifa); -errout: - return err; -} - -static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) -{ - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); - if (skb == NULL) - goto errout; - - err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - err = rtnl_notify(skb, 0, 
RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); -} - -static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, - __s32 *array, int bytes) -{ - BUG_ON(bytes < (DEVCONF_MAX * 4)); - - memset(array, 0, bytes); - array[DEVCONF_FORWARDING] = cnf->forwarding; - array[DEVCONF_HOPLIMIT] = cnf->hop_limit; - array[DEVCONF_MTU6] = cnf->mtu6; - array[DEVCONF_ACCEPT_RA] = cnf->accept_ra; - array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects; - array[DEVCONF_AUTOCONF] = cnf->autoconf; - array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; - array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; - array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; - array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; - array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; -#ifdef CONFIG_IPV6_PRIVACY - array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; - array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; - array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; - array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; - array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; -#endif - array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; - array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; - array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; -#ifdef CONFIG_IPV6_ROUTER_PREF - array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; - array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; -#ifdef CONFIG_IPV6_ROUTE_INFO - array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; -#endif -#endif - array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; - array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; -#ifdef CONFIG_IPV6_OPTIMISTIC_DAD - array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; -#endif -} - -static inline size_t inet6_if_nlmsg_size(void) -{ - return NLMSG_ALIGN(sizeof(struct ifinfomsg)) - + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ - + 
nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ - + nla_total_size(4) /* IFLA_MTU */ - + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size( /* IFLA_PROTINFO */ - nla_total_size(4) /* IFLA_INET6_FLAGS */ - + nla_total_size(sizeof(struct ifla_cacheinfo)) - + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ - + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ - ); -} - -static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items, - int bytes) -{ - int i; - int pad = bytes - sizeof(u64) * items; - BUG_ON(pad < 0); - - /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, &stats[0]); - for (i = 1; i < items; i++) - put_unaligned(snmp_fold_field(mib, i), &stats[i]); - - memset(&stats[items], 0, pad); -} - -static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, - int bytes) -{ - switch(attrtype) { - case IFLA_INET6_STATS: - __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); - break; - case IFLA_INET6_ICMP6STATS: - __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); - break; - } -} - -static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event, unsigned int flags) -{ - struct net_device *dev = idev->dev; - struct nlattr *nla; - struct ifinfomsg *hdr; - struct nlmsghdr *nlh; - void *protoinfo; - struct ifla_cacheinfo ci; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); - if (nlh == NULL) - return -EMSGSIZE; - - hdr = nlmsg_data(nlh); - hdr->ifi_family = AF_INET6; - hdr->__ifi_pad = 0; - hdr->ifi_type = dev->type; - hdr->ifi_index = dev->ifindex; - hdr->ifi_flags = dev_get_flags(dev); - hdr->ifi_change = 0; - - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - - if (dev->addr_len) - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); - if (dev->ifindex != 
dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - - protoinfo = nla_nest_start(skb, IFLA_PROTINFO); - if (protoinfo == NULL) - goto nla_put_failure; - - NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); - - ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.reachable_time = idev->nd_parms->reachable_time; - ci.retrans_time = idev->nd_parms->retrans_time; - NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); - - nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) - goto nla_put_failure; - ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); - - /* XXX - MC not implemented */ - - nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); - - nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); - - nla_nest_end(skb, protoinfo); - return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) -{ - int idx, err; - int s_idx = cb->args[0]; - struct net_device *dev; - struct inet6_dev *idev; - - read_lock(&dev_base_lock); - idx = 0; - for_each_netdev(dev) { - if (idx < s_idx) - goto cont; - if ((idev = in6_dev_get(dev)) == NULL) - goto cont; - err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - in6_dev_put(idev); - if (err <= 0) - break; -cont: - idx++; - } - read_unlock(&dev_base_lock); - cb->args[0] = idx; - - return skb->len; -} - -void inet6_ifinfo_notify(int event, struct inet6_dev *idev) -{ - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = 
nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) - goto errout; - - err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); -} - -static inline size_t inet6_prefix_nlmsg_size(void) -{ - return NLMSG_ALIGN(sizeof(struct prefixmsg)) - + nla_total_size(sizeof(struct in6_addr)) - + nla_total_size(sizeof(struct prefix_cacheinfo)); -} - -static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, - int event, unsigned int flags) -{ - struct prefixmsg *pmsg; - struct nlmsghdr *nlh; - struct prefix_cacheinfo ci; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); - if (nlh == NULL) - return -EMSGSIZE; - - pmsg = nlmsg_data(nlh); - pmsg->prefix_family = AF_INET6; - pmsg->prefix_pad1 = 0; - pmsg->prefix_pad2 = 0; - pmsg->prefix_ifindex = idev->dev->ifindex; - pmsg->prefix_len = pinfo->prefix_len; - pmsg->prefix_type = pinfo->type; - pmsg->prefix_pad3 = 0; - pmsg->prefix_flags = 0; - if (pinfo->onlink) - pmsg->prefix_flags |= IF_PREFIX_ONLINK; - if (pinfo->autoconf) - pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; - - NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix); - - ci.preferred_time = ntohl(pinfo->prefered); - ci.valid_time = ntohl(pinfo->valid); - NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci); - - return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -static void inet6_prefix_notify(int event, struct inet6_dev *idev, - struct prefix_info *pinfo) -{ - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) - goto errout; - - err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); - if 
(err < 0) { - /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); -} - -static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) -{ - inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); - - switch (event) { - case RTM_NEWADDR: - /* - * If the address was optimistic - * we inserted the route at the start of - * our DAD process, so we don't need - * to do it again - */ - if (!(ifp->rt->rt6i_node)) - ip6_ins_rt(ifp->rt); - if (ifp->idev->cnf.forwarding) - addrconf_join_anycast(ifp); - break; - case RTM_DELADDR: - if (ifp->idev->cnf.forwarding) - addrconf_leave_anycast(ifp); - addrconf_leave_solict(ifp->idev, &ifp->addr); - dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->u.dst); - break; - } -} - -static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) -{ - rcu_read_lock_bh(); - if (likely(ifp->idev->dead == 0)) - __ipv6_ifa_notify(event, ifp); - rcu_read_unlock_bh(); -} - -#ifdef CONFIG_SYSCTL - -static -int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int *valp = ctl->data; - int val = *valp; - int ret; - - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - - if (write && valp != &ipv6_devconf_dflt.forwarding) { - if (valp != &ipv6_devconf.forwarding) { - if ((!*valp) ^ (!val)) { - struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; - if (idev == NULL) - return ret; - dev_forward_change(idev); - } - } else { - ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; - addrconf_forward_change(); - } - if (*valp) - rt6_purge_dflt_routers(); - } - - return ret; -} - -static int addrconf_sysctl_forward_strategy(ctl_table *table, - int __user *name, int nlen, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, 
size_t newlen) -{ - int *valp = table->data; - int new; - - if (!newval || !newlen) - return 0; - if (newlen != sizeof(int)) - return -EINVAL; - if (get_user(new, (int __user *)newval)) - return -EFAULT; - if (new == *valp) - return 0; - if (oldval && oldlenp) { - size_t len; - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - if (valp != &ipv6_devconf_dflt.forwarding) { - if (valp != &ipv6_devconf.forwarding) { - struct inet6_dev *idev = (struct inet6_dev *)table->extra1; - int changed; - if (unlikely(idev == NULL)) - return -ENODEV; - changed = (!*valp) ^ (!new); - *valp = new; - if (changed) - dev_forward_change(idev); - } else { - *valp = new; - addrconf_forward_change(); - } - - if (*valp) - rt6_purge_dflt_routers(); - } else - *valp = new; - - return 1; -} - -static struct addrconf_sysctl_table -{ - struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[__NET_IPV6_MAX]; - ctl_table addrconf_dev[2]; - ctl_table addrconf_conf_dir[2]; - ctl_table addrconf_proto_dir[2]; - ctl_table addrconf_root_dir[2]; -} addrconf_sysctl __read_mostly = { - .sysctl_header = NULL, - .addrconf_vars = { - { - .ctl_name = NET_IPV6_FORWARDING, - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &addrconf_sysctl_forward, - .strategy = &addrconf_sysctl_forward_strategy, - }, - { - .ctl_name = NET_IPV6_HOP_LIMIT, - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .ctl_name = NET_IPV6_MTU, - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_ACCEPT_RA, - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), 
- .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_AUTOCONF, - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_DAD_TRANSMITS, - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_RTR_SOLICITS, - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_IPV6_FORCE_MLD_VERSION, - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#ifdef CONFIG_IPV6_PRIVACY - { - .ctl_name = NET_IPV6_USE_TEMPADDR, - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_TEMP_VALID_LFT, - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, - 
.procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_REGEN_MAX_RETRY, - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif - { - .ctl_name = NET_IPV6_MAX_ADDRESSES, - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#ifdef CONFIG_IPV6_ROUTER_PREF - { - .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF, - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, -#ifdef CONFIG_IPV6_ROUTE_INFO - { - .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif -#endif - { - .ctl_name = NET_IPV6_PROXY_NDP, - .procname = "proxy_ndp", - .data = 
&ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#ifdef CONFIG_IPV6_OPTIMISTIC_DAD - { - .ctl_name = CTL_UNNUMBERED, - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - - }, -#endif - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_dev = { - { - .ctl_name = NET_PROTO_CONF_ALL, - .procname = "all", - .mode = 0555, - .child = addrconf_sysctl.addrconf_vars, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_conf_dir = { - { - .ctl_name = NET_IPV6_CONF, - .procname = "conf", - .mode = 0555, - .child = addrconf_sysctl.addrconf_dev, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_proto_dir = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = addrconf_sysctl.addrconf_conf_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_root_dir = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = addrconf_sysctl.addrconf_proto_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, -}; - -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) -{ - int i; - struct net_device *dev = idev ? 
idev->dev : NULL; - struct addrconf_sysctl_table *t; - char *dev_name = NULL; - - t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (t == NULL) - return; - for (i=0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; - t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ - } - if (dev) { - dev_name = dev->name; - t->addrconf_dev[0].ctl_name = dev->ifindex; - } else { - dev_name = "default"; - t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; - } - - /* - * Make a copy of dev_name, because '.procname' is regarded as const - * by sysctl and we wouldn't want anyone to change it under our feet - * (see SIOCSIFNAME). - */ - dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!dev_name) - goto free; - - t->addrconf_dev[0].procname = dev_name; - - t->addrconf_dev[0].child = t->addrconf_vars; - t->addrconf_conf_dir[0].child = t->addrconf_dev; - t->addrconf_proto_dir[0].child = t->addrconf_conf_dir; - t->addrconf_root_dir[0].child = t->addrconf_proto_dir; - - t->sysctl_header = register_sysctl_table(t->addrconf_root_dir); - if (t->sysctl_header == NULL) - goto free_procname; - else - p->sysctl = t; - return; - - /* error path */ - free_procname: - kfree(dev_name); - free: - kfree(t); - - return; -} - -static void addrconf_sysctl_unregister(struct ipv6_devconf *p) -{ - if (p->sysctl) { - struct addrconf_sysctl_table *t = p->sysctl; - p->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); - kfree(t->addrconf_dev[0].procname); - kfree(t); - } -} - - -#endif - -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} - -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain,nb); -} - -EXPORT_SYMBOL(unregister_inet6addr_notifier); - -/* - * Init / cleanup code - */ - -int __init addrconf_init(void) 
-{ - int err = 0; - - /* The addrconf netdev notifier requires that loopback_dev - * has it's ipv6 private information allocated and setup - * before it can bring up and give link-local addresses - * to other devices which are up. - * - * Unfortunately, loopback_dev is not necessarily the first - * entry in the global dev_base list of net devices. In fact, - * it is likely to be the very last entry on that list. - * So this causes the notifier registry below to try and - * give link-local addresses to all devices besides loopback_dev - * first, then loopback_dev, which cases all the non-loopback_dev - * devices to fail to get a link-local address. - * - * So, as a temporary fix, allocate the ipv6 structure for - * loopback_dev first by hand. - * Longer term, all of the dependencies ipv6 has upon the loopback - * device and it being up should be removed. - */ - rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) - err = -ENOMEM; - rtnl_unlock(); - if (err) - return err; - - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); -#endif - - register_netdevice_notifier(&ipv6_dev_notf); - - addrconf_verify(0); - - err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); - if (err < 0) - goto errout; - - /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL); - __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr); - __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); - __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); - -#ifdef CONFIG_SYSCTL - addrconf_sysctl.sysctl_header = - register_sysctl_table(addrconf_sysctl.addrconf_root_dir); - addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); -#endif - - return 0; 
-errout: - unregister_netdevice_notifier(&ipv6_dev_notf); - - return err; -} - -void __exit addrconf_cleanup(void) -{ - struct net_device *dev; - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - int i; - - unregister_netdevice_notifier(&ipv6_dev_notf); - -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&ipv6_devconf_dflt); - addrconf_sysctl_unregister(&ipv6_devconf); -#endif - - rtnl_lock(); - - /* - * clean dev list. - */ - - for_each_netdev(dev) { - if ((idev = __in6_dev_get(dev)) == NULL) - continue; - addrconf_ifdown(dev, 1); - } - addrconf_ifdown(&loopback_dev, 2); - - /* - * Check hash table. - */ - - write_lock_bh(&addrconf_hash_lock); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - for (ifa=inet6_addr_lst[i]; ifa; ) { - struct inet6_ifaddr *bifa; - - bifa = ifa; - ifa = ifa->lst_next; - printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); - /* Do not free it; something is wrong. - Now we can investigate it with debugger. - */ - } - } - write_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); - - rtnl_unlock(); - -#ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); -#endif -} diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-591/net/ipv6/af_inet6.c --- linux-2.6.22-570/net/ipv6/af_inet6.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/af_inet6.c 2007-12-21 15:36:15.000000000 -0500 @@ -59,9 +59,6 @@ #ifdef CONFIG_IPV6_TUNNEL #include #endif -#ifdef CONFIG_IPV6_MIP6 -#include -#endif #include #include @@ -85,7 +82,7 @@ return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -98,6 +95,9 @@ int try_loading_module = 0; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -166,7 +166,7 @@ BUG_TRAP(answer_prot->slab != NULL); err = 
-ENOBUFS; - sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -209,7 +209,7 @@ inet->mc_index = 0; inet->mc_list = NULL; - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -290,7 +290,7 @@ /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(v4addr) != RTN_LOCAL) { + if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; goto out; } @@ -316,7 +316,7 @@ err = -EINVAL; goto out; } - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -675,6 +675,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -876,9 +877,6 @@ ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); -#ifdef CONFIG_IPV6_MIP6 - mip6_init(); -#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -944,9 +942,7 @@ /* Cleanup code parts. 
*/ ipv6_packet_cleanup(); -#ifdef CONFIG_IPV6_MIP6 - mip6_fini(); -#endif + addrconf_cleanup(); ip6_flowlabel_cleanup(); ip6_route_cleanup(); diff -Nurb linux-2.6.22-570/net/ipv6/ah6.c linux-2.6.22-591/net/ipv6/ah6.c --- linux-2.6.22-570/net/ipv6/ah6.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/ah6.c 2007-12-21 15:36:12.000000000 -0500 @@ -74,7 +74,7 @@ return 0; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /** * ipv6_rearrange_destopt - rearrange IPv6 destination options header * @iph: IPv6 header @@ -132,6 +132,8 @@ bad: return; } +#else +static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {} #endif /** @@ -189,10 +191,8 @@ while (exthdr.raw < end) { switch (nexthdr) { case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); -#endif case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( @@ -228,7 +228,7 @@ u8 nexthdr; char tmp_base[8]; struct { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) struct in6_addr saddr; #endif struct in6_addr daddr; @@ -255,7 +255,7 @@ err = -ENOMEM; goto error; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) memcpy(tmp_ext, &top_iph->saddr, extlen); #else memcpy(tmp_ext, &top_iph->daddr, extlen); @@ -294,7 +294,7 @@ memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) memcpy(&top_iph->saddr, tmp_ext, extlen); #else memcpy(&top_iph->daddr, tmp_ext, extlen); @@ -554,3 +554,4 @@ module_exit(ah6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH); diff -Nurb linux-2.6.22-570/net/ipv6/anycast.c linux-2.6.22-591/net/ipv6/anycast.c --- linux-2.6.22-570/net/ipv6/anycast.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/anycast.c 
2007-12-21 15:36:15.000000000 -0500 @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -112,10 +113,10 @@ } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -196,7 +197,7 @@ write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); dev_put(dev); @@ -224,7 +225,7 @@ if (pac->acl_ifindex != prev_index) { if (dev) dev_put(dev); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -429,7 +430,7 @@ if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; @@ -453,7 +454,7 @@ struct ac6_iter_state *state = ac6_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -579,7 +580,7 @@ int __init ac6_proc_init(void) { - if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -587,7 +588,7 @@ void ac6_proc_exit(void) { - proc_net_remove("anycast6"); + proc_net_remove(&init_net, "anycast6"); } #endif diff -Nurb linux-2.6.22-570/net/ipv6/datagram.c linux-2.6.22-591/net/ipv6/datagram.c --- linux-2.6.22-570/net/ipv6/datagram.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/datagram.c 2007-12-21 15:36:15.000000000 -0500 @@ -60,6 +60,7 @@ return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if 
(np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { @@ -544,7 +545,7 @@ if (!src_info->ipi6_ifindex) return -EINVAL; else { - dev = dev_get_by_index(src_info->ipi6_ifindex); + dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); if (!dev) return -ENODEV; } @@ -658,7 +659,7 @@ switch (rthdr->type) { case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: #endif break; diff -Nurb linux-2.6.22-570/net/ipv6/esp6.c linux-2.6.22-591/net/ipv6/esp6.c --- linux-2.6.22-570/net/ipv6/esp6.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/esp6.c 2007-12-21 15:36:12.000000000 -0500 @@ -421,3 +421,4 @@ module_exit(esp6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP); diff -Nurb linux-2.6.22-570/net/ipv6/exthdrs.c linux-2.6.22-591/net/ipv6/exthdrs.c --- linux-2.6.22-570/net/ipv6/exthdrs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/exthdrs.c 2007-12-21 15:36:12.000000000 -0500 @@ -42,7 +42,7 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif @@ -90,6 +90,7 @@ bad: return -1; } +EXPORT_SYMBOL_GPL(ipv6_find_tlv); /* * Parsing tlv encoded headers. @@ -196,7 +197,7 @@ Destination options header. 
*****************************/ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) { struct sk_buff *skb = *skbp; @@ -270,7 +271,7 @@ #endif static struct tlvtype_proc tlvprocdestopt_lst[] = { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, @@ -283,7 +284,7 @@ { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif struct dst_entry *dst; @@ -298,7 +299,7 @@ } opt->lastopt = opt->dst1 = skb_network_header_len(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) dstbuf = opt->dst1; #endif @@ -308,7 +309,7 @@ skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; @@ -427,7 +428,7 @@ looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own @@ -463,7 +464,7 @@ return -1; } break; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { @@ -520,7 +521,7 @@ addr += i - 1; switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, diff -Nurb linux-2.6.22-570/net/ipv6/fib6_rules.c linux-2.6.22-591/net/ipv6/fib6_rules.c --- 
linux-2.6.22-570/net/ipv6/fib6_rules.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/fib6_rules.c 2007-12-21 15:36:15.000000000 -0500 @@ -244,7 +244,7 @@ return -ENOBUFS; } -static u32 fib6_rule_default_pref(void) +static u32 fib6_rule_default_pref(struct fib_rules_ops *ops) { return 0x3FFF; } @@ -277,10 +277,10 @@ list_add_tail(&local_rule.common.list, &fib6_rules); list_add_tail(&main_rule.common.list, &fib6_rules); - fib_rules_register(&fib6_rules_ops); + fib_rules_register(&init_net, &fib6_rules_ops); } void fib6_rules_cleanup(void) { - fib_rules_unregister(&fib6_rules_ops); + fib_rules_unregister(&init_net, &fib6_rules_ops); } diff -Nurb linux-2.6.22-570/net/ipv6/icmp.c linux-2.6.22-591/net/ipv6/icmp.c --- linux-2.6.22-570/net/ipv6/icmp.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/icmp.c 2007-12-21 15:36:15.000000000 -0500 @@ -272,7 +272,7 @@ return 0; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) static void mip6_addr_swap(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); @@ -377,6 +377,7 @@ mip6_addr_swap(skb); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); if (saddr) @@ -495,6 +496,7 @@ tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); if (saddr) diff -Nurb linux-2.6.22-570/net/ipv6/inet6_connection_sock.c linux-2.6.22-591/net/ipv6/inet6_connection_sock.c --- linux-2.6.22-570/net/ipv6/inet6_connection_sock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/inet6_connection_sock.c 2007-12-21 15:36:15.000000000 -0500 @@ -149,6 +149,7 @@ struct in6_addr *final_p = NULL, final; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); diff -Nurb 
linux-2.6.22-570/net/ipv6/inet6_hashtables.c linux-2.6.22-591/net/ipv6/inet6_hashtables.c --- linux-2.6.22-570/net/ipv6/inet6_hashtables.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/inet6_hashtables.c 2007-12-21 15:36:15.000000000 -0500 @@ -61,7 +61,7 @@ const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, struct net *net) { struct sock *sk; const struct hlist_node *node; @@ -105,7 +105,7 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, struct net *net) { struct sock *sk; const struct hlist_node *node; @@ -113,7 +113,7 @@ int score, hiscore = 0; read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { + sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -152,12 +152,12 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { struct sock *sk; local_bh_disable(); - sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, net); local_bh_enable(); return sk; @@ -251,6 +251,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + struct net *net = sk->sk_net; struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->num; struct inet_bind_hashbucket *head; @@ -258,8 +259,8 @@ int ret; if (snum == 0) { - const int low = sysctl_local_port_range[0]; - const int high = sysctl_local_port_range[1]; + const int low = sk->sk_net->sysctl_local_port_range[0]; + const int high = 
sk->sk_net->sysctl_local_port_range[1]; const int range = high - low; int i, port; static u32 hint; @@ -270,7 +271,7 @@ local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -278,7 +279,7 @@ * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { + if ((tb->port == port) && (tb->net == net)) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -291,7 +292,7 @@ } tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - head, port); + head, net, port); if (!tb) { spin_unlock(&head->lock); break; @@ -326,7 +327,7 @@ goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); diff -Nurb linux-2.6.22-570/net/ipv6/ip6_fib.c linux-2.6.22-591/net/ipv6/ip6_fib.c --- linux-2.6.22-570/net/ipv6/ip6_fib.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/ip6_fib.c 2007-12-21 15:36:15.000000000 -0500 @@ -361,6 +361,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; @@ -369,6 +370,9 @@ struct hlist_node *node; int res = 0; + if (net != &init_net) + return 0; + s_h = cb->args[0]; s_e = cb->args[1]; @@ -1311,6 +1315,11 @@ static int fib6_clean_node(struct fib6_walker_t *w) { + struct nl_info info = { + .nlh = NULL, + .pid = 0, + .net = &init_net, + }; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; @@ -1319,7 +1328,7 @@ res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL); + res = fib6_del(rt, &info); if (res) { #if 
RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); diff -Nurb linux-2.6.22-570/net/ipv6/ip6_flowlabel.c linux-2.6.22-591/net/ipv6/ip6_flowlabel.c --- linux-2.6.22-570/net/ipv6/ip6_flowlabel.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/ip6_flowlabel.c 2007-12-21 15:36:15.000000000 -0500 @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -309,6 +310,7 @@ msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); + flowi.fl_net = &init_net; flowi.oif = 0; err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); @@ -690,7 +692,7 @@ void ip6_flowlabel_init(void) { #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); #endif } @@ -698,6 +700,6 @@ { del_timer(&ip6_fl_gc_timer); #ifdef CONFIG_PROC_FS - proc_net_remove("ip6_flowlabel"); + proc_net_remove(&init_net, "ip6_flowlabel"); #endif } diff -Nurb linux-2.6.22-570/net/ipv6/ip6_input.c linux-2.6.22-591/net/ipv6/ip6_input.c --- linux-2.6.22-570/net/ipv6/ip6_input.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/ip6_input.c 2007-12-21 15:36:15.000000000 -0500 @@ -61,6 +61,11 @@ u32 pkt_len; struct inet6_dev *idev; + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; diff -Nurb linux-2.6.22-570/net/ipv6/ip6_output.c linux-2.6.22-591/net/ipv6/ip6_output.c --- linux-2.6.22-570/net/ipv6/ip6_output.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/ip6_output.c 2007-12-21 15:36:15.000000000 -0500 @@ -423,7 +423,7 @@ /* XXX: idev->cnf.proxy_ndp? 
*/ if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); @@ -543,7 +543,7 @@ found_rhdr = 1; break; case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif diff -Nurb linux-2.6.22-570/net/ipv6/ip6_tunnel.c linux-2.6.22-591/net/ipv6/ip6_tunnel.c --- linux-2.6.22-570/net/ipv6/ip6_tunnel.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/ip6_tunnel.c 2007-12-21 15:36:15.000000000 -0500 @@ -235,7 +235,7 @@ int i; for (i = 1; i < IP6_TNL_MAX; i++) { sprintf(name, "ip6tnl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i == IP6_TNL_MAX) @@ -651,7 +651,7 @@ struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && @@ -787,7 +787,7 @@ struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) printk(KERN_WARNING diff -Nurb linux-2.6.22-570/net/ipv6/ipcomp6.c linux-2.6.22-591/net/ipv6/ipcomp6.c --- linux-2.6.22-570/net/ipv6/ipcomp6.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/ipcomp6.c 2007-12-21 15:36:12.000000000 -0500 @@ -501,4 +501,4 @@ MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA "); - +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP); diff -Nurb linux-2.6.22-570/net/ipv6/ipv6_sockglue.c linux-2.6.22-591/net/ipv6/ipv6_sockglue.c --- linux-2.6.22-570/net/ipv6/ipv6_sockglue.c 2007-12-21 15:35:57.000000000 -0500 +++ 
linux-2.6.22-591/net/ipv6/ipv6_sockglue.c 2007-12-21 15:36:15.000000000 -0500 @@ -123,7 +123,7 @@ struct ipv6hdr *ipv6h; struct inet6_protocol *ops; - if (!(features & NETIF_F_HW_CSUM)) + if (!(features & NETIF_F_V6_CSUM)) features &= ~NETIF_F_SG; if (unlikely(skb_shinfo(skb)->gso_type & @@ -417,7 +417,7 @@ struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: #endif break; @@ -463,6 +463,7 @@ struct flowi fl; int junk; + fl.fl_net = &init_net; fl.fl6_flowlabel = 0; fl.oif = sk->sk_bound_dev_if; @@ -547,7 +548,7 @@ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(val) == NULL) { + if (__dev_get_by_index(&init_net, val) == NULL) { retv = -ENODEV; break; } diff -Nurb linux-2.6.22-570/net/ipv6/mcast.c linux-2.6.22-591/net/ipv6/mcast.c --- linux-2.6.22-570/net/ipv6/mcast.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/mcast.c 2007-12-21 15:36:15.000000000 -0500 @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -214,7 +215,7 @@ dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -265,7 +266,7 @@ *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { + if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -300,7 +301,7 @@ dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return NULL; @@ -331,7 +332,7 @@ np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(mc_lst->ifindex); + dev = dev_get_by_index(&init_net, mc_lst->ifindex); if (dev) { struct 
inet6_dev *idev = in6_dev_get(dev); @@ -2332,7 +2333,7 @@ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2476,7 +2477,7 @@ state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2658,8 +2659,8 @@ np->hop_limit = 1; #ifdef CONFIG_PROC_FS - proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); #endif return 0; @@ -2671,7 +2672,7 @@ igmp6_socket = NULL; /* for safety */ #ifdef CONFIG_PROC_FS - proc_net_remove("mcfilter6"); - proc_net_remove("igmp6"); + proc_net_remove(&init_net, "mcfilter6"); + proc_net_remove(&init_net, "igmp6"); #endif } diff -Nurb linux-2.6.22-570/net/ipv6/mip6.c linux-2.6.22-591/net/ipv6/mip6.c --- linux-2.6.22-570/net/ipv6/mip6.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/mip6.c 2007-12-21 15:36:12.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ return len; } -int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) +static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { struct ip6_mh *mh; @@ -471,7 +472,7 @@ .remote_addr = mip6_xfrm_addr, }; -int __init mip6_init(void) +static int __init mip6_init(void) { printk(KERN_INFO "Mobile IPv6\n"); @@ -483,18 +484,35 @@ printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); goto mip6_rthdr_xfrm_fail; } + if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { + printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__); + goto mip6_rawv6_mh_fail; + } + + 
return 0; + mip6_rawv6_mh_fail: + xfrm_unregister_type(&mip6_rthdr_type, AF_INET6); mip6_rthdr_xfrm_fail: xfrm_unregister_type(&mip6_destopt_type, AF_INET6); mip6_destopt_xfrm_fail: return -EAGAIN; } -void __exit mip6_fini(void) +static void __exit mip6_fini(void) { + if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) + printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__); if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); } + +module_init(mip6_init); +module_exit(mip6_fini); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING); diff -Nurb linux-2.6.22-570/net/ipv6/ndisc.c linux-2.6.22-591/net/ipv6/ndisc.c --- linux-2.6.22-570/net/ipv6/ndisc.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/ndisc.c 2007-12-21 15:36:15.000000000 -0500 @@ -418,6 +418,7 @@ int oif) { memset(fl, 0, sizeof(*fl)); + fl->fl_net = &init_net; ipv6_addr_copy(&fl->fl6_src, saddr); ipv6_addr_copy(&fl->fl6_dst, daddr); fl->proto = IPPROTO_ICMPV6; @@ -760,7 +761,7 @@ if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, + (pneigh = pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -901,7 +902,7 @@ */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1525,6 +1526,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; 
+ switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c linux-2.6.22-591/net/ipv6/netfilter/ip6_queue.c --- linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/ip6_queue.c 2007-12-21 15:36:15.000000000 -0500 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -546,6 +547,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -565,7 +569,7 @@ if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -657,14 +661,14 @@ struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, - THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk, + NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -685,7 +689,7 @@ cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); @@ -705,7 +709,7 @@ unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); 
mutex_lock(&ipqnl_mutex); diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c linux-2.6.22-591/net/ipv6/netfilter/ip6_tables.c --- linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/ip6_tables.c 2007-12-21 15:36:15.000000000 -0500 @@ -906,7 +906,7 @@ int ret; struct xt_table *t; - t = xt_find_table_lock(AF_INET6, entries->name); + t = xt_find_table_lock(&init_net, AF_INET6, entries->name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); @@ -972,7 +972,7 @@ duprintf("ip_tables: Translated table\n"); - t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, tmp.name), "ip6table_%s", tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1073,7 +1073,7 @@ goto free; } - t = xt_find_table_lock(AF_INET6, tmp.name); + t = xt_find_table_lock(&init_net, AF_INET6, tmp.name); if (!t || IS_ERR(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; @@ -1109,6 +1109,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1134,6 +1137,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1155,7 +1161,7 @@ } name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; @@ -1259,7 +1265,7 @@ return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); + ret = xt_register_table(&init_net, table, &bootstrap, newinfo); if (ret != 0) { xt_free_table_info(newinfo); return ret; diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.22-591/net/ipv6/netfilter/ip6t_REJECT.c --- linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/ip6t_REJECT.c 2007-12-21 15:36:15.000000000 -0500 @@ -92,6 +92,7 @@ } memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); @@ -172,7 +173,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = &loopback_dev; + skb_in->dev = &init_net.loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_filter.c --- linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_filter.c 2007-12-21 15:36:15.000000000 -0500 @@ -65,6 +65,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace 
*/ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } @@ -75,6 +79,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #if 0 /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_mangle.c --- linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_mangle.c 2007-12-21 15:36:15.000000000 -0500 @@ -79,6 +79,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); } @@ -95,6 +99,10 @@ u_int8_t hop_limit; u_int32_t flowlabel, mark; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #if 0 /* root is playing with raw sockets. 
*/ if ((*pskb)->len < sizeof(struct iphdr) diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_raw.c --- linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_raw.c 2007-12-21 15:36:15.000000000 -0500 @@ -57,6 +57,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ip6t_do_table(pskb, hook, in, out, &packet_raw); } diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.22-591/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c --- linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-12-21 15:36:15.000000000 -0500 @@ -167,6 +167,10 @@ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) @@ -203,6 +207,10 @@ { struct sk_buff *reasm; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* Previously seen (loopback)? */ if ((*pskb)->nfct) return NF_ACCEPT; @@ -231,6 +239,10 @@ { struct sk_buff *reasm = (*pskb)->nfct_reasm; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* This packet is fragmented and has reassembled packet. */ if (reasm) { /* Reassembled packet isn't parsed yet ? 
*/ @@ -256,6 +268,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) diff -Nurb linux-2.6.22-570/net/ipv6/netfilter.c linux-2.6.22-591/net/ipv6/netfilter.c --- linux-2.6.22-570/net/ipv6/netfilter.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/netfilter.c 2007-12-21 15:36:15.000000000 -0500 @@ -14,6 +14,7 @@ struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { + .fl_net = &init_net, .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .mark = skb->mark, .nl_u = diff -Nurb linux-2.6.22-570/net/ipv6/proc.c linux-2.6.22-591/net/ipv6/proc.c --- linux-2.6.22-570/net/ipv6/proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include static struct proc_dir_entry *proc_net_devsnmp6; @@ -231,22 +232,22 @@ { int rc = 0; - if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); + proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); if (!proc_net_devsnmp6) goto proc_dev_snmp6_fail; - if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) goto proc_sockstat6_fail; out: return rc; proc_sockstat6_fail: - proc_net_remove("dev_snmp6"); + proc_net_remove(&init_net, "dev_snmp6"); proc_dev_snmp6_fail: - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "snmp6"); proc_snmp6_fail: rc = -ENOMEM; goto out; @@ -254,8 +255,8 @@ void ipv6_misc_proc_exit(void) { - proc_net_remove("sockstat6"); - proc_net_remove("dev_snmp6"); - 
proc_net_remove("snmp6"); + proc_net_remove(&init_net, "sockstat6"); + proc_net_remove(&init_net, "dev_snmp6"); + proc_net_remove(&init_net, "snmp6"); } diff -Nurb linux-2.6.22-570/net/ipv6/raw.c linux-2.6.22-591/net/ipv6/raw.c --- linux-2.6.22-570/net/ipv6/raw.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/raw.c 2007-12-21 15:36:15.000000000 -0500 @@ -49,7 +49,8 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 +#include +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif @@ -137,6 +138,28 @@ return 0; } +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); + +int rawv6_mh_filter_register(int (*filter)(struct sock *sock, + struct sk_buff *skb)) +{ + rcu_assign_pointer(mh_filter, filter); + return 0; +} +EXPORT_SYMBOL(rawv6_mh_filter_register); + +int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, + struct sk_buff *skb)) +{ + rcu_assign_pointer(mh_filter, NULL); + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL(rawv6_mh_filter_unregister); + +#endif + /* * demultiplex raw sockets. * (should consider queueing the skb in the sock receive_queue @@ -178,16 +201,22 @@ case IPPROTO_ICMPV6: filtered = icmpv6_filter(sk, skb); break; -#ifdef CONFIG_IPV6_MIP6 + +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: + { /* XXX: To validate MH only once for each packet, * this is placed here. It should be after checking * xfrm policy, however it doesn't. The checking xfrm * policy is placed in rawv6_rcv() because it is * required for each socket. */ - filtered = mip6_mh_filter(sk, skb); + int (*filter)(struct sock *sock, struct sk_buff *skb); + + filter = rcu_dereference(mh_filter); + filtered = filter ? 
filter(sk, skb) : 0; break; + } #endif default: filtered = 0; @@ -254,7 +283,7 @@ if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -611,9 +640,7 @@ struct iovec *iov; u8 __user *type = NULL; u8 __user *code = NULL; -#ifdef CONFIG_IPV6_MIP6 u8 len = 0; -#endif int probed = 0; int i; @@ -646,7 +673,6 @@ probed = 1; } break; -#ifdef CONFIG_IPV6_MIP6 case IPPROTO_MH: if (iov->iov_base && iov->iov_len < 1) break; @@ -660,7 +686,6 @@ len += iov->iov_len; break; -#endif default: probed = 1; break; @@ -704,6 +729,7 @@ * Get and verify the address. */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -1291,13 +1317,13 @@ int __init raw6_proc_init(void) { - if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; return 0; } void raw6_proc_exit(void) { - proc_net_remove("raw6"); + proc_net_remove(&init_net, "raw6"); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/ipv6/reassembly.c linux-2.6.22-591/net/ipv6/reassembly.c --- linux-2.6.22-570/net/ipv6/reassembly.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/reassembly.c 2007-12-21 15:36:15.000000000 -0500 @@ -301,7 +301,7 @@ fq_kill(fq); - dev = dev_get_by_index(fq->iif); + dev = dev_get_by_index(&init_net, fq->iif); if (!dev) goto out; diff -Nurb linux-2.6.22-570/net/ipv6/route.c linux-2.6.22-591/net/ipv6/route.c --- linux-2.6.22-570/net/ipv6/route.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/route.c 2007-12-21 15:36:15.000000000 -0500 @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -137,7 +138,7 @@ .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -163,7 
+164,7 @@ .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -EACCES, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -183,7 +184,7 @@ .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -223,8 +224,8 @@ struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + if (dev != &init_net.loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -564,6 +565,7 @@ int oif, int strict) { struct flowi fl = { + .fl_net = &init_net, .oif = oif, .nl_u = { .ip6_u = { @@ -611,7 +613,12 @@ int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL); + struct nl_info info = { + .nlh = NULL, + .pid = 0, + .net = &init_net, + }; + return __ip6_ins_rt(rt, &info); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -742,6 +749,7 @@ struct ipv6hdr *iph = ipv6_hdr(skb); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { + .fl_net = &init_net, .iif = skb->dev->ifindex, .nl_u = { .ip6_u = { @@ -1129,7 +1137,7 @@ #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(cfg->fc_ifindex); + dev = dev_get_by_index(&init_net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1187,12 +1195,12 @@ if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. 
*/ - if (dev != &loopback_dev) { + if (dev != &init_net.loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = &loopback_dev; + dev = &init_net.loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1333,7 +1341,12 @@ int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL); + struct nl_info info = { + .nlh = NULL, + .pid = 0, + .net = &init_net, + }; + return __ip6_del_rt(rt, &info); } static int ip6_route_del(struct fib6_config *cfg) @@ -1444,6 +1457,7 @@ int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip6rd_flowi rdfl = { .fl = { + .fl_net = &init_net, .oif = dev->ifindex, .nl_u = { .ip6_u = { @@ -1896,13 +1910,13 @@ if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&loopback_dev); + dev_hold(&init_net.loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &loopback_dev; + rt->rt6i_dev = &init_net.loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -2033,6 +2047,7 @@ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; + cfg->fc_nlinfo.net = skb->sk->sk_net; if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2078,9 +2093,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2090,9 +2109,13 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2227,6 +2250,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + 
struct net *net = in_skb->sk->sk_net; struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2234,12 +2258,16 @@ struct flowi fl; int err, iif = 0; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; err = -EINVAL; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) @@ -2263,7 +2291,7 @@ if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(&init_net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2293,7 +2321,7 @@ goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2301,17 +2329,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = 0, seq = 0; - struct nlmsghdr *nlh = NULL; + u32 pid = info->pid, seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; + struct nlmsghdr *nlh = info->nlh; int err = -ENOBUFS; - if (info) { - pid = info->pid; - nlh = info->nlh; - if (nlh) - seq = nlh->nlmsg_seq; - } - skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); if (skb == NULL) goto errout; @@ -2323,10 +2344,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); } /* @@ -2558,13 +2579,19 @@ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; + /* Perform the initialization we can't perform at compile time */ + ip6_null_entry.u.dst.dev = &init_net.loopback_dev; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + ip6_prohibit_entry.u.dst.dev = &init_net.loopback_dev; + ip6_blk_hole_entry.u.dst.dev = &init_net.loopback_dev; +#endif fib6_init(); #ifdef CONFIG_PROC_FS - p = proc_net_create("ipv6_route", 0, rt6_proc_info); + p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); if (p) p->owner = THIS_MODULE; - proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); #endif #ifdef CONFIG_XFRM xfrm6_init(); @@ -2584,8 +2611,8 @@ fib6_rules_cleanup(); #endif #ifdef CONFIG_PROC_FS - proc_net_remove("ipv6_route"); - proc_net_remove("rt6_stats"); + proc_net_remove(&init_net, "ipv6_route"); + proc_net_remove(&init_net, "rt6_stats"); #endif #ifdef CONFIG_XFRM xfrm6_fini(); diff -Nurb linux-2.6.22-570/net/ipv6/sit.c linux-2.6.22-591/net/ipv6/sit.c --- linux-2.6.22-570/net/ipv6/sit.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/sit.c 2007-12-21 15:36:15.000000000 -0500 @@ -167,7 +167,7 @@ int i; for (i=1; i<100; i++) { sprintf(name, "sit%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) 
== NULL) break; } if (i==100) @@ -283,6 +283,9 @@ struct sk_buff *skb2; struct rt6_info *rt6i; + if (skb->dev->nd_net != &init_net) + return; + if (len < hlen + sizeof(struct ipv6hdr)) return; iph6 = (struct ipv6hdr*)(dp + hlen); @@ -369,6 +372,10 @@ struct iphdr *iph; struct ip_tunnel *tunnel; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -474,7 +481,8 @@ } { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, .tos = RT_TOS(tos) } }, @@ -745,7 +753,8 @@ memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, .tos = RT_TOS(iph->tos) } }, @@ -760,7 +769,7 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff -Nurb linux-2.6.22-570/net/ipv6/tcp_ipv6.c linux-2.6.22-591/net/ipv6/tcp_ipv6.c --- linux-2.6.22-570/net/ipv6/tcp_ipv6.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/tcp_ipv6.c 2007-12-21 15:36:15.000000000 -0500 @@ -143,6 +143,7 @@ return(-EAFNOSUPPORT); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; @@ -330,6 +331,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = skb->dev->nd_net; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; @@ -339,7 +341,7 @@ __u32 seq; sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, - th->source, skb->dev->ifindex); + 
th->source, skb->dev->ifindex, net); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -388,6 +390,7 @@ for now. */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -481,6 +484,7 @@ int err = -1; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); @@ -1066,6 +1070,7 @@ buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); @@ -1167,6 +1172,7 @@ buff->csum = csum_partial((char *)t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); @@ -1224,7 +1230,8 @@ nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), inet6_iif(skb), + sk->sk_net); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1414,6 +1421,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); if (opt && opt->srcrt) { @@ -1700,6 +1708,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; + struct net *net = skb->dev->nd_net; struct tcphdr *th; struct sock *sk; int ret; @@ -1736,7 +1745,7 @@ sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), - inet6_iif(skb)); + inet6_iif(skb), net); if (!sk) goto no_tcp_socket; @@ -1816,7 +1825,8 @@ sk2 = inet6_lookup_listener(&tcp_hashinfo, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), inet6_iif(skb), + net); if (sk2 != NULL) { 
struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); @@ -2121,12 +2131,12 @@ int __init tcp6_proc_init(void) { - return tcp_proc_register(&tcp6_seq_afinfo); + return tcp_proc_register(&init_net, &tcp6_seq_afinfo); } void tcp6_proc_exit(void) { - tcp_proc_unregister(&tcp6_seq_afinfo); + tcp_proc_unregister(&init_net, &tcp6_seq_afinfo); } #endif diff -Nurb linux-2.6.22-570/net/ipv6/udp.c linux-2.6.22-591/net/ipv6/udp.c --- linux-2.6.22-570/net/ipv6/udp.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/udp.c 2007-12-21 15:36:15.000000000 -0500 @@ -657,6 +657,7 @@ ulen += sizeof(struct udphdr); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (sin6) { if (sin6->sin6_port == 0) @@ -967,11 +968,11 @@ int __init udp6_proc_init(void) { - return udp_proc_register(&udp6_seq_afinfo); + return udp_proc_register(&init_net, &udp6_seq_afinfo); } void udp6_proc_exit(void) { - udp_proc_unregister(&udp6_seq_afinfo); + udp_proc_unregister(&init_net, &udp6_seq_afinfo); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/ipv6/udplite.c linux-2.6.22-591/net/ipv6/udplite.c --- linux-2.6.22-570/net/ipv6/udplite.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/udplite.c 2007-12-21 15:36:15.000000000 -0500 @@ -95,11 +95,11 @@ int __init udplite6_proc_init(void) { - return udp_proc_register(&udplite6_seq_afinfo); + return udp_proc_register(&init_net, &udplite6_seq_afinfo); } void udplite6_proc_exit(void) { - udp_proc_unregister(&udplite6_seq_afinfo); + udp_proc_unregister(&init_net, &udplite6_seq_afinfo); } #endif diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_policy.c linux-2.6.22-591/net/ipv6/xfrm6_policy.c --- linux-2.6.22-570/net/ipv6/xfrm6_policy.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/ipv6/xfrm6_policy.c 2007-12-21 15:36:15.000000000 -0500 @@ -18,7 +18,7 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 
#include #endif @@ -40,6 +40,7 @@ { struct rt6_info *rt; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip6_u = { .daddr = *(struct in6_addr *)&daddr->a6, @@ -132,6 +133,7 @@ struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip6_u = { .saddr = fl->fl6_src, @@ -278,6 +280,7 @@ u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); + fl->fl_net = &init_net; ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); @@ -318,7 +321,7 @@ fl->proto = nexthdr; return; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; @@ -375,7 +378,7 @@ xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_state.c linux-2.6.22-591/net/ipv6/xfrm6_state.c --- linux-2.6.22-570/net/ipv6/xfrm6_state.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipv6/xfrm6_state.c 2007-12-21 15:36:12.000000000 -0500 @@ -65,7 +65,7 @@ goto end; /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { if (src[i] && (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || @@ -130,7 +130,7 @@ goto end; /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { if (src[i] && (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c linux-2.6.22-591/net/ipv6/xfrm6_tunnel.c --- linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/net/ipv6/xfrm6_tunnel.c 2007-12-21 15:36:12.000000000 -0500 @@ -379,3 +379,4 @@ module_init(xfrm6_tunnel_init); module_exit(xfrm6_tunnel_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6); diff -Nurb linux-2.6.22-570/net/ipx/af_ipx.c linux-2.6.22-591/net/ipx/af_ipx.c --- linux-2.6.22-570/net/ipx/af_ipx.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipx/af_ipx.c 2007-12-21 15:36:15.000000000 -0500 @@ -347,6 +347,9 @@ struct net_device *dev = ptr; struct ipx_interface *i, *tmp; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN && event != NETDEV_UP) goto out; @@ -986,7 +989,7 @@ if (intrfc) ipxitf_put(intrfc); - dev = dev_get_by_name(idef->ipx_device); + dev = dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1094,7 +1097,7 @@ if (!dlink_type) goto out; - dev = __dev_get_by_name(idef->ipx_device); + dev = __dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1189,7 +1192,7 @@ if (copy_from_user(&ifr, arg, sizeof(ifr))) break; sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; - dev = __dev_get_by_name(ifr.ifr_name); + dev = __dev_get_by_name(&init_net, ifr.ifr_name); rc = -ENODEV; if (!dev) break; @@ -1360,11 +1363,14 @@ .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct socket *sock, int protocol) +static int ipx_create(struct net *net, struct socket *sock, int protocol) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * SPX support is not anymore in the kernel sources. 
If you want to * ressurrect it, completing it and making it understand shared skbs, @@ -1375,7 +1381,7 @@ goto out; rc = -ENOMEM; - sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); if (!sk) goto out; #ifdef IPX_REFCNT_DEBUG @@ -1644,6 +1650,9 @@ u16 ipx_pktsize; int rc = 0; + if (dev->nd_net != &init_net) + goto drop; + /* Not ours */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff -Nurb linux-2.6.22-570/net/ipx/ipx_proc.c linux-2.6.22-591/net/ipx/ipx_proc.c --- linux-2.6.22-570/net/ipx/ipx_proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/ipx/ipx_proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -353,7 +354,7 @@ struct proc_dir_entry *p; int rc = -ENOMEM; - ipx_proc_dir = proc_mkdir("ipx", proc_net); + ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net); if (!ipx_proc_dir) goto out; @@ -381,7 +382,7 @@ out_route: remove_proc_entry("interface", ipx_proc_dir); out_interface: - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); goto out; } @@ -390,7 +391,7 @@ remove_proc_entry("interface", ipx_proc_dir); remove_proc_entry("route", ipx_proc_dir); remove_proc_entry("socket", ipx_proc_dir); - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/irda/af_irda.c linux-2.6.22-591/net/irda/af_irda.c --- linux-2.6.22-570/net/irda/af_irda.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/irda/af_irda.c 2007-12-21 15:36:15.000000000 -0500 @@ -60,7 +60,7 @@ #include -static int irda_create(struct socket *sock, int protocol); +static int irda_create(struct net *net, struct socket *sock, int protocol); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -831,7 +831,7 @@ IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - err = irda_create(newsock, 
sk->sk_protocol); + err = irda_create(sk->sk_net, newsock, sk->sk_protocol); if (err) return err; @@ -1057,13 +1057,16 @@ * Create IrDA socket * */ -static int irda_create(struct socket *sock, int protocol) +static int irda_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct irda_sock *self; IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + if (net != &init_net) + return -EAFNOSUPPORT; + /* Check for valid socket type */ switch (sock->type) { case SOCK_STREAM: /* For TTP connections with SAR disabled */ @@ -1075,7 +1078,7 @@ } /* Allocate networking socket */ - sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); if (sk == NULL) return -ENOMEM; diff -Nurb linux-2.6.22-570/net/irda/irias_object.c linux-2.6.22-591/net/irda/irias_object.c --- linux-2.6.22-570/net/irda/irias_object.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/irda/irias_object.c 2007-12-21 15:36:12.000000000 -0500 @@ -36,39 +36,6 @@ */ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; -/* - * Function strndup (str, max) - * - * My own kernel version of strndup! - * - * Faster, check boundary... 
Jean II - */ -static char *strndup(char *str, size_t max) -{ - char *new_str; - int len; - - /* Check string */ - if (str == NULL) - return NULL; - /* Check length, truncate */ - len = strlen(str); - if(len > max) - len = max; - - /* Allocate new string */ - new_str = kmalloc(len + 1, GFP_ATOMIC); - if (new_str == NULL) { - IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); - return NULL; - } - - /* Copy and truncate */ - memcpy(new_str, str, len); - new_str[len] = '\0'; - - return new_str; -} /* * Function ias_new_object (name, id) @@ -90,7 +57,7 @@ } obj->magic = IAS_OBJECT_MAGIC; - obj->name = strndup(name, IAS_MAX_CLASSNAME); + obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC); if (!obj->name) { IRDA_WARNING("%s(), Unable to allocate name!\n", __FUNCTION__); @@ -360,7 +327,7 @@ } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); /* Insert value */ attrib->value = irias_new_integer_value(value); @@ -404,7 +371,7 @@ } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_octseq_value( octets, len); if (!attrib->name || !attrib->value) { @@ -446,7 +413,7 @@ } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_string_value(value); if (!attrib->name || !attrib->value) { @@ -506,7 +473,7 @@ value->type = IAS_STRING; value->charset = CS_ASCII; - value->t.string = strndup(string, IAS_MAX_STRING); + value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC); if (!value->t.string) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); kfree(value); diff -Nurb linux-2.6.22-570/net/irda/irlap_frame.c linux-2.6.22-591/net/irda/irlap_frame.c --- linux-2.6.22-570/net/irda/irlap_frame.c 2007-07-08 
19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/irda/irlap_frame.c 2007-12-21 15:36:15.000000000 -0500 @@ -1319,6 +1319,9 @@ int command; __u8 control; + if (dev->nd_net != &init_net) + goto out; + /* FIXME: should we get our own field? */ self = (struct irlap_cb *) dev->atalk_ptr; diff -Nurb linux-2.6.22-570/net/irda/irproc.c linux-2.6.22-591/net/irda/irproc.c --- linux-2.6.22-570/net/irda/irproc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/irda/irproc.c 2007-12-21 15:36:15.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -66,7 +67,7 @@ int i; struct proc_dir_entry *d; - proc_irda = proc_mkdir("irda", proc_net); + proc_irda = proc_mkdir("irda", init_net.proc_net); if (proc_irda == NULL) return; proc_irda->owner = THIS_MODULE; @@ -92,7 +93,7 @@ for (i=0; i #include #include +#include #include @@ -136,11 +137,14 @@ .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct socket *sock, int protocol) +static int pfkey_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -149,7 +153,7 @@ return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); if (sk == NULL) goto out; @@ -3781,7 +3785,7 @@ static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); - remove_proc_entry("net/pfkey", NULL); + remove_proc_entry("pfkey", init_net.proc_net); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3798,7 +3802,7 @@ goto out_unregister_key_proto; #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL) + if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL) goto out_sock_unregister; #endif err = xfrm_register_km(&pfkeyv2_mgr); diff 
-Nurb linux-2.6.22-570/net/llc/af_llc.c linux-2.6.22-591/net/llc/af_llc.c --- linux-2.6.22-570/net/llc/af_llc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/llc/af_llc.c 2007-12-21 15:36:15.000000000 -0500 @@ -150,14 +150,17 @@ * socket type we have available. * Returns 0 upon success, negative upon failure. */ -static int llc_ui_create(struct socket *sock, int protocol) +static int llc_ui_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); @@ -249,7 +252,7 @@ if (!sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -ENODEV; - llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd); + llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!llc->dev) goto out; rc = -EUSERS; @@ -300,7 +303,7 @@ goto out; rc = -ENODEV; rtnl_lock(); - llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac); rtnl_unlock(); if (!llc->dev) goto out; diff -Nurb linux-2.6.22-570/net/llc/llc_conn.c linux-2.6.22-591/net/llc/llc_conn.c --- linux-2.6.22-570/net/llc/llc_conn.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/llc/llc_conn.c 2007-12-21 15:36:15.000000000 -0500 @@ -700,7 +700,7 @@ struct llc_addr *saddr, struct llc_addr *daddr) { - struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, sk->sk_prot); struct llc_sock *newllc, *llc = llc_sk(sk); @@ -867,9 +867,9 @@ * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) { - struct sock *sk = sk_alloc(family, priority, prot, 1); + struct sock *sk = sk_alloc(net, family, priority, prot, 1); if (!sk) goto out; diff -Nurb linux-2.6.22-570/net/llc/llc_core.c linux-2.6.22-591/net/llc/llc_core.c --- linux-2.6.22-570/net/llc/llc_core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/llc/llc_core.c 2007-12-23 03:37:02.000000000 -0500 @@ -19,6 +19,7 @@ #include #include #include +#include #include LIST_HEAD(llc_sap_list); @@ -162,7 +163,8 @@ { struct net_device *dev; - dev = first_net_device(); + /* XXX sapan + dev = first_net_device(&init_net); if (dev != NULL) dev = next_net_device(dev); @@ -172,6 +174,7 @@ memset(llc_station_mac_sa, 0, ETH_ALEN); dev_add_pack(&llc_packet_type); dev_add_pack(&llc_tr_packet_type); + */ return 0; } diff -Nurb linux-2.6.22-570/net/llc/llc_core.c.orig linux-2.6.22-591/net/llc/llc_core.c.orig --- linux-2.6.22-570/net/llc/llc_core.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/net/llc/llc_core.c.orig 2007-12-21 15:36:15.000000000 -0500 @@ -0,0 +1,197 @@ +/* + * llc_core.c - Minimum needed routines for sap handling and module init/exit + * + * Copyright (c) 1997 by Procom Technology, Inc. + * 2001-2003 by Arnaldo Carvalho de Melo + * + * This program can be redistributed or modified under the terms of the + * GNU General Public License as published by the Free Software Foundation. + * This program is distributed without any warranty or implied warranty + * of merchantability or fitness for a particular purpose. + * + * See the GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LIST_HEAD(llc_sap_list); +DEFINE_RWLOCK(llc_sap_list_lock); + +unsigned char llc_station_mac_sa[ETH_ALEN]; + +/** + * llc_sap_alloc - allocates and initializes sap. + * + * Allocates and initializes sap. + */ +static struct llc_sap *llc_sap_alloc(void) +{ + struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); + + if (sap) { + sap->state = LLC_SAP_STATE_ACTIVE; + memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); + rwlock_init(&sap->sk_list.lock); + atomic_set(&sap->refcnt, 1); + } + return sap; +} + +/** + * llc_add_sap - add sap to station list + * @sap: Address of the sap + * + * Adds a sap to the LLC's station sap list. + */ +static void llc_add_sap(struct llc_sap *sap) +{ + list_add_tail(&sap->node, &llc_sap_list); +} + +/** + * llc_del_sap - del sap from station list + * @sap: Address of the sap + * + * Removes a sap to the LLC's station sap list. + */ +static void llc_del_sap(struct llc_sap *sap) +{ + write_lock_bh(&llc_sap_list_lock); + list_del(&sap->node); + write_unlock_bh(&llc_sap_list_lock); +} + +static struct llc_sap *__llc_sap_find(unsigned char sap_value) +{ + struct llc_sap* sap; + + list_for_each_entry(sap, &llc_sap_list, node) + if (sap->laddr.lsap == sap_value) + goto out; + sap = NULL; +out: + return sap; +} + +/** + * llc_sap_find - searchs a SAP in station + * @sap_value: sap to be found + * + * Searchs for a sap in the sap list of the LLC's station upon the sap ID. + * If the sap is found it will be refcounted and the user will have to do + * a llc_sap_put after use. + * Returns the sap or %NULL if not found. + */ +struct llc_sap *llc_sap_find(unsigned char sap_value) +{ + struct llc_sap* sap; + + read_lock_bh(&llc_sap_list_lock); + sap = __llc_sap_find(sap_value); + if (sap) + llc_sap_hold(sap); + read_unlock_bh(&llc_sap_list_lock); + return sap; +} + +/** + * llc_sap_open - open interface to the upper layers. + * @lsap: SAP number. 
+ * @func: rcv func for datalink protos + * + * Interface function to upper layer. Each one who wants to get a SAP + * (for example NetBEUI) should call this function. Returns the opened + * SAP for success, NULL for failure. + */ +struct llc_sap *llc_sap_open(unsigned char lsap, + int (*func)(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev)) +{ + struct llc_sap *sap = NULL; + + write_lock_bh(&llc_sap_list_lock); + if (__llc_sap_find(lsap)) /* SAP already exists */ + goto out; + sap = llc_sap_alloc(); + if (!sap) + goto out; + sap->laddr.lsap = lsap; + sap->rcv_func = func; + llc_add_sap(sap); +out: + write_unlock_bh(&llc_sap_list_lock); + return sap; +} + +/** + * llc_sap_close - close interface for upper layers. + * @sap: SAP to be closed. + * + * Close interface function to upper layer. Each one who wants to + * close an open SAP (for example NetBEUI) should call this function. + * Removes this sap from the list of saps in the station and then + * frees the memory for this sap. 
+ */ +void llc_sap_close(struct llc_sap *sap) +{ + WARN_ON(!hlist_empty(&sap->sk_list.list)); + llc_del_sap(sap); + kfree(sap); +} + +static struct packet_type llc_packet_type = { + .type = __constant_htons(ETH_P_802_2), + .func = llc_rcv, +}; + +static struct packet_type llc_tr_packet_type = { + .type = __constant_htons(ETH_P_TR_802_2), + .func = llc_rcv, +}; + +static int __init llc_init(void) +{ + struct net_device *dev; + + dev = first_net_device(&init_net); + if (dev != NULL) + dev = next_net_device(dev); + + if (dev != NULL) + memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN); + else + memset(llc_station_mac_sa, 0, ETH_ALEN); + dev_add_pack(&llc_packet_type); + dev_add_pack(&llc_tr_packet_type); + return 0; +} + +static void __exit llc_exit(void) +{ + dev_remove_pack(&llc_packet_type); + dev_remove_pack(&llc_tr_packet_type); +} + +module_init(llc_init); +module_exit(llc_exit); + +EXPORT_SYMBOL(llc_station_mac_sa); +EXPORT_SYMBOL(llc_sap_list); +EXPORT_SYMBOL(llc_sap_list_lock); +EXPORT_SYMBOL(llc_sap_find); +EXPORT_SYMBOL(llc_sap_open); +EXPORT_SYMBOL(llc_sap_close); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003"); +MODULE_DESCRIPTION("LLC IEEE 802.2 core support"); diff -Nurb linux-2.6.22-570/net/llc/llc_input.c linux-2.6.22-591/net/llc/llc_input.c --- linux-2.6.22-570/net/llc/llc_input.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/llc/llc_input.c 2007-12-21 15:36:15.000000000 -0500 @@ -12,6 +12,7 @@ * See the GNU General Public License for more details. */ #include +#include #include #include #include @@ -145,6 +146,9 @@ int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + if (dev->nd_net != &init_net) + goto drop; + /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. 
diff -Nurb linux-2.6.22-570/net/llc/llc_proc.c linux-2.6.22-591/net/llc/llc_proc.c --- linux-2.6.22-570/net/llc/llc_proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/llc/llc_proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -231,7 +232,7 @@ int rc = -ENOMEM; struct proc_dir_entry *p; - llc_proc_dir = proc_mkdir("llc", proc_net); + llc_proc_dir = proc_mkdir("llc", init_net.proc_net); if (!llc_proc_dir) goto out; llc_proc_dir->owner = THIS_MODULE; @@ -254,7 +255,7 @@ out_core: remove_proc_entry("socket", llc_proc_dir); out_socket: - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); goto out; } @@ -262,5 +263,5 @@ { remove_proc_entry("socket", llc_proc_dir); remove_proc_entry("core", llc_proc_dir); - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); } diff -Nurb linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c linux-2.6.22-591/net/mac80211/ieee80211_ioctl.c --- linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/mac80211/ieee80211_ioctl.c 2007-12-21 15:36:12.000000000 -0500 @@ -838,6 +838,29 @@ } +static int ieee80211_ioctl_giwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->type == IEEE80211_IF_TYPE_STA) + sta = sta_info_get(local, sdata->u.sta.bssid); + else + return -EOPNOTSUPP; + if (!sta) + return -ENODEV; + if (sta->txrate < local->oper_hw_mode->num_rates) + rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000; + else + rate->value = 0; + sta_info_put(sta); + return 0; +} + static int ieee80211_ioctl_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra) @@ 
-1779,7 +1802,7 @@ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* SIOCSIWRATE */ - (iw_handler) NULL, /* SIOCGIWRATE */ + (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ diff -Nurb linux-2.6.22-570/net/netfilter/core.c linux-2.6.22-591/net/netfilter/core.c --- linux-2.6.22-570/net/netfilter/core.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/core.c 2007-12-21 15:36:15.000000000 -0500 @@ -20,6 +20,7 @@ #include #include #include +#include #include "nf_internals.h" @@ -203,7 +204,9 @@ return 0; /* Not exclusive use of packet? Must copy. */ - if (skb_shared(*pskb) || skb_cloned(*pskb)) + if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) + goto copy_skb; + if (skb_shared(*pskb)) goto copy_skb; return pskb_may_pull(*pskb, writable_len); @@ -278,8 +281,28 @@ #endif /* CONFIG_NF_CONNTRACK */ #ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); +static int netfilter_proc_init(struct net * net) +{ + int error = -ENOMEM; + net->proc_net_netfilter = proc_mkdir("netfilter", net->proc_net); + + if (net->proc_net_netfilter) { + net->proc_net_netfilter->data = net; + error = 0; + } + return error; +} + +static void netfilter_proc_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_proc_ops = { + .init = netfilter_proc_init, + .exit = netfilter_proc_exit, +}; + #endif void __init netfilter_init(void) @@ -291,8 +314,7 @@ } #ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_proc_ops) < 0) panic("cannot create netfilter proc entry"); #endif diff -Nurb linux-2.6.22-570/net/netfilter/nf_conntrack_h323_main.c 
linux-2.6.22-591/net/netfilter/nf_conntrack_h323_main.c --- linux-2.6.22-570/net/netfilter/nf_conntrack_h323_main.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nf_conntrack_h323_main.c 2007-12-21 15:36:15.000000000 -0500 @@ -724,6 +724,8 @@ memset(&fl1, 0, sizeof(fl1)); memset(&fl2, 0, sizeof(fl2)); + fl1.fl_net = &init_net; + fl2.fl_net = &init_net; switch (family) { case AF_INET: { diff -Nurb linux-2.6.22-570/net/netfilter/nf_conntrack_standalone.c linux-2.6.22-591/net/netfilter/nf_conntrack_standalone.c --- linux-2.6.22-570/net/netfilter/nf_conntrack_standalone.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nf_conntrack_standalone.c 2007-12-21 15:36:15.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -419,14 +420,14 @@ return ret; #ifdef CONFIG_PROC_FS - proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc_exp) goto cleanup_proc; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto cleanup_proc_exp; @@ -447,11 +448,11 @@ cleanup_proc_stat: #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); + remove_proc_entry("nf_conntrack", init_net.proc_net_stat); cleanup_proc_exp: - proc_net_remove("nf_conntrack_expect"); + proc_net_remove(&init_net, "nf_conntrack_expect"); cleanup_proc: - proc_net_remove("nf_conntrack"); + proc_net_remove(&init_net, "nf_conntrack"); cleanup_init: #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); @@ -464,9 +465,9 @@ unregister_sysctl_table(nf_ct_sysctl_header); #endif #ifdef CONFIG_PROC_FS - 
remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack_expect"); - proc_net_remove("nf_conntrack"); + remove_proc_entry("nf_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "nf_conntrack_expect"); + proc_net_remove(&init_net, "nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); } diff -Nurb linux-2.6.22-570/net/netfilter/nf_log.c linux-2.6.22-591/net/netfilter/nf_log.c --- linux-2.6.22-570/net/netfilter/nf_log.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nf_log.c 2007-12-21 15:36:15.000000000 -0500 @@ -168,7 +168,8 @@ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; - pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); + pde = create_proc_entry("nf_log", S_IRUGO, + init_net.proc_net_netfilter); if (!pde) return -1; diff -Nurb linux-2.6.22-570/net/netfilter/nf_queue.c linux-2.6.22-591/net/netfilter/nf_queue.c --- linux-2.6.22-570/net/netfilter/nf_queue.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nf_queue.c 2007-12-21 15:36:15.000000000 -0500 @@ -346,7 +346,7 @@ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; - pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); + pde = create_proc_entry("nf_queue", S_IRUGO, init_net.proc_net_netfilter); if (!pde) return -1; pde->proc_fops = &nfqueue_file_ops; diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink.c linux-2.6.22-591/net/netfilter/nfnetlink.c --- linux-2.6.22-570/net/netfilter/nfnetlink.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nfnetlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -264,7 +264,7 @@ { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink_log.c 
linux-2.6.22-591/net/netfilter/nfnetlink_log.c --- linux-2.6.22-570/net/netfilter/nfnetlink_log.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nfnetlink_log.c 2007-12-21 15:36:15.000000000 -0500 @@ -705,7 +705,8 @@ hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { UDEBUG("node = %p\n", inst); - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -1023,7 +1024,7 @@ #ifdef CONFIG_PROC_FS proc_nful = create_proc_entry("nfnetlink_log", 0440, - proc_net_netfilter); + init_net.proc_net_netfilter); if (!proc_nful) goto cleanup_subsys; proc_nful->proc_fops = &nful_file_ops; @@ -1043,7 +1044,7 @@ { nf_log_unregister(&nfulnl_logger); #ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_log", proc_net_netfilter); + remove_proc_entry("nfnetlink_log", init_net.proc_net_netfilter); #endif nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink_queue.c linux-2.6.22-591/net/netfilter/nfnetlink_queue.c --- linux-2.6.22-570/net/netfilter/nfnetlink_queue.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/nfnetlink_queue.c 2007-12-21 15:36:15.000000000 -0500 @@ -734,6 +734,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); @@ -762,7 +765,8 @@ struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -1106,7 +1110,7 @@ #ifdef CONFIG_PROC_FS proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, - proc_net_netfilter); + init_net.proc_net_netfilter); if (!proc_nfqueue) goto cleanup_subsys; proc_nfqueue->proc_fops = &nfqnl_file_ops; @@ -1129,7 +1133,7 
@@ nf_unregister_queue_handlers(&nfqh); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); + remove_proc_entry("nfnetlink_queue", init_net.proc_net_netfilter); #endif nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff -Nurb linux-2.6.22-570/net/netfilter/x_tables.c linux-2.6.22-591/net/netfilter/x_tables.c --- linux-2.6.22-570/net/netfilter/x_tables.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/x_tables.c 2007-12-21 15:36:15.000000000 -0500 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -37,11 +38,16 @@ struct mutex mutex; struct list_head match; struct list_head target; - struct list_head tables; struct mutex compat_mutex; }; -static struct xt_af *xt; + +struct xt_af_pernet { + struct list_head tables; +}; + +static struct xt_af * xt; + #ifdef DEBUG_IP_FIREWALL_USER #define duprintf(format, args...) printk(format , ## args) @@ -286,9 +292,9 @@ return 1; } if (target == 1) - have_rev = target_revfn(af, name, revision, &best); + have_rev = target_revfn( af, name, revision, &best); else - have_rev = match_revfn(af, name, revision, &best); + have_rev = match_revfn( af, name, revision, &best); mutex_unlock(&xt[af].mutex); /* Nothing at all? Return 0 to try loading module. */ @@ -533,14 +539,14 @@ EXPORT_SYMBOL(xt_free_table_info); /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. 
*/ -struct xt_table *xt_find_table_lock(int af, const char *name) +struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) { struct xt_table *t; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); - list_for_each_entry(t, &xt[af].tables, list) + list_for_each_entry(t, &net->xtn[af].tables, list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; mutex_unlock(&xt[af].mutex); @@ -596,7 +602,7 @@ } EXPORT_SYMBOL_GPL(xt_replace_table); -int xt_register_table(struct xt_table *table, +int xt_register_table(struct net *net, struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { @@ -609,7 +615,7 @@ return ret; /* Don't autoload: we'd eat our tail... */ - list_for_each_entry(t, &xt[table->af].tables, list) { + list_for_each_entry(t, &net->xtn[table->af].tables, list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto unlock; @@ -628,7 +634,7 @@ /* save number of initial entries */ private->initial_entries = private->number; - list_add(&table->list, &xt[table->af].tables); + list_add(&table->list, &net->xtn[table->af].tables); ret = 0; unlock: @@ -666,7 +672,7 @@ return pos ? 
NULL : head; } -static struct list_head *type2list(u_int16_t af, u_int16_t type) +static struct list_head *type2list(struct net *net, u_int16_t af, u_int16_t type) { struct list_head *list; @@ -678,7 +684,7 @@ list = &xt[af].match; break; case TABLE: - list = &xt[af].tables; + list = &net->xtn[af].tables; break; default: list = NULL; @@ -691,6 +697,7 @@ static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos) { struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private; + struct net *net = PDE_NET(pde); u_int16_t af = (unsigned long)pde->data & 0xffff; u_int16_t type = (unsigned long)pde->data >> 16; struct list_head *list; @@ -698,7 +705,7 @@ if (af >= NPROTO) return NULL; - list = type2list(af, type); + list = type2list(net, af, type); if (!list) return NULL; @@ -711,6 +718,7 @@ static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct proc_dir_entry *pde = seq->private; + struct net *net = PDE_NET(pde); u_int16_t af = (unsigned long)pde->data & 0xffff; u_int16_t type = (unsigned long)pde->data >> 16; struct list_head *list; @@ -718,7 +726,7 @@ if (af >= NPROTO) return NULL; - list = type2list(af, type); + list = type2list(net, af, type); if (!list) return NULL; @@ -759,6 +767,7 @@ if (!ret) { struct seq_file *seq = file->private_data; struct proc_dir_entry *pde = PDE(inode); + get_net(PROC_NET(inode)); seq->private = pde; } @@ -766,12 +775,18 @@ return ret; } +static int xt_tgt_release(struct inode *inode, struct file *file) +{ + put_net(PROC_NET(inode)); + return seq_release(inode, file); +} + static const struct file_operations xt_file_ops = { .owner = THIS_MODULE, .open = xt_tgt_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = xt_tgt_release, }; #define FORMAT_TABLES "_tables_names" @@ -794,7 +809,7 @@ #ifdef CONFIG_PROC_FS strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = 
proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out; proc->data = (void *) ((unsigned long) af | (TABLE << 16)); @@ -802,14 +817,14 @@ strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_tables; proc->data = (void *) ((unsigned long) af | (MATCH << 16)); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_matches; proc->data = (void *) ((unsigned long) af | (TARGET << 16)); @@ -821,12 +836,12 @@ out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out: return -1; #endif @@ -840,19 +855,42 @@ strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); +static int xt_net_init(struct net *net) +{ + int i; + + net->xtn = kmalloc(sizeof(struct xt_af_pernet) * NPROTO, GFP_KERNEL); + if (!net->xtn) + return -ENOMEM; + + for (i = 0; i < NPROTO; i++) { + INIT_LIST_HEAD(&net->xtn[i].tables); + } + return 0; +} + +static void xt_net_exit(struct net *net) +{ + kfree(net->xtn); +} + +static struct 
pernet_operations xt_net_ops = { + .init = xt_net_init, + .exit = xt_net_exit, +}; static int __init xt_init(void) { @@ -869,13 +907,13 @@ #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); - INIT_LIST_HEAD(&xt[i].tables); } - return 0; + return register_pernet_subsys(&xt_net_ops); } static void __exit xt_fini(void) { + unregister_pernet_subsys(&xt_net_ops); kfree(xt); } diff -Nurb linux-2.6.22-570/net/netfilter/xt_MARK.c linux-2.6.22-591/net/netfilter/xt_MARK.c --- linux-2.6.22-570/net/netfilter/xt_MARK.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/netfilter/xt_MARK.c 2007-12-21 15:36:15.000000000 -0500 @@ -131,7 +131,7 @@ if ((*pskb)->sk) connection_sk = (*pskb)->sk; else { - connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif); + connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif, (*pskb)->dev ? (*pskb)->dev->nd_net : &init_net); /* sk is NULL in this branch, so the namespace must come from the device (init_net if none), never from sk->sk_net */ } if (connection_sk) { diff -Nurb linux-2.6.22-570/net/netfilter/xt_hashlimit.c linux-2.6.22-591/net/netfilter/xt_hashlimit.c --- linux-2.6.22-570/net/netfilter/xt_hashlimit.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netfilter/xt_hashlimit.c 2007-12-21 15:36:15.000000000 -0500 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -736,13 +737,13 @@ printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); + hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); if (!hashlimit_procdir4) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err3; } - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); + hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); if (!hashlimit_procdir6) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); @@ -750,7 +751,7 @@ } return 0; err4: - remove_proc_entry("ipt_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); err3:
kmem_cache_destroy(hashlimit_cachep); err2: @@ -762,8 +763,8 @@ static void __exit xt_hashlimit_fini(void) { - remove_proc_entry("ipt_hashlimit", proc_net); - remove_proc_entry("ip6t_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); + remove_proc_entry("ip6t_hashlimit", init_net.proc_net); kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); } diff -Nurb linux-2.6.22-570/net/netlink/af_netlink.c linux-2.6.22-591/net/netlink/af_netlink.c --- linux-2.6.22-570/net/netlink/af_netlink.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/netlink/af_netlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -63,6 +63,7 @@ #include #include #include +#include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -212,7 +213,7 @@ wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -222,7 +223,7 @@ read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -327,7 +328,7 @@ * makes sure updates are visible before bind or setsockopt return. 
*/ } -static int netlink_insert(struct sock *sk, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 pid) { struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; @@ -340,7 +341,7 @@ head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -383,15 +384,15 @@ .obj_size = sizeof(struct netlink_sock), }; -static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, - int protocol) +static int __netlink_create(struct net *net, struct socket *sock, + struct mutex *cb_mutex, int protocol) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) return -ENOMEM; @@ -411,7 +412,7 @@ return 0; } -static int netlink_create(struct socket *sock, int protocol) +static int netlink_create(struct net *net, struct socket *sock, int protocol) { struct module *module = NULL; struct mutex *cb_mutex; @@ -440,7 +441,7 @@ cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); - if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0) + if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); @@ -477,6 +478,7 @@ if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { + .net = sk->sk_net, .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -505,6 +507,7 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -518,6 +521,8 @@ netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { + if ((osk->sk_net != net)) + continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. 
*/ pid = rover--; @@ -529,7 +534,7 @@ } netlink_table_ungrab(); - err = netlink_insert(sk, pid); + err = netlink_insert(sk, net, pid); if (err == -EADDRINUSE) goto retry; @@ -583,6 +588,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -606,7 +612,7 @@ return -EINVAL; } else { err = nladdr->nl_pid ? - netlink_insert(sk, nladdr->nl_pid) : + netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) return err; @@ -690,10 +696,12 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; + struct net *net; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + net = ssk->sk_net; + sock = netlink_lookup(net, protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); @@ -866,6 +874,7 @@ struct netlink_broadcast_data { struct sock *exclude_sk; + struct net *net; u32 pid; u32 group; int failure; @@ -888,6 +897,9 @@ !test_bit(p->group - 1, nlk->groups)) goto out; + if ((sk->sk_net != p->net)) + goto out; + if (p->failure) { netlink_overrun(sk); goto out; @@ -926,6 +938,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation) { + struct net *net = ssk->sk_net; struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -933,6 +946,7 @@ skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.net = net; info.pid = pid; info.group = group; info.failure = 0; @@ -981,6 +995,9 @@ if (sk == p->exclude_sk) goto out; + if (sk->sk_net != p->exclude_sk->sk_net) + goto out; + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1276,7 +1293,7 @@ */ struct sock * -netlink_kernel_create(int unit, unsigned int groups, +netlink_kernel_create(struct net *net, 
int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module) { @@ -1293,7 +1310,7 @@ if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1308,18 +1325,20 @@ if (input) nlk_sk(sk)->data_ready = input; - if (netlink_insert(sk, 0)) + if (netlink_insert(sk, net, 0)) goto out_sock_release; nlk = nlk_sk(sk); nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); + if (!nl_table[unit].registered) { nl_table[unit].groups = groups; nl_table[unit].listeners = listeners; nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; nl_table[unit].registered = 1; + } netlink_table_ungrab(); return sk; @@ -1420,7 +1439,7 @@ atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1462,7 +1481,8 @@ if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_net, + in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1613,6 +1633,7 @@ #ifdef CONFIG_PROC_FS struct nl_seq_iter { + struct net *net; int link; int hash_idx; }; @@ -1630,6 +1651,8 @@ for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { + if (iter->net != s->sk_net) + continue; if (off == pos) { iter->link = i; iter->hash_idx = j; @@ -1659,11 +1682,14 @@ if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); - s = sk_next(v); + iter = seq->private; + s = v; + do { + s = sk_next(s); + } while (s && (iter->net != s->sk_net)); if (s) return s; - iter = seq->private; i = iter->link; j = iter->hash_idx + 1; @@ -1672,6 +1698,8 @@ for (; j <= hash->mask; j++) { s = 
sk_head(&hash->table[j]); + while (s && (iter->net != s->sk_net)) + s = sk_next(s); if (s) { iter->link = i; iter->hash_idx = j; @@ -1742,15 +1770,24 @@ seq = file->private_data; seq->private = iter; + iter->net = get_net(PROC_NET(inode)); return 0; } +static int netlink_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct nl_seq_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations netlink_seq_fops = { .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = netlink_seq_release, }; #endif @@ -1792,6 +1829,27 @@ .owner = THIS_MODULE, /* for consistency 8) */ }; +static int netlink_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + return -ENOMEM; +#endif + return 0; +} + +static void netlink_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "netlink"); +#endif +} + +static struct pernet_operations netlink_net_ops = { + .init = netlink_net_init, + .exit = netlink_net_exit, +}; + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1837,9 +1895,7 @@ } sock_register(&netlink_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create("netlink", 0, &netlink_seq_fops); -#endif + register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. 
*/ rtnetlink_init(); out: diff -Nurb linux-2.6.22-570/net/netlink/attr.c linux-2.6.22-591/net/netlink/attr.c --- linux-2.6.22-570/net/netlink/attr.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netlink/attr.c 2007-12-21 15:36:12.000000000 -0500 @@ -72,6 +72,17 @@ return -ERANGE; break; + case NLA_NESTED_COMPAT: + if (attrlen < pt->len) + return -ERANGE; + if (attrlen < NLA_ALIGN(pt->len)) + break; + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) + return -ERANGE; + nla = nla_data(nla) + NLA_ALIGN(pt->len); + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) + return -ERANGE; + break; default: if (pt->len) minlen = pt->len; diff -Nurb linux-2.6.22-570/net/netlink/genetlink.c linux-2.6.22-591/net/netlink/genetlink.c --- linux-2.6.22-570/net/netlink/genetlink.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netlink/genetlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -557,8 +557,9 @@ goto errout_register; netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, - genl_rcv, NULL, THIS_MODULE); + genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, + GENL_MAX_ID, genl_rcv, NULL, + THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff -Nurb linux-2.6.22-570/net/netrom/af_netrom.c linux-2.6.22-591/net/netrom/af_netrom.c --- linux-2.6.22-570/net/netrom/af_netrom.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netrom/af_netrom.c 2007-12-21 15:36:15.000000000 -0500 @@ -41,6 +41,7 @@ #include #include #include +#include #include static int nr_ndevs = 4; @@ -105,6 +106,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -408,15 +412,18 @@ .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct socket *sock, int protocol) +static int nr_create(struct net *net, struct socket *sock, int 
protocol) { struct sock *sk; struct nr_sock *nr; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) return -ENOMEM; nr = nr_sk(sk); @@ -458,7 +465,7 @@ if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; nr = nr_sk(sk); @@ -1447,9 +1454,9 @@ nr_loopback_init(); - proc_net_fops_create("nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); + proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); + proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); out: return rc; fail: @@ -1477,9 +1484,9 @@ { int i; - proc_net_remove("nr"); - proc_net_remove("nr_neigh"); - proc_net_remove("nr_nodes"); + proc_net_remove(&init_net, "nr"); + proc_net_remove(&init_net, "nr_neigh"); + proc_net_remove(&init_net, "nr_nodes"); nr_loopback_clear(); nr_rt_free(); diff -Nurb linux-2.6.22-570/net/netrom/nr_route.c linux-2.6.22-591/net/netrom/nr_route.c --- linux-2.6.22-570/net/netrom/nr_route.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/netrom/nr_route.c 2007-12-21 15:36:15.000000000 -0500 @@ -580,7 +580,7 @@ { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -598,7 +598,7 @@ struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if 
(first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -618,7 +618,7 @@ struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; diff -Nurb linux-2.6.22-570/net/packet/af_packet.c linux-2.6.22-591/net/packet/af_packet.c --- linux-2.6.22-570/net/packet/af_packet.c 2007-12-21 15:36:03.000000000 -0500 +++ linux-2.6.22-591/net/packet/af_packet.c 2007-12-21 15:36:15.000000000 -0500 @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -135,10 +136,6 @@ packet classifier depends on it. */ -/* List of all packet sockets. */ -static HLIST_HEAD(packet_sklist); -static DEFINE_RWLOCK(packet_sklist_lock); - static atomic_t packet_socks_nr; @@ -273,6 +270,9 @@ if (skb->pkt_type == PACKET_LOOPBACK) goto out; + if (dev->nd_net != sk->sk_net) + goto out; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto oom; @@ -344,7 +344,7 @@ */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(sk->sk_net, saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -462,6 +462,9 @@ sk = pt->af_packet_priv; po = pkt_sk(sk); + if (dev->nd_net != sk->sk_net) + goto drop; + skb->dev = dev; if (dev->hard_header) { @@ -578,6 +581,9 @@ sk = pt->af_packet_priv; po = pkt_sk(sk); + if (dev->nd_net != sk->sk_net) + goto drop; + if (dev->hard_header) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); @@ -738,7 +744,7 @@ } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(sk->sk_net, ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -811,15 +817,17 @@ { struct sock *sk = sock->sk; struct packet_sock *po; + struct net *net; if (!sk) return 0; + net = sk->sk_net; po = pkt_sk(sk); - write_lock_bh(&packet_sklist_lock); + 
write_lock_bh(&net->packet_sklist_lock); sk_del_node_init(sk); - write_unlock_bh(&packet_sklist_lock); + write_unlock_bh(&net->packet_sklist_lock); /* * Unhook packet receive handler. @@ -933,7 +941,7 @@ return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(sk->sk_net, name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -960,7 +968,7 @@ if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); + dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex); if (dev == NULL) goto out; } @@ -982,7 +990,7 @@ * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; @@ -998,7 +1006,7 @@ sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); if (sk == NULL) goto out; @@ -1034,9 +1042,9 @@ po->running = 1; } - write_lock_bh(&packet_sklist_lock); - sk_add_node(sk, &packet_sklist); - write_unlock_bh(&packet_sklist_lock); + write_lock_bh(&net->packet_sklist_lock); + sk_add_node(sk, &net->packet_sklist); + write_unlock_bh(&net->packet_sklist_lock); return(0); out: return err; @@ -1154,7 +1162,7 @@ return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); + dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1179,7 +1187,7 @@ sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); + dev = dev_get_by_index(sk->sk_net, po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1231,7 +1239,7 @@ rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); + dev = 
__dev_get_by_index(sk->sk_net, mreq->mr_ifindex); if (!dev) goto done; @@ -1285,7 +1293,7 @@ if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); + dev = dev_get_by_index(sk->sk_net, ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1313,7 +1321,7 @@ struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1469,9 +1477,10 @@ struct sock *sk; struct hlist_node *node; struct net_device *dev = data; + struct net *net = dev->nd_net; - read_lock(&packet_sklist_lock); - sk_for_each(sk, node, &packet_sklist) { + read_lock(&net->packet_sklist_lock); + sk_for_each(sk, node, &net->packet_sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -1510,7 +1519,7 @@ break; } } - read_unlock(&packet_sklist_lock); + read_unlock(&net->packet_sklist_lock); return NOTIFY_DONE; } @@ -1878,12 +1887,12 @@ }; #ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(loff_t off) +static inline struct sock *packet_seq_idx(struct net *net, loff_t off) { struct sock *s; struct hlist_node *node; - sk_for_each(s, node, &packet_sklist) { + sk_for_each(s, node, &net->packet_sklist) { if (!off--) return s; } @@ -1892,21 +1901,24 @@ static void *packet_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&packet_sklist_lock); - return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; + struct net *net = seq->private; + read_lock(&net->packet_sklist_lock); + return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return (v == SEQ_START_TOKEN) - ? sk_head(&packet_sklist) + ? 
sk_head(&net->packet_sklist) : sk_next((struct sock*)v) ; } static void packet_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&packet_sklist_lock); + struct net *net = seq->private; + read_unlock(&net->packet_sklist_lock); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -1942,7 +1954,22 @@ static int packet_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &packet_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &packet_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int packet_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq= file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations packet_seq_fops = { @@ -1950,15 +1977,37 @@ .open = packet_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = packet_seq_release, }; #endif +static int packet_net_init(struct net *net) +{ + rwlock_init(&net->packet_sklist_lock); + INIT_HLIST_HEAD(&net->packet_sklist); + + if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + return -ENOMEM; + + return 0; +} + +static void packet_net_exit(struct net *net) +{ + proc_net_remove(net, "packet"); +} + +static struct pernet_operations packet_net_ops = { + .init = packet_net_init, + .exit = packet_net_exit, +}; + + static void __exit packet_exit(void) { - proc_net_remove("packet"); unregister_netdevice_notifier(&packet_netdev_notifier); + unregister_pernet_subsys(&packet_net_ops); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); } @@ -1971,8 +2020,8 @@ goto out; sock_register(&packet_family_ops); + register_pernet_subsys(&packet_net_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); out: return rc; } diff -Nurb linux-2.6.22-570/net/rose/af_rose.c 
linux-2.6.22-591/net/rose/af_rose.c --- linux-2.6.22-570/net/rose/af_rose.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/rose/af_rose.c 2007-12-21 15:36:15.000000000 -0500 @@ -45,6 +45,7 @@ #include #include #include +#include static int rose_ndevs = 10; @@ -196,6 +197,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -498,15 +502,18 @@ .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct socket *sock, int protocol) +static int rose_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct rose_sock *rose; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return -ENOMEM; rose = rose_sk(sk); @@ -544,7 +551,7 @@ if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return NULL; rose = rose_sk(sk); @@ -1576,10 +1583,10 @@ rose_add_loopback_neigh(); - proc_net_fops_create("rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops); + proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); + proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); + proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); + proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); out: return rc; fail: @@ -1606,10 +1613,10 @@ { int i; - proc_net_remove("rose"); - proc_net_remove("rose_neigh"); - proc_net_remove("rose_nodes"); - 
proc_net_remove("rose_routes"); + proc_net_remove(&init_net, "rose"); + proc_net_remove(&init_net, "rose_neigh"); + proc_net_remove(&init_net, "rose_nodes"); + proc_net_remove(&init_net, "rose_routes"); rose_loopback_clear(); rose_rt_free(); diff -Nurb linux-2.6.22-570/net/rose/rose_route.c linux-2.6.22-591/net/rose/rose_route.c --- linux-2.6.22-570/net/rose/rose_route.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/rose/rose_route.c 2007-12-21 15:36:15.000000000 -0500 @@ -583,7 +583,7 @@ { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -601,7 +601,7 @@ struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -619,7 +619,7 @@ struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -636,7 +636,7 @@ struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } diff -Nurb linux-2.6.22-570/net/rxrpc/af_rxrpc.c linux-2.6.22-591/net/rxrpc/af_rxrpc.c --- linux-2.6.22-570/net/rxrpc/af_rxrpc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/rxrpc/af_rxrpc.c 2007-12-21 15:36:15.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "ar-internal.h" @@ -605,13 +606,16 @@ /* * create an RxRPC socket */ -static int rxrpc_create(struct socket *sock, int protocol) +static int rxrpc_create(struct net 
*net, struct socket *sock, int protocol) { struct rxrpc_sock *rx; struct sock *sk; _enter("%p,%d", sock, protocol); + if (net != &init_net) + return -EAFNOSUPPORT; + /* we support transport protocol UDP only */ if (protocol != PF_INET) return -EPROTONOSUPPORT; @@ -622,7 +626,7 @@ sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); if (!sk) return -ENOMEM; @@ -829,8 +833,8 @@ } #ifdef CONFIG_PROC_FS - proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); #endif return 0; @@ -868,8 +872,8 @@ _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove("rxrpc_conns"); - proc_net_remove("rxrpc_calls"); + proc_net_remove(&init_net, "rxrpc_conns"); + proc_net_remove(&init_net, "rxrpc_calls"); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff -Nurb linux-2.6.22-570/net/sched/act_api.c linux-2.6.22-591/net/sched/act_api.c --- linux-2.6.22-570/net/sched/act_api.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sched/act_api.c 2007-12-21 15:36:15.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -675,7 +676,7 @@ return -EINVAL; } - return rtnl_unicast(skb, pid); + return rtnl_unicast(skb, &init_net, pid); } static struct tc_action * @@ -796,7 +797,7 @@ nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -859,7 +860,7 @@ /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, pid, 
RTNLGRP_TC, + ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -903,7 +904,7 @@ nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -941,10 +942,14 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct rtattr **tca = arg; u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; + if (net != &init_net) + return -EINVAL; + if (tca[TCA_ACT_TAB-1] == NULL) { printk("tc_ctl_action: received NO action attribs\n"); return -EINVAL; @@ -1014,6 +1019,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct rtattr *x; @@ -1023,6 +1029,9 @@ struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct rtattr *kind = find_dump_kind(cb->nlh); + if (net != &init_net) + return 0; + if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; diff -Nurb linux-2.6.22-570/net/sched/act_mirred.c linux-2.6.22-591/net/sched/act_mirred.c --- linux-2.6.22-570/net/sched/act_mirred.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sched/act_mirred.c 2007-12-21 15:36:15.000000000 -0500 @@ -85,7 +85,7 @@ parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); if (parm->ifindex) { - dev = __dev_get_by_index(parm->ifindex); + dev = __dev_get_by_index(&init_net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff -Nurb linux-2.6.22-570/net/sched/cls_api.c linux-2.6.22-591/net/sched/cls_api.c --- linux-2.6.22-570/net/sched/cls_api.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sched/cls_api.c 2007-12-21 15:36:15.000000000 -0500 @@ -129,6 +129,7 @@ static int 
tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct rtattr **tca; struct tcmsg *t; u32 protocol; @@ -145,6 +146,9 @@ unsigned long fh; int err; + if (net != &init_net) + return -EINVAL; + replay: tca = arg; t = NLMSG_DATA(n); @@ -164,7 +168,7 @@ /* Find head of filter chain. */ /* Find link */ - if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ @@ -365,7 +369,7 @@ return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct tcf_dump_args @@ -385,6 +389,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int t; int s_t; struct net_device *dev; @@ -395,9 +400,12 @@ struct Qdisc_class_ops *cops; struct tcf_dump_args arg; + if (net != &init_net) + return 0; + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff -Nurb linux-2.6.22-570/net/sched/em_meta.c linux-2.6.22-591/net/sched/em_meta.c --- linux-2.6.22-570/net/sched/em_meta.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sched/em_meta.c 2007-12-21 15:36:15.000000000 -0500 @@ -291,7 +291,7 @@ } else { struct net_device *dev; - dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); if (dev) dev_put(dev); diff -Nurb linux-2.6.22-570/net/sched/sch_api.c linux-2.6.22-591/net/sched/sch_api.c --- linux-2.6.22-570/net/sched/sch_api.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/sched/sch_api.c 2007-12-21 15:36:15.000000000 -0500 @@ -35,6 +35,7 @@ #include #include 
+#include #include #include #include @@ -609,6 +610,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct net_device *dev; @@ -617,7 +619,10 @@ struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if (net != &init_net) + return -EINVAL; + + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -670,6 +675,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct tcmsg *tcm; struct rtattr **tca; struct net_device *dev; @@ -677,6 +683,9 @@ struct Qdisc *q, *p; int err; + if (net != &init_net) + return -EINVAL; + replay: /* Reinit, just in case something touches this. */ tcm = NLMSG_DATA(n); @@ -684,7 +693,7 @@ clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -873,7 +882,7 @@ } if (skb->len) - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -882,16 +891,20 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; struct Qdisc *q; + if (net != &init_net) + return 0; + s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -930,6 +943,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct net_device *dev; @@ 
-942,7 +956,10 @@ u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if (net != &init_net) + return -EINVAL; + + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; /* @@ -1096,7 +1113,7 @@ return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args @@ -1116,6 +1133,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int t; int s_t; struct net_device *dev; @@ -1123,9 +1141,12 @@ struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct qdisc_dump_args arg; + if (net != &init_net) + return 0; + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1252,7 +1273,7 @@ { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); diff -Nurb linux-2.6.22-570/net/sched/sch_generic.c linux-2.6.22-591/net/sched/sch_generic.c --- linux-2.6.22-570/net/sched/sch_generic.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sched/sch_generic.c 2007-12-21 15:36:12.000000000 -0500 @@ -59,122 +59,143 @@ spin_unlock_bh(&dev->queue_lock); } -/* - dev->queue_lock serializes queue accesses for this device - AND dev->qdisc pointer itself. +static inline int qdisc_qlen(struct Qdisc *q) +{ + return q->q.qlen; +} - netif_tx_lock serializes accesses to device driver. 
+static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, + struct Qdisc *q) +{ + if (unlikely(skb->next)) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); - dev->queue_lock and netif_tx_lock are mutually exclusive, - if one is grabbed, another must be free. - */ + netif_schedule(dev); + return 0; +} +static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, + struct Qdisc *q) +{ + struct sk_buff *skb; -/* Kick device. + if ((skb = dev->gso_skb)) + dev->gso_skb = NULL; + else + skb = q->dequeue(q); - Returns: 0 - queue is empty or throttled. - >0 - queue is not empty. + return skb; +} - NOTE: Called under dev->queue_lock with locally disabled BH. -*/ +static inline int handle_dev_cpu_collision(struct sk_buff *skb, + struct net_device *dev, + struct Qdisc *q) +{ + int ret; + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + /* + * Same CPU holding the lock. It may be a transient + * configuration error, when hard_start_xmit() recurses. We + * detect it by checking xmit owner and drop the packet when + * deadloop is detected. Return OK to try the next skb. + */ + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_WARNING "Dead loop on netdevice %s, " + "fix it urgently!\n", dev->name); + ret = qdisc_qlen(q); + } else { + /* + * Another cpu is holding lock, requeue & delay xmits for + * some time. + */ + __get_cpu_var(netdev_rx_stat).cpu_collision++; + ret = dev_requeue_skb(skb, dev, q); + } + + return ret; +} + +/* + * NOTE: Called under dev->queue_lock with locally disabled BH. + * + * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this + * device at a time. dev->queue_lock serializes queue accesses for + * this device AND dev->qdisc pointer itself. + * + * netif_tx_lock serializes accesses to device driver. + * + * dev->queue_lock and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. 
+ * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; + unsigned lockless; + int ret; /* Dequeue packet */ - if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { - unsigned nolock = (dev->features & NETIF_F_LLTX); - - dev->gso_skb = NULL; + if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + return 0; /* - * When the driver has LLTX set it does its own locking - * in start_xmit. No need to add additional overhead by - * locking again. These checks are worth it because - * even uncongested locks can be quite expensive. - * The driver can do trylock like here too, in case - * of lock congestion it should return -1 and the packet - * will be requeued. - */ - if (!nolock) { - if (!netif_tx_trylock(dev)) { - collision: - /* So, someone grabbed the driver. */ - - /* It may be transient configuration error, - when hard_start_xmit() recurses. We detect - it by checking xmit owner and drop the - packet when deadloop is detected. + * When the driver has LLTX set, it does its own locking in + * start_xmit. These checks are worth it because even uncongested + * locks can be quite expensive. The driver can do a trylock, as + * is being done here; in case of lock contention it should return + * NETDEV_TX_LOCKED and the packet will be requeued. 
*/ - if (dev->xmit_lock_owner == smp_processor_id()) { - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - goto out; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - goto requeue; - } + lockless = (dev->features & NETIF_F_LLTX); + + if (!lockless && !netif_tx_trylock(dev)) { + /* Another CPU grabbed the driver tx lock */ + return handle_dev_cpu_collision(skb, dev, q); } - { /* And release queue */ spin_unlock(&dev->queue_lock); - if (!netif_queue_stopped(dev)) { - int ret; - ret = dev_hard_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) { - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto out; - } - if (ret == NETDEV_TX_LOCKED && nolock) { - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto collision; - } - } - /* NETDEV_TX_BUSY - we need to requeue */ - /* Release the driver */ - if (!nolock) { + if (!lockless) netif_tx_unlock(dev); - } + spin_lock(&dev->queue_lock); q = dev->qdisc; - } - /* Device kicked us out :( - This is possible in three cases: + switch (ret) { + case NETDEV_TX_OK: + /* Driver sent out skb successfully */ + ret = qdisc_qlen(q); + break; - 0. driver is locked - 1. fastroute is enabled - 2. device cannot determine busy state - before start of transmission (f.e. dialout) - 3. 
device is buggy (ppp) - */ + case NETDEV_TX_LOCKED: + /* Driver try lock failed */ + ret = handle_dev_cpu_collision(skb, dev, q); + break; -requeue: - if (unlikely(q == &noop_qdisc)) - kfree_skb(skb); - else if (skb->next) - dev->gso_skb = skb; - else - q->ops->requeue(skb, q); - netif_schedule(dev); + default: + /* Driver returned NETDEV_TX_BUSY - requeue skb */ + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING "BUG %s code %d qlen %d\n", + dev->name, ret, q->q.qlen); + + ret = dev_requeue_skb(skb, dev, q); + break; } - return 0; -out: - BUG_ON((int) q->q.qlen < 0); - return q->q.qlen; + return ret; } void __qdisc_run(struct net_device *dev) diff -Nurb linux-2.6.22-570/net/sched/sch_ingress.c linux-2.6.22-591/net/sched/sch_ingress.c --- linux-2.6.22-570/net/sched/sch_ingress.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sched/sch_ingress.c 2007-12-21 15:36:15.000000000 -0500 @@ -243,6 +243,10 @@ struct net_device *dev = skb->dev; int fwres=NF_ACCEPT; + /* Only filter packets in the initial network namespace */ + if ((indev?indev:outdev)->nd_net != &init_net) + return NF_ACCEPT; + DPRINTK("ing_hook: skb %s dev=%s len=%u\n", skb->sk ? "(owned)" : "(unowned)", skb->dev ? 
(*pskb)->dev->name : "(no dev)", diff -Nurb linux-2.6.22-570/net/sctp/input.c linux-2.6.22-591/net/sctp/input.c --- linux-2.6.22-570/net/sctp/input.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sctp/input.c 2007-12-21 15:36:15.000000000 -0500 @@ -126,6 +126,10 @@ int family; struct sctp_af *af; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (skb->pkt_type!=PACKET_HOST) goto discard_it; @@ -509,6 +513,9 @@ sk_buff_data_t saveip, savesctp; int err; + if (skb->dev->nd_net != &init_net) + return; + if (skb->len < ihlen + 8) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; diff -Nurb linux-2.6.22-570/net/sctp/ipv6.c linux-2.6.22-591/net/sctp/ipv6.c --- linux-2.6.22-570/net/sctp/ipv6.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/sctp/ipv6.c 2007-12-21 15:36:15.000000000 -0500 @@ -189,6 +189,7 @@ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb @@ -230,6 +231,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl.oif = daddr->v6.sin6_scope_id; @@ -619,7 +621,7 @@ struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -664,7 +666,7 @@ newinet->mc_index = 0; newinet->mc_list = NULL; - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) newinet->pmtudisc = IP_PMTUDISC_DONT; else newinet->pmtudisc = IP_PMTUDISC_WANT; @@ -841,7 +843,7 @@ if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { @@ 
-872,7 +874,7 @@ if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { diff -Nurb linux-2.6.22-570/net/sctp/protocol.c linux-2.6.22-591/net/sctp/protocol.c --- linux-2.6.22-570/net/sctp/protocol.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sctp/protocol.c 2007-12-21 15:36:15.000000000 -0500 @@ -59,6 +59,7 @@ #include #include #include +#include /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -93,7 +94,7 @@ { if (!proc_net_sctp) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/sctp", NULL); + ent = proc_mkdir("sctp", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_sctp = ent; @@ -126,7 +127,7 @@ if (proc_net_sctp) { proc_net_sctp = NULL; - remove_proc_entry("net/sctp", NULL); + remove_proc_entry("sctp", init_net.proc_net); } } @@ -170,7 +171,7 @@ struct sctp_af *af; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); @@ -354,13 +355,13 @@ /* Should this be available for binding? 
*/ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - int ret = inet_addr_type(addr->v4.sin_addr.s_addr); + int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL && !sp->inet.freebind && - !sysctl_ip_nonlocal_bind) + !init_net.sysctl_ip_nonlocal_bind) return 0; return 1; @@ -423,6 +424,7 @@ union sctp_addr dst_saddr; memset(&fl, 0x0, sizeof(struct flowi)); + fl.fl_net = &init_net; fl.fl4_dst = daddr->v4.sin_addr.s_addr; fl.proto = IPPROTO_SCTP; if (asoc) { @@ -539,7 +541,7 @@ { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -1122,7 +1124,7 @@ } spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; + sctp_port_rover = init_net.sysctl_local_port_range[0] - 1; printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", diff -Nurb linux-2.6.22-570/net/sctp/socket.c linux-2.6.22-591/net/sctp/socket.c --- linux-2.6.22-570/net/sctp/socket.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sctp/socket.c 2007-12-21 15:36:15.000000000 -0500 @@ -5021,8 +5021,8 @@ * already in the hash table; if not, we use that; if * it is, we try next. 
*/ - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int low = sk->sk_net->sysctl_local_port_range[0]; + int high = sk->sk_net->sysctl_local_port_range[1]; int remaining = (high - low) + 1; int rover; int index; diff -Nurb linux-2.6.22-570/net/socket.c linux-2.6.22-591/net/socket.c --- linux-2.6.22-570/net/socket.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/socket.c 2007-12-21 15:36:15.000000000 -0500 @@ -84,6 +84,7 @@ #include #include #include +#include #include #include @@ -821,9 +822,9 @@ */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook) (unsigned int, void __user *)) +void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -833,9 +834,9 @@ EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); +static int (*vlan_ioctl_hook) (struct net *, void __user *arg); -void vlan_ioctl_set(int (*hook) (void __user *)) +void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; @@ -864,16 +865,20 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; + struct sock *sk; void __user *argp = (void __user *)arg; int pid, err; + struct net *net; sock = file->private_data; + sk = sock->sk; + net = sk->sk_net; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #ifdef CONFIG_WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #endif /* CONFIG_WIRELESS_EXT */ switch (cmd) { @@ -899,7 +904,7 @@ mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) - err = 
br_ioctl_hook(cmd, argp); + err = br_ioctl_hook(net, cmd, argp); mutex_unlock(&br_ioctl_mutex); break; case SIOCGIFVLAN: @@ -910,7 +915,7 @@ mutex_lock(&vlan_ioctl_mutex); if (vlan_ioctl_hook) - err = vlan_ioctl_hook(argp); + err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; case SIOCADDDLCI: @@ -933,7 +938,7 @@ * to the NIC driver. */ if (err == -ENOIOCTLCMD) - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); break; } return err; @@ -1102,7 +1107,7 @@ return 0; } -static int __sock_create(int family, int type, int protocol, +static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1185,7 +1190,7 @@ /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(sock, protocol); + err = pf->create(net, sock, protocol); if (err < 0) goto out_module_put; @@ -1224,12 +1229,12 @@ int sock_create(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 0); + return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } int sock_create_kern(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 1); + return __sock_create(&init_net, family, type, protocol, res, 1); } asmlinkage long sys_socket(int family, int type, int protocol) @@ -1389,8 +1394,6 @@ * ready for listening. 
*/ -int sysctl_somaxconn __read_mostly = SOMAXCONN; - asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; @@ -1398,8 +1401,9 @@ sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - if ((unsigned)backlog > sysctl_somaxconn) - backlog = sysctl_somaxconn; + struct net *net = sock->sk->sk_net; + if ((unsigned)backlog > net->sysctl_somaxconn) + backlog = net->sysctl_somaxconn; err = security_socket_listen(sock, backlog); if (!err) @@ -2189,6 +2193,16 @@ printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +static int sock_pernet_init(struct net *net) +{ + net->sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations sock_net_ops = { + .init = sock_pernet_init, +}; + static int __init sock_init(void) { /* @@ -2217,6 +2231,8 @@ netfilter_init(); #endif + register_pernet_subsys(&sock_net_ops); + return 0; } diff -Nurb linux-2.6.22-570/net/socket.c.orig linux-2.6.22-591/net/socket.c.orig --- linux-2.6.22-570/net/socket.c.orig 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,2344 +0,0 @@ -/* - * NET An implementation of the SOCKET network access protocol. - * - * Version: @(#)socket.c 1.1.93 18/02/95 - * - * Authors: Orest Zborowski, - * Ross Biro - * Fred N. van Kempen, - * - * Fixes: - * Anonymous : NOTSOCK/BADF cleanup. Error fix in - * shutdown() - * Alan Cox : verify_area() fixes - * Alan Cox : Removed DDI - * Jonathan Kamens : SOCK_DGRAM reconnect bug - * Alan Cox : Moved a load of checks to the very - * top level. - * Alan Cox : Move address structures to/from user - * mode above the protocol layers. - * Rob Janssen : Allow 0 length sends. - * Alan Cox : Asynchronous I/O support (cribbed from the - * tty drivers). - * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) - * Jeff Uphoff : Made max number of sockets command-line - * configurable. 
- * Matti Aarnio : Made the number of sockets dynamic, - * to be allocated when needed, and mr. - * Uphoff's max is used as max to be - * allowed to allocate. - * Linus : Argh. removed all the socket allocation - * altogether: it's in the inode now. - * Alan Cox : Made sock_alloc()/sock_release() public - * for NetROM and future kernel nfsd type - * stuff. - * Alan Cox : sendmsg/recvmsg basics. - * Tom Dyas : Export net symbols. - * Marcin Dalecki : Fixed problems with CONFIG_NET="n". - * Alan Cox : Added thread locking to sys_* calls - * for sockets. May have errors at the - * moment. - * Kevin Buhr : Fixed the dumb errors in the above. - * Andi Kleen : Some small cleanups, optimizations, - * and fixed a copy_from_user() bug. - * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) - * Tigran Aivazian : Made listen(2) backlog sanity checks - * protocol-independent - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * - * This module is effectively the top level interface to the BSD socket - * paradigm. 
- * - * Based upon Swansea University Computer Society NET3.039 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include - -static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static int sock_mmap(struct file *file, struct vm_area_struct *vma); - -static int sock_close(struct inode *inode, struct file *file); -static unsigned int sock_poll(struct file *file, - struct poll_table_struct *wait); -static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -#ifdef CONFIG_COMPAT -static long compat_sock_ioctl(struct file *file, - unsigned int cmd, unsigned long arg); -#endif -static int sock_fasync(int fd, struct file *filp, int on); -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more); - -/* - * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear - * in the operation structures but are done directly via the socketcall() multiplexor. 
- */ - -static const struct file_operations socket_file_ops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .aio_read = sock_aio_read, - .aio_write = sock_aio_write, - .poll = sock_poll, - .unlocked_ioctl = sock_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = compat_sock_ioctl, -#endif - .mmap = sock_mmap, - .open = sock_no_open, /* special open code to disallow open via /proc */ - .release = sock_close, - .fasync = sock_fasync, - .sendpage = sock_sendpage, - .splice_write = generic_splice_sendpage, -}; - -/* - * The protocol list. Each protocol is registered in here. - */ - -static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; - -/* - * Statistics counters of the socket lists - */ - -static DEFINE_PER_CPU(int, sockets_in_use) = 0; - -/* - * Support routines. - * Move socket addresses back and forth across the kernel/user - * divide and look after the messy bits. - */ - -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - - 16 for IP, 16 for IPX, - 24 for IPv6, - about 80 for AX.25 - must be at least one bigger than - the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). - */ - -/** - * move_addr_to_kernel - copy a socket address into kernel space - * @uaddr: Address in user space - * @kaddr: Address in kernel space - * @ulen: Length in user space - * - * The address is copied into kernel space. If the provided address is - * too long an error code of -EINVAL is returned. If the copy gives - * invalid addresses -EFAULT is returned. On a success 0 is returned. 
- */ - -int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) -{ - if (ulen < 0 || ulen > MAX_SOCK_ADDR) - return -EINVAL; - if (ulen == 0) - return 0; - if (copy_from_user(kaddr, uaddr, ulen)) - return -EFAULT; - return audit_sockaddr(ulen, kaddr); -} - -/** - * move_addr_to_user - copy an address to user space - * @kaddr: kernel space address - * @klen: length of address in kernel - * @uaddr: user space address - * @ulen: pointer to user length field - * - * The value pointed to by ulen on entry is the buffer length available. - * This is overwritten with the buffer space used. -EINVAL is returned - * if an overlong buffer is specified or a negative buffer size. -EFAULT - * is returned if either the buffer or the length field are not - * accessible. - * After copying the data up to the limit the user specifies, the true - * length of the data is written over the length limit the user - * specified. Zero is returned for a success. - */ - -int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, - int __user *ulen) -{ - int err; - int len; - - err = get_user(len, ulen); - if (err) - return err; - if (len > klen) - len = klen; - if (len < 0 || len > MAX_SOCK_ADDR) - return -EINVAL; - if (len) { - if (audit_sockaddr(klen, kaddr)) - return -ENOMEM; - if (copy_to_user(uaddr, kaddr, len)) - return -EFAULT; - } - /* - * "fromlen shall refer to the value before truncation.." 
- * 1003.1g - */ - return __put_user(klen, ulen); -} - -#define SOCKFS_MAGIC 0x534F434B - -static struct kmem_cache *sock_inode_cachep __read_mostly; - -static struct inode *sock_alloc_inode(struct super_block *sb) -{ - struct socket_alloc *ei; - - ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); - if (!ei) - return NULL; - init_waitqueue_head(&ei->socket.wait); - - ei->socket.fasync_list = NULL; - ei->socket.state = SS_UNCONNECTED; - ei->socket.flags = 0; - ei->socket.ops = NULL; - ei->socket.sk = NULL; - ei->socket.file = NULL; - - return &ei->vfs_inode; -} - -static void sock_destroy_inode(struct inode *inode) -{ - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); -} - -static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) -{ - struct socket_alloc *ei = (struct socket_alloc *)foo; - - inode_init_once(&ei->vfs_inode); -} - -static int init_inodecache(void) -{ - sock_inode_cachep = kmem_cache_create("sock_inode_cache", - sizeof(struct socket_alloc), - 0, - (SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD), - init_once, - NULL); - if (sock_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, -}; - -static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, - mnt); -} - -static struct vfsmount *sock_mnt __read_mostly; - -static struct file_system_type sock_fs_type = { - .name = "sockfs", - .get_sb = sockfs_get_sb, - .kill_sb = kill_anon_super, -}; - -static int sockfs_delete_dentry(struct dentry *dentry) -{ - /* - * At creation time, we pretended this dentry was hashed - * (by clearing DCACHE_UNHASHED bit in d_flags) - * At delete time, we restore the truth : not hashed. 
- * (so that dput() can proceed correctly) - */ - dentry->d_flags |= DCACHE_UNHASHED; - return 0; -} - -/* - * sockfs_dname() is called from d_path(). - */ -static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", - dentry->d_inode->i_ino); -} - -static struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, - .d_dname = sockfs_dname, -}; - -/* - * Obtains the first available file descriptor and sets it up for use. - * - * These functions create file structures and maps them to fd space - * of the current process. On success it returns file descriptor - * and file struct implicitly stored in sock->file. - * Note that another thread may close file descriptor before we return - * from this function. We use the fact that now we do not refer - * to socket after mapping. If one day we will need it, this - * function will increment ref. count on file by 1. - * - * In any case returned fd MAY BE not valid! - * This race condition is unavoidable - * with shared fd spaces, we cannot solve it inside kernel, - * but we take care of internal coherence yet. - */ - -static int sock_alloc_fd(struct file **filep) -{ - int fd; - - fd = get_unused_fd(); - if (likely(fd >= 0)) { - struct file *file = get_empty_filp(); - - *filep = file; - if (unlikely(!file)) { - put_unused_fd(fd); - return -ENFILE; - } - } else - *filep = NULL; - return fd; -} - -static int sock_attach_fd(struct socket *sock, struct file *file) -{ - struct qstr name = { .name = "" }; - - file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!file->f_path.dentry)) - return -ENOMEM; - - file->f_path.dentry->d_op = &sockfs_dentry_operations; - /* - * We dont want to push this dentry into global dentry hash table. 
- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED - * This permits a working /proc/$pid/fd/XXX on sockets - */ - file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); - file->f_path.mnt = mntget(sock_mnt); - file->f_mapping = file->f_path.dentry->d_inode->i_mapping; - - sock->file = file; - file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_mode = FMODE_READ | FMODE_WRITE; - file->f_flags = O_RDWR; - file->f_pos = 0; - file->private_data = sock; - - return 0; -} - -int sock_map_fd(struct socket *sock) -{ - struct file *newfile; - int fd = sock_alloc_fd(&newfile); - - if (likely(fd >= 0)) { - int err = sock_attach_fd(sock, newfile); - - if (unlikely(err < 0)) { - put_filp(newfile); - put_unused_fd(fd); - return err; - } - fd_install(fd, newfile); - } - return fd; -} - -static struct socket *sock_from_file(struct file *file, int *err) -{ - if (file->f_op == &socket_file_ops) - return file->private_data; /* set in sock_map_fd */ - - *err = -ENOTSOCK; - return NULL; -} - -/** - * sockfd_lookup - Go from a file number to its socket slot - * @fd: file handle - * @err: pointer to an error code return - * - * The file handle passed in is locked and the socket it is bound - * too is returned. If an error occurs the err pointer is overwritten - * with a negative errno code and NULL is returned. The function checks - * for both invalid handles and passing a handle which is not a socket. - * - * On a success the socket object pointer is returned. 
- */ - -struct socket *sockfd_lookup(int fd, int *err) -{ - struct file *file; - struct socket *sock; - - file = fget(fd); - if (!file) { - *err = -EBADF; - return NULL; - } - - sock = sock_from_file(file, err); - if (!sock) - fput(file); - return sock; -} - -static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) -{ - struct file *file; - struct socket *sock; - - *err = -EBADF; - file = fget_light(fd, fput_needed); - if (file) { - sock = sock_from_file(file, err); - if (sock) - return sock; - fput_light(file, *fput_needed); - } - return NULL; -} - -/** - * sock_alloc - allocate a socket - * - * Allocate a new inode and socket object. The two are bound together - * and initialised. The socket is then returned. If we are out of inodes - * NULL is returned. - */ - -static struct socket *sock_alloc(void) -{ - struct inode *inode; - struct socket *sock; - - inode = new_inode(sock_mnt->mnt_sb); - if (!inode) - return NULL; - - sock = SOCKET_I(inode); - - inode->i_mode = S_IFSOCK | S_IRWXUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - - get_cpu_var(sockets_in_use)++; - put_cpu_var(sockets_in_use); - return sock; -} - -/* - * In theory you can't get an open on this inode, but /proc provides - * a back door. Remember to keep it shut otherwise you'll let the - * creepy crawlies in. - */ - -static int sock_no_open(struct inode *irrelevant, struct file *dontcare) -{ - return -ENXIO; -} - -const struct file_operations bad_sock_fops = { - .owner = THIS_MODULE, - .open = sock_no_open, -}; - -/** - * sock_release - close a socket - * @sock: socket to close - * - * The socket is released from the protocol stack if it has a release - * callback, and the inode is then released if the socket is bound to - * an inode not a file. 
- */ - -void sock_release(struct socket *sock) -{ - if (sock->ops) { - struct module *owner = sock->ops->owner; - - sock->ops->release(sock); - sock->ops = NULL; - module_put(owner); - } - - if (sock->fasync_list) - printk(KERN_ERR "sock_release: fasync list not empty!\n"); - - get_cpu_var(sockets_in_use)--; - put_cpu_var(sockets_in_use); - if (!sock->file) { - iput(SOCK_INODE(sock)); - return; - } - sock->file = NULL; -} - -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) -{ - struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - - err = security_socket_sendmsg(sock, msg, size); - if (err) - return err; - - return sock->ops->sendmsg(iocb, sock, msg, size); -} - -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) -{ - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -int kernel_sendmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, size_t size) -{ - mm_segment_t oldfs = get_fs(); - int result; - - set_fs(KERNEL_DS); - /* - * the following is safe, since for compiler definitions of kvec and - * iovec are identical, yielding the same in-core layout and alignment - */ - msg->msg_iov = (struct iovec *)vec; - msg->msg_iovlen = num; - result = sock_sendmsg(sock, msg, size); - set_fs(oldfs); - return result; -} - -/* - * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) - */ -void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) -{ - ktime_t kt = skb->tstamp; - - if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; - /* Race occurred between timestamp enabling and packet - receiving. Fill in the current time for now. 
*/ - if (kt.tv64 == 0) - kt = ktime_get_real(); - skb->tstamp = kt; - tv = ktime_to_timeval(kt); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); - } else { - struct timespec ts; - /* Race occurred between timestamp enabling and packet - receiving. Fill in the current time for now. */ - if (kt.tv64 == 0) - kt = ktime_get_real(); - skb->tstamp = kt; - ts = ktime_to_timespec(kt); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); - } -} - -EXPORT_SYMBOL_GPL(__sock_recv_timestamp); - -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) -{ - int err; - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - - err = security_socket_recvmsg(sock, msg, size, flags); - if (err) - return err; - - return sock->ops->recvmsg(iocb, sock, msg, size, flags); -} - -int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -int kernel_recvmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, size_t size, int flags) -{ - mm_segment_t oldfs = get_fs(); - int result; - - set_fs(KERNEL_DS); - /* - * the following is safe, since for compiler definitions of kvec and - * iovec are identical, yielding the same in-core layout and alignment - */ - msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; - result = sock_recvmsg(sock, msg, size, flags); - set_fs(oldfs); - return result; -} - -static void sock_aio_dtor(struct kiocb *iocb) -{ - kfree(iocb->private); -} - -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) -{ - struct socket *sock; - int 
flags; - - sock = file->private_data; - - flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; - if (more) - flags |= MSG_MORE; - - return sock->ops->sendpage(sock, page, offset, size, flags); -} - -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - if (!is_sync_kiocb(iocb)) { - siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); - if (!siocb) - return NULL; - iocb->ki_dtor = sock_aio_dtor; - } - - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - -static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) -{ - struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *)iov; - msg->msg_iovlen = nr_segs; - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - - return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); -} - -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; - - if (pos != 0) - return -ESPIPE; - - if (iocb->ki_left == 0) /* Match SYS5 behaviour */ - return 0; - - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); -} - -static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) -{ - struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *)iov; - msg->msg_iovlen = nr_segs; - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; - if (sock->type == SOCK_SEQPACKET) - msg->msg_flags |= MSG_EOR; - - return __sock_sendmsg(iocb, sock, msg, size); -} - -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; - - if (pos != 0) - return -ESPIPE; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); -} - -/* - * Atomic setting of ioctl hooks to avoid race - * with module unload. - */ - -static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; - -void brioctl_set(int (*hook) (unsigned int, void __user *)) -{ - mutex_lock(&br_ioctl_mutex); - br_ioctl_hook = hook; - mutex_unlock(&br_ioctl_mutex); -} - -EXPORT_SYMBOL(brioctl_set); - -static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); - -void vlan_ioctl_set(int (*hook) (void __user *)) -{ - mutex_lock(&vlan_ioctl_mutex); - vlan_ioctl_hook = hook; - mutex_unlock(&vlan_ioctl_mutex); -} - -EXPORT_SYMBOL(vlan_ioctl_set); - -static DEFINE_MUTEX(dlci_ioctl_mutex); -static int (*dlci_ioctl_hook) (unsigned int, void __user *); - -void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) -{ - mutex_lock(&dlci_ioctl_mutex); - dlci_ioctl_hook = hook; - mutex_unlock(&dlci_ioctl_mutex); -} - -EXPORT_SYMBOL(dlci_ioctl_set); - -/* - * With an ioctl, arg may well be a user mode pointer, but we don't know - * what to do with it - that's up to the protocol still. 
- */ - -static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) -{ - struct socket *sock; - void __user *argp = (void __user *)arg; - int pid, err; - - sock = file->private_data; - if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(cmd, argp); - } else -#ifdef CONFIG_WIRELESS_EXT - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(cmd, argp); - } else -#endif /* CONFIG_WIRELESS_EXT */ - switch (cmd) { - case FIOSETOWN: - case SIOCSPGRP: - err = -EFAULT; - if (get_user(pid, (int __user *)argp)) - break; - err = f_setown(sock->file, pid, 1); - break; - case FIOGETOWN: - case SIOCGPGRP: - err = put_user(f_getown(sock->file), - (int __user *)argp); - break; - case SIOCGIFBR: - case SIOCSIFBR: - case SIOCBRADDBR: - case SIOCBRDELBR: - err = -ENOPKG; - if (!br_ioctl_hook) - request_module("bridge"); - - mutex_lock(&br_ioctl_mutex); - if (br_ioctl_hook) - err = br_ioctl_hook(cmd, argp); - mutex_unlock(&br_ioctl_mutex); - break; - case SIOCGIFVLAN: - case SIOCSIFVLAN: - err = -ENOPKG; - if (!vlan_ioctl_hook) - request_module("8021q"); - - mutex_lock(&vlan_ioctl_mutex); - if (vlan_ioctl_hook) - err = vlan_ioctl_hook(argp); - mutex_unlock(&vlan_ioctl_mutex); - break; - case SIOCADDDLCI: - case SIOCDELDLCI: - err = -ENOPKG; - if (!dlci_ioctl_hook) - request_module("dlci"); - - if (dlci_ioctl_hook) { - mutex_lock(&dlci_ioctl_mutex); - err = dlci_ioctl_hook(cmd, argp); - mutex_unlock(&dlci_ioctl_mutex); - } - break; - default: - err = sock->ops->ioctl(sock, cmd, arg); - - /* - * If this ioctl is unknown try to hand it down - * to the NIC driver. 
- */ - if (err == -ENOIOCTLCMD) - err = dev_ioctl(cmd, argp); - break; - } - return err; -} - -int sock_create_lite(int family, int type, int protocol, struct socket **res) -{ - int err; - struct socket *sock = NULL; - - err = security_socket_create(family, type, protocol, 1); - if (err) - goto out; - - sock = sock_alloc(); - if (!sock) { - err = -ENOMEM; - goto out; - } - - sock->type = type; - err = security_socket_post_create(sock, family, type, protocol, 1); - if (err) - goto out_release; - -out: - *res = sock; - return err; -out_release: - sock_release(sock); - sock = NULL; - goto out; -} - -/* No kernel lock held - perfect */ -static unsigned int sock_poll(struct file *file, poll_table *wait) -{ - struct socket *sock; - - /* - * We can't return errors to poll, so it's either yes or no. - */ - sock = file->private_data; - return sock->ops->poll(file, sock, wait); -} - -static int sock_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct socket *sock = file->private_data; - - return sock->ops->mmap(file, sock, vma); -} - -static int sock_close(struct inode *inode, struct file *filp) -{ - /* - * It was possible the inode is NULL we were - * closing an unfinished socket. - */ - - if (!inode) { - printk(KERN_DEBUG "sock_close: NULL inode\n"); - return 0; - } - sock_fasync(-1, filp, 0); - sock_release(SOCKET_I(inode)); - return 0; -} - -/* - * Update the socket async list - * - * Fasync_list locking strategy. - * - * 1. fasync_list is modified only under process context socket lock - * i.e. under semaphore. - * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) - * or under socket lock. - * 3. fasync_list can be used from softirq context, so that - * modification under socket lock have to be enhanced with - * write_lock_bh(&sk->sk_callback_lock). 
- * --ANK (990710) - */ - -static int sock_fasync(int fd, struct file *filp, int on) -{ - struct fasync_struct *fa, *fna = NULL, **prev; - struct socket *sock; - struct sock *sk; - - if (on) { - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if (fna == NULL) - return -ENOMEM; - } - - sock = filp->private_data; - - sk = sock->sk; - if (sk == NULL) { - kfree(fna); - return -EINVAL; - } - - lock_sock(sk); - - prev = &(sock->fasync_list); - - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) - if (fa->fa_file == filp) - break; - - if (on) { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd = fd; - write_unlock_bh(&sk->sk_callback_lock); - - kfree(fna); - goto out; - } - fna->fa_file = filp; - fna->fa_fd = fd; - fna->magic = FASYNC_MAGIC; - fna->fa_next = sock->fasync_list; - write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list = fna; - write_unlock_bh(&sk->sk_callback_lock); - } else { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - *prev = fa->fa_next; - write_unlock_bh(&sk->sk_callback_lock); - kfree(fa); - } - } - -out: - release_sock(sock->sk); - return 0; -} - -/* This function may be called only under socket lock or callback_lock */ - -int sock_wake_async(struct socket *sock, int how, int band) -{ - if (!sock || !sock->fasync_list) - return -1; - switch (how) { - case 1: - - if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) - break; - goto call_kill; - case 2: - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) - break; - /* fall through */ - case 0: -call_kill: - __kill_fasync(sock->fasync_list, SIGIO, band); - break; - case 3: - __kill_fasync(sock->fasync_list, SIGURG, band); - } - return 0; -} - -static int __sock_create(int family, int type, int protocol, - struct socket **res, int kern) -{ - int err; - struct socket *sock; - const struct net_proto_family *pf; - - /* - * Check protocol is in range - */ - if (family < 0 || family >= NPROTO) - return -EAFNOSUPPORT; - if (type < 0 || type 
>= SOCK_MAX) - return -EINVAL; - - /* Compatibility. - - This uglymoron is moved from INET layer to here to avoid - deadlock in module load. - */ - if (family == PF_INET && type == SOCK_PACKET) { - static int warned; - if (!warned) { - warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", - current->comm); - } - family = PF_PACKET; - } - - err = security_socket_create(family, type, protocol, kern); - if (err) - return err; - - /* - * Allocate the socket and allow the family to set things up. if - * the protocol is 0, the family is instructed to select an appropriate - * default. - */ - sock = sock_alloc(); - if (!sock) { - if (net_ratelimit()) - printk(KERN_WARNING "socket: no more sockets\n"); - return -ENFILE; /* Not exactly a match, but its the - closest posix thing */ - } - - sock->type = type; - -#if defined(CONFIG_KMOD) - /* Attempt to load a protocol module if the find failed. - * - * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user - * requested real, full-featured networking support upon configuration. - * Otherwise module support will break! - */ - if (net_families[family] == NULL) - request_module("net-pf-%d", family); -#endif - - rcu_read_lock(); - pf = rcu_dereference(net_families[family]); - err = -EAFNOSUPPORT; - if (!pf) - goto out_release; - - /* - * We will call the ->create function, that possibly is in a loadable - * module, so we have to bump that loadable module refcnt first. - */ - if (!try_module_get(pf->owner)) - goto out_release; - - /* Now protected by module ref count */ - rcu_read_unlock(); - - err = pf->create(sock, protocol); - if (err < 0) - goto out_module_put; - - /* - * Now to bump the refcnt of the [loadable] module that owns this - * socket at sock_release time we decrement its refcnt. 
- */ - if (!try_module_get(sock->ops->owner)) - goto out_module_busy; - - /* - * Now that we're done with the ->create function, the [loadable] - * module can have its refcnt decremented - */ - module_put(pf->owner); - err = security_socket_post_create(sock, family, type, protocol, kern); - if (err) - goto out_sock_release; - *res = sock; - - return 0; - -out_module_busy: - err = -EAFNOSUPPORT; -out_module_put: - sock->ops = NULL; - module_put(pf->owner); -out_sock_release: - sock_release(sock); - return err; - -out_release: - rcu_read_unlock(); - goto out_sock_release; -} - -int sock_create(int family, int type, int protocol, struct socket **res) -{ - return __sock_create(family, type, protocol, res, 0); -} - -int sock_create_kern(int family, int type, int protocol, struct socket **res) -{ - return __sock_create(family, type, protocol, res, 1); -} - -asmlinkage long sys_socket(int family, int type, int protocol) -{ - int retval; - struct socket *sock; - - retval = sock_create(family, type, protocol, &sock); - if (retval < 0) - goto out; - - retval = sock_map_fd(sock); - if (retval < 0) - goto out_release; - -out: - /* It may be already another descriptor 8) Not kernel problem. */ - return retval; - -out_release: - sock_release(sock); - return retval; -} - -/* - * Create a pair of connected sockets. - */ - -asmlinkage long sys_socketpair(int family, int type, int protocol, - int __user *usockvec) -{ - struct socket *sock1, *sock2; - int fd1, fd2, err; - struct file *newfile1, *newfile2; - - /* - * Obtain the first socket and check if the underlying protocol - * supports the socketpair call. 
- */ - - err = sock_create(family, type, protocol, &sock1); - if (err < 0) - goto out; - - err = sock_create(family, type, protocol, &sock2); - if (err < 0) - goto out_release_1; - - err = sock1->ops->socketpair(sock1, sock2); - if (err < 0) - goto out_release_both; - - fd1 = sock_alloc_fd(&newfile1); - if (unlikely(fd1 < 0)) { - err = fd1; - goto out_release_both; - } - - fd2 = sock_alloc_fd(&newfile2); - if (unlikely(fd2 < 0)) { - err = fd2; - put_filp(newfile1); - put_unused_fd(fd1); - goto out_release_both; - } - - err = sock_attach_fd(sock1, newfile1); - if (unlikely(err < 0)) { - goto out_fd2; - } - - err = sock_attach_fd(sock2, newfile2); - if (unlikely(err < 0)) { - fput(newfile1); - goto out_fd1; - } - - err = audit_fd_pair(fd1, fd2); - if (err < 0) { - fput(newfile1); - fput(newfile2); - goto out_fd; - } - - fd_install(fd1, newfile1); - fd_install(fd2, newfile2); - /* fd1 and fd2 may be already another descriptors. - * Not kernel problem. - */ - - err = put_user(fd1, &usockvec[0]); - if (!err) - err = put_user(fd2, &usockvec[1]); - if (!err) - return 0; - - sys_close(fd2); - sys_close(fd1); - return err; - -out_release_both: - sock_release(sock2); -out_release_1: - sock_release(sock1); -out: - return err; - -out_fd2: - put_filp(newfile1); - sock_release(sock1); -out_fd1: - put_filp(newfile2); - sock_release(sock2); -out_fd: - put_unused_fd(fd1); - put_unused_fd(fd2); - goto out; -} - -/* - * Bind a name to a socket. Nothing much to do here since it's - * the protocol's responsibility to handle the local address. - * - * We move the socket address to kernel space before we call - * the protocol layer (having also checked the address is ok). 
- */ - -asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) -{ - struct socket *sock; - char address[MAX_SOCK_ADDR]; - int err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock) { - err = move_addr_to_kernel(umyaddr, addrlen, address); - if (err >= 0) { - err = security_socket_bind(sock, - (struct sockaddr *)address, - addrlen); - if (!err) - err = sock->ops->bind(sock, - (struct sockaddr *) - address, addrlen); - } - fput_light(sock->file, fput_needed); - } - return err; -} - -/* - * Perform a listen. Basically, we allow the protocol to do anything - * necessary for a listen, and if that works, we mark the socket as - * ready for listening. - */ - -int sysctl_somaxconn __read_mostly = SOMAXCONN; - -asmlinkage long sys_listen(int fd, int backlog) -{ - struct socket *sock; - int err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock) { - if ((unsigned)backlog > sysctl_somaxconn) - backlog = sysctl_somaxconn; - - err = security_socket_listen(sock, backlog); - if (!err) - err = sock->ops->listen(sock, backlog); - - fput_light(sock->file, fput_needed); - } - return err; -} - -/* - * For accept, we attempt to create a new socket, set up the link - * with the client, wake up the client, then return the new - * connected fd. We collect the address of the connector in kernel - * space and move it to user at the very end. This is unclean because - * we open the socket then return an error. - * - * 1003.1g adds the ability to recvmsg() to query connection pending - * status to recvmsg. We need to add that support in a way thats - * clean when we restucture accept also. 
- */ - -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) -{ - struct socket *sock, *newsock; - struct file *newfile; - int err, len, newfd, fput_needed; - char address[MAX_SOCK_ADDR]; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - - err = -ENFILE; - if (!(newsock = sock_alloc())) - goto out_put; - - newsock->type = sock->type; - newsock->ops = sock->ops; - - /* - * We don't need try_module_get here, as the listening socket (sock) - * has the protocol module (sock->ops->owner) held. - */ - __module_get(newsock->ops->owner); - - newfd = sock_alloc_fd(&newfile); - if (unlikely(newfd < 0)) { - err = newfd; - sock_release(newsock); - goto out_put; - } - - err = sock_attach_fd(newsock, newfile); - if (err < 0) - goto out_fd_simple; - - err = security_socket_accept(sock, newsock); - if (err) - goto out_fd; - - err = sock->ops->accept(sock, newsock, sock->file->f_flags); - if (err < 0) - goto out_fd; - - if (upeer_sockaddr) { - if (newsock->ops->getname(newsock, (struct sockaddr *)address, - &len, 2) < 0) { - err = -ECONNABORTED; - goto out_fd; - } - err = move_addr_to_user(address, len, upeer_sockaddr, - upeer_addrlen); - if (err < 0) - goto out_fd; - } - - /* File flags are not inherited via accept() unlike another OSes. */ - - fd_install(newfd, newfile); - err = newfd; - - security_socket_post_accept(sock, newsock); - -out_put: - fput_light(sock->file, fput_needed); -out: - return err; -out_fd_simple: - sock_release(newsock); - put_filp(newfile); - put_unused_fd(newfd); - goto out_put; -out_fd: - fput(newfile); - put_unused_fd(newfd); - goto out_put; -} - -/* - * Attempt to connect to a socket with the server address. The address - * is in user space so we verify it is OK and move it to kernel space. 
- * - * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to - * break bindings - * - * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and - * other SEQPACKET protocols that take time to connect() as it doesn't - * include the -EINPROGRESS status for such sockets. - */ - -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, - int addrlen) -{ - struct socket *sock; - char address[MAX_SOCK_ADDR]; - int err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - err = move_addr_to_kernel(uservaddr, addrlen, address); - if (err < 0) - goto out_put; - - err = - security_socket_connect(sock, (struct sockaddr *)address, addrlen); - if (err) - goto out_put; - - err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, - sock->file->f_flags); -out_put: - fput_light(sock->file, fput_needed); -out: - return err; -} - -/* - * Get the local address ('name') of a socket object. Move the obtained - * name to user space. - */ - -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) -{ - struct socket *sock; - char address[MAX_SOCK_ADDR]; - int len, err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - - err = security_socket_getsockname(sock); - if (err) - goto out_put; - - err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); - if (err) - goto out_put; - err = move_addr_to_user(address, len, usockaddr, usockaddr_len); - -out_put: - fput_light(sock->file, fput_needed); -out: - return err; -} - -/* - * Get the remote address ('name') of a socket object. Move the obtained - * name to user space. 
- */ - -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) -{ - struct socket *sock; - char address[MAX_SOCK_ADDR]; - int len, err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_getpeername(sock); - if (err) { - fput_light(sock->file, fput_needed); - return err; - } - - err = - sock->ops->getname(sock, (struct sockaddr *)address, &len, - 1); - if (!err) - err = move_addr_to_user(address, len, usockaddr, - usockaddr_len); - fput_light(sock->file, fput_needed); - } - return err; -} - -/* - * Send a datagram to a given address. We move the address into kernel - * space and check the user space data area is readable before invoking - * the protocol. - */ - -asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, - unsigned flags, struct sockaddr __user *addr, - int addr_len) -{ - struct socket *sock; - char address[MAX_SOCK_ADDR]; - int err; - struct msghdr msg; - struct iovec iov; - int fput_needed; - struct file *sock_file; - - sock_file = fget_light(fd, &fput_needed); - err = -EBADF; - if (!sock_file) - goto out; - - sock = sock_from_file(sock_file, &err); - if (!sock) - goto out_put; - iov.iov_base = buff; - iov.iov_len = len; - msg.msg_name = NULL; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; - if (addr) { - err = move_addr_to_kernel(addr, addr_len, address); - if (err < 0) - goto out_put; - msg.msg_name = address; - msg.msg_namelen = addr_len; - } - if (sock->file->f_flags & O_NONBLOCK) - flags |= MSG_DONTWAIT; - msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg, len); - -out_put: - fput_light(sock_file, fput_needed); -out: - return err; -} - -/* - * Send a datagram down a socket. 
- */ - -asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) -{ - return sys_sendto(fd, buff, len, flags, NULL, 0); -} - -/* - * Receive a frame from the socket and optionally record the address of the - * sender. We verify the buffers are writable and if needed move the - * sender address from kernel to user space. - */ - -asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, - unsigned flags, struct sockaddr __user *addr, - int __user *addr_len) -{ - struct socket *sock; - struct iovec iov; - struct msghdr msg; - char address[MAX_SOCK_ADDR]; - int err, err2; - struct file *sock_file; - int fput_needed; - - sock_file = fget_light(fd, &fput_needed); - err = -EBADF; - if (!sock_file) - goto out; - - sock = sock_from_file(sock_file, &err); - if (!sock) - goto out_put; - - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_iovlen = 1; - msg.msg_iov = &iov; - iov.iov_len = size; - iov.iov_base = ubuf; - msg.msg_name = address; - msg.msg_namelen = MAX_SOCK_ADDR; - if (sock->file->f_flags & O_NONBLOCK) - flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, size, flags); - - if (err >= 0 && addr != NULL) { - err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); - if (err2 < 0) - err = err2; - } -out_put: - fput_light(sock_file, fput_needed); -out: - return err; -} - -/* - * Receive a datagram from a socket. - */ - -asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, - unsigned flags) -{ - return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); -} - -/* - * Set a socket option. Because we don't know the option lengths we have - * to pass the user mode parameter for the protocols to sort out. 
- */ - -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) -{ - int err, fput_needed; - struct socket *sock; - - if (optlen < 0) - return -EINVAL; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_setsockopt(sock, level, optname); - if (err) - goto out_put; - - if (level == SOL_SOCKET) - err = - sock_setsockopt(sock, level, optname, optval, - optlen); - else - err = - sock->ops->setsockopt(sock, level, optname, optval, - optlen); -out_put: - fput_light(sock->file, fput_needed); - } - return err; -} - -/* - * Get a socket option. Because we don't know the option lengths we have - * to pass a user mode parameter for the protocols to sort out. - */ - -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) -{ - int err, fput_needed; - struct socket *sock; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_getsockopt(sock, level, optname); - if (err) - goto out_put; - - if (level == SOL_SOCKET) - err = - sock_getsockopt(sock, level, optname, optval, - optlen); - else - err = - sock->ops->getsockopt(sock, level, optname, optval, - optlen); -out_put: - fput_light(sock->file, fput_needed); - } - return err; -} - -/* - * Shutdown a socket. - */ - -asmlinkage long sys_shutdown(int fd, int how) -{ - int err, fput_needed; - struct socket *sock; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_shutdown(sock, how); - if (!err) - err = sock->ops->shutdown(sock, how); - fput_light(sock->file, fput_needed); - } - return err; -} - -/* A couple of helpful macros for getting the address of the 32/64 bit - * fields which are the same type (int / unsigned) on our platforms. - */ -#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? 
&msg##_compat->member : &msg->member) -#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) -#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) - -/* - * BSD sendmsg interface - */ - -asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) -{ - struct compat_msghdr __user *msg_compat = - (struct compat_msghdr __user *)msg; - struct socket *sock; - char address[MAX_SOCK_ADDR]; - struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; - unsigned char ctl[sizeof(struct cmsghdr) + 20] - __attribute__ ((aligned(sizeof(__kernel_size_t)))); - /* 20 is size of ipv6_pktinfo */ - unsigned char *ctl_buf = ctl; - struct msghdr msg_sys; - int err, ctl_len, iov_size, total_len; - int fput_needed; - - err = -EFAULT; - if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) - return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - - /* do not move before msg_sys is valid */ - err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; - - /* Check whether to allocate the iovec area */ - err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { - iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); - if (!iov) - goto out_put; - } - - /* This will also move the address data into kernel space */ - if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); - } else - err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); - if (err < 0) - goto out_freeiov; - total_len = err; - - err = -ENOBUFS; - - if (msg_sys.msg_controllen > INT_MAX) - goto out_freeiov; - ctl_len = msg_sys.msg_controllen; - if ((MSG_CMSG_COMPAT & flags) && ctl_len) { - err = - cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, - sizeof(ctl)); - if (err) - goto out_freeiov; - ctl_buf = msg_sys.msg_control; - ctl_len = 
msg_sys.msg_controllen; - } else if (ctl_len) { - if (ctl_len > sizeof(ctl)) { - ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); - if (ctl_buf == NULL) - goto out_freeiov; - } - err = -EFAULT; - /* - * Careful! Before this, msg_sys.msg_control contains a user pointer. - * Afterwards, it will be a kernel pointer. Thus the compiler-assisted - * checking falls down on this. - */ - if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, - ctl_len)) - goto out_freectl; - msg_sys.msg_control = ctl_buf; - } - msg_sys.msg_flags = flags; - - if (sock->file->f_flags & O_NONBLOCK) - msg_sys.msg_flags |= MSG_DONTWAIT; - err = sock_sendmsg(sock, &msg_sys, total_len); - -out_freectl: - if (ctl_buf != ctl) - sock_kfree_s(sock->sk, ctl_buf, ctl_len); -out_freeiov: - if (iov != iovstack) - sock_kfree_s(sock->sk, iov, iov_size); -out_put: - fput_light(sock->file, fput_needed); -out: - return err; -} - -/* - * BSD recvmsg interface - */ - -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, - unsigned int flags) -{ - struct compat_msghdr __user *msg_compat = - (struct compat_msghdr __user *)msg; - struct socket *sock; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct msghdr msg_sys; - unsigned long cmsg_ptr; - int err, iov_size, total_len, len; - int fput_needed; - - /* kernel mode address */ - char addr[MAX_SOCK_ADDR]; - - /* user mode address pointers */ - struct sockaddr __user *uaddr; - int __user *uaddr_len; - - if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) - return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - - err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; - - /* Check whether to allocate the iovec area */ - err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { - iov = 
sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); - if (!iov) - goto out_put; - } - - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) - */ - - uaddr = (void __user *)msg_sys.msg_name; - uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); - } else - err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); - if (err < 0) - goto out_freeiov; - total_len = err; - - cmsg_ptr = (unsigned long)msg_sys.msg_control; - msg_sys.msg_flags = 0; - if (MSG_CMSG_COMPAT & flags) - msg_sys.msg_flags = MSG_CMSG_COMPAT; - - if (sock->file->f_flags & O_NONBLOCK) - flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg_sys, total_len, flags); - if (err < 0) - goto out_freeiov; - len = err; - - if (uaddr != NULL) { - err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, - uaddr_len); - if (err < 0) - goto out_freeiov; - } - err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), - COMPAT_FLAGS(msg)); - if (err) - goto out_freeiov; - if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, - &msg_compat->msg_controllen); - else - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, - &msg->msg_controllen); - if (err) - goto out_freeiov; - err = len; - -out_freeiov: - if (iov != iovstack) - sock_kfree_s(sock->sk, iov, iov_size); -out_put: - fput_light(sock->file, fput_needed); -out: - return err; -} - -#ifdef __ARCH_WANT_SYS_SOCKETCALL - -/* Argument list sizes for sys_socketcall */ -#define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[18]={ - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) -}; - -#undef AL - -/* - * System call vectors. - * - * Argument checking cleaned up. Saved 20% in size. - * This function doesn't need to set the kernel lock because - * it is set by the callees. 
- */ - -asmlinkage long sys_socketcall(int call, unsigned long __user *args) -{ - unsigned long a[6]; - unsigned long a0, a1; - int err; - - if (call < 1 || call > SYS_RECVMSG) - return -EINVAL; - - /* copy_from_user should be SMP safe. */ - if (copy_from_user(a, args, nargs[call])) - return -EFAULT; - - err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); - if (err) - return err; - - a0 = a[0]; - a1 = a[1]; - - switch (call) { - case SYS_SOCKET: - err = sys_socket(a0, a1, a[2]); - break; - case SYS_BIND: - err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); - break; - case SYS_CONNECT: - err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); - break; - case SYS_LISTEN: - err = sys_listen(a0, a1); - break; - case SYS_ACCEPT: - err = - sys_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); - break; - case SYS_GETSOCKNAME: - err = - sys_getsockname(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); - break; - case SYS_GETPEERNAME: - err = - sys_getpeername(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); - break; - case SYS_SOCKETPAIR: - err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); - break; - case SYS_SEND: - err = sys_send(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_SENDTO: - err = sys_sendto(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], a[5]); - break; - case SYS_RECV: - err = sys_recv(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_RECVFROM: - err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], - (int __user *)a[5]); - break; - case SYS_SHUTDOWN: - err = sys_shutdown(a0, a1); - break; - case SYS_SETSOCKOPT: - err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); - break; - case SYS_GETSOCKOPT: - err = - sys_getsockopt(a0, a1, a[2], (char __user *)a[3], - (int __user *)a[4]); - break; - case SYS_SENDMSG: - err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); - break; - case SYS_RECVMSG: - err = sys_recvmsg(a0, (struct 
msghdr __user *)a1, a[2]); - break; - default: - err = -EINVAL; - break; - } - return err; -} - -#endif /* __ARCH_WANT_SYS_SOCKETCALL */ - -/** - * sock_register - add a socket protocol handler - * @ops: description of protocol - * - * This function is called by a protocol handler that wants to - * advertise its address family, and have it linked into the - * socket interface. The value ops->family coresponds to the - * socket system call protocol family. - */ -int sock_register(const struct net_proto_family *ops) -{ - int err; - - if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, - NPROTO); - return -ENOBUFS; - } - - spin_lock(&net_family_lock); - if (net_families[ops->family]) - err = -EEXIST; - else { - net_families[ops->family] = ops; - err = 0; - } - spin_unlock(&net_family_lock); - - printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); - return err; -} - -/** - * sock_unregister - remove a protocol handler - * @family: protocol family to remove - * - * This function is called by a protocol handler that wants to - * remove its address family, and have it unlinked from the - * new socket creation. - * - * If protocol handler is a module, then it can use module reference - * counts to protect against new references. If protocol handler is not - * a module then it needs to provide its own protection in - * the ops->create routine. - */ -void sock_unregister(int family) -{ - BUG_ON(family < 0 || family >= NPROTO); - - spin_lock(&net_family_lock); - net_families[family] = NULL; - spin_unlock(&net_family_lock); - - synchronize_rcu(); - - printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); -} - -static int __init sock_init(void) -{ - /* - * Initialize sock SLAB cache. - */ - - sk_init(); - - /* - * Initialize skbuff SLAB cache - */ - skb_init(); - - /* - * Initialize the protocols module. 
- */ - - init_inodecache(); - register_filesystem(&sock_fs_type); - sock_mnt = kern_mount(&sock_fs_type); - - /* The real protocol initialization is performed in later initcalls. - */ - -#ifdef CONFIG_NETFILTER - netfilter_init(); -#endif - - return 0; -} - -core_initcall(sock_init); /* early initcall */ - -#ifdef CONFIG_PROC_FS -void socket_seq_show(struct seq_file *seq) -{ - int cpu; - int counter = 0; - - for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); - - /* It can be negative, by the way. 8) */ - if (counter < 0) - counter = 0; - - seq_printf(seq, "sockets: used %d\n", counter); -} -#endif /* CONFIG_PROC_FS */ - -#ifdef CONFIG_COMPAT -static long compat_sock_ioctl(struct file *file, unsigned cmd, - unsigned long arg) -{ - struct socket *sock = file->private_data; - int ret = -ENOIOCTLCMD; - - if (sock->ops->compat_ioctl) - ret = sock->ops->compat_ioctl(sock, cmd, arg); - - return ret; -} -#endif - -int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) -{ - return sock->ops->bind(sock, addr, addrlen); -} - -int kernel_listen(struct socket *sock, int backlog) -{ - return sock->ops->listen(sock, backlog); -} - -int kernel_accept(struct socket *sock, struct socket **newsock, int flags) -{ - struct sock *sk = sock->sk; - int err; - - err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, - newsock); - if (err < 0) - goto done; - - err = sock->ops->accept(sock, *newsock, flags); - if (err < 0) { - sock_release(*newsock); - goto done; - } - - (*newsock)->ops = sock->ops; - -done: - return err; -} - -int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, - int flags) -{ - return sock->ops->connect(sock, addr, addrlen, flags); -} - -int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen) -{ - return sock->ops->getname(sock, addr, addrlen, 0); -} - -int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen) -{ - return sock->ops->getname(sock, 
addr, addrlen, 1); -} - -int kernel_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, optval, optlen); - else - err = sock->ops->getsockopt(sock, level, optname, optval, - optlen); - set_fs(oldfs); - return err; -} - -int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, optval, optlen); - else - err = sock->ops->setsockopt(sock, level, optname, optval, - optlen); - set_fs(oldfs); - return err; -} - -int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) -{ - if (sock->ops->sendpage) - return sock->ops->sendpage(sock, page, offset, size, flags); - - return sock_no_sendpage(sock, page, offset, size, flags); -} - -int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, cmd, arg); - set_fs(oldfs); - - return err; -} - -/* ABI emulation layers need these two */ -EXPORT_SYMBOL(move_addr_to_kernel); -EXPORT_SYMBOL(move_addr_to_user); -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); 
-EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); diff -Nurb linux-2.6.22-570/net/sunrpc/auth.c linux-2.6.22-591/net/sunrpc/auth.c --- linux-2.6.22-570/net/sunrpc/auth.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/sunrpc/auth.c 2007-12-21 15:36:12.000000000 -0500 @@ -19,12 +19,16 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { +static DEFINE_SPINLOCK(rpc_authflavor_lock); +static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ &authunix_ops, /* AUTH_UNIX */ NULL, /* others can be loadable modules */ }; +static LIST_HEAD(cred_unused); +static unsigned long number_cred_unused; + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -33,55 +37,67 @@ } int -rpcauth_register(struct rpc_authops *ops) +rpcauth_register(const struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != NULL) - return -EPERM; /* what else? */ + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == NULL) { auth_flavors[flavor] = ops; - return 0; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } int -rpcauth_unregister(struct rpc_authops *ops) +rpcauth_unregister(const struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != ops) - return -EPERM; /* what else? 
*/ + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == ops) { auth_flavors[flavor] = NULL; - return 0; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { struct rpc_auth *auth; - struct rpc_authops *ops; + const struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); auth = ERR_PTR(-EINVAL); if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; - /* FIXME - auth_flavors[] really needs an rw lock, - * and module refcounting. */ #ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); #endif - if ((ops = auth_flavors[flavor]) == NULL) + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); goto out; + } + spin_unlock(&rpc_authflavor_lock); auth = ops->create(clnt, pseudoflavor); + module_put(ops->owner); if (IS_ERR(auth)) return auth; if (clnt->cl_auth) - rpcauth_destroy(clnt->cl_auth); + rpcauth_release(clnt->cl_auth); clnt->cl_auth = auth; out: @@ -89,7 +105,7 @@ } void -rpcauth_destroy(struct rpc_auth *auth) +rpcauth_release(struct rpc_auth *auth) { if (!atomic_dec_and_test(&auth->au_count)) return; @@ -98,11 +114,31 @@ static DEFINE_SPINLOCK(rpc_credcache_lock); +static void +rpcauth_unhash_cred_locked(struct rpc_cred *cred) +{ + hlist_del_rcu(&cred->cr_hash); + smp_mb__before_clear_bit(); + clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); +} + +static void +rpcauth_unhash_cred(struct rpc_cred *cred) +{ + spinlock_t *cache_lock; + + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) + rpcauth_unhash_cred_locked(cred); + spin_unlock(cache_lock); +} + /* * Initialize RPC credential cache */ int -rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) +rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; 
int i; @@ -112,8 +148,7 @@ return -ENOMEM; for (i = 0; i < RPC_CREDCACHE_NR; i++) INIT_HLIST_HEAD(&new->hashtable[i]); - new->expire = expire; - new->nextgc = jiffies + (expire >> 1); + spin_lock_init(&new->lock); auth->au_credcache = new; return 0; } @@ -122,13 +157,13 @@ * Destroy a list of credentials */ static inline -void rpcauth_destroy_credlist(struct hlist_head *head) +void rpcauth_destroy_credlist(struct list_head *head) { struct rpc_cred *cred; - while (!hlist_empty(head)) { - cred = hlist_entry(head->first, struct rpc_cred, cr_hash); - hlist_del_init(&cred->cr_hash); + while (!list_empty(head)) { + cred = list_entry(head->next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); put_rpccred(cred); } } @@ -138,58 +173,95 @@ * that are not referenced. */ void -rpcauth_free_credcache(struct rpc_auth *auth) +rpcauth_clear_credcache(struct rpc_cred_cache *cache) { - struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; + LIST_HEAD(free); + struct hlist_head *head; struct rpc_cred *cred; int i; spin_lock(&rpc_credcache_lock); + spin_lock(&cache->lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, &free); + head = &cache->hashtable[i]; + while (!hlist_empty(head)) { + cred = hlist_entry(head->first, struct rpc_cred, cr_hash); + get_rpccred(cred); + if (!list_empty(&cred->cr_lru)) { + list_del(&cred->cr_lru); + number_cred_unused--; } + list_add_tail(&cred->cr_lru, &free); + rpcauth_unhash_cred_locked(cred); } + } + spin_unlock(&cache->lock); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); } -static void -rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) +/* + * Destroy the RPC credential cache + */ +void +rpcauth_destroy_credcache(struct rpc_auth *auth) { - if 
(atomic_read(&cred->cr_count) != 1) - return; - if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, free); + struct rpc_cred_cache *cache = auth->au_credcache; + + if (cache) { + auth->au_credcache = NULL; + rpcauth_clear_credcache(cache); + kfree(cache); } } /* * Remove stale credentials. Avoid sleeping inside the loop. */ -static void -rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) +static int +rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { - struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos, *next; + spinlock_t *cache_lock; struct rpc_cred *cred; - int i; - dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - rpcauth_prune_expired(auth, cred, free); + while(!list_empty(&cred_unused)) { + cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); + number_cred_unused--; + if (atomic_read(&cred->cr_count) != 0) + continue; + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) { + get_rpccred(cred); + list_add_tail(&cred->cr_lru, free); + rpcauth_unhash_cred_locked(cred); + nr_to_scan --; } + spin_unlock(cache_lock); + if (nr_to_scan == 0) + break; } - cache->nextgc = jiffies + cache->expire; + return nr_to_scan; +} + +/* + * Run memory cache shrinker. 
+ */ +static int +rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +{ + LIST_HEAD(free); + int res; + + if (list_empty(&cred_unused)) + return 0; + spin_lock(&rpc_credcache_lock); + nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); + res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(&free); + return res; } /* @@ -199,53 +271,56 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, int flags) { + LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; - struct rpc_cred *new = NULL, - *cred = NULL; + struct hlist_node *pos; + struct rpc_cred *cred = NULL, + *entry, *new; int nr = 0; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) nr = acred->uid & RPC_CREDCACHE_MASK; -retry: - spin_lock(&rpc_credcache_lock); - if (time_before(cache->nextgc, jiffies)) - rpcauth_gc_credcache(auth, &free); - hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { - struct rpc_cred *entry; - entry = hlist_entry(pos, struct rpc_cred, cr_hash); - if (entry->cr_ops->crmatch(acred, entry, flags)) { - hlist_del(&entry->cr_hash); - cred = entry; - break; - } - rpcauth_prune_expired(auth, entry, &free); - } - if (new) { - if (cred) - hlist_add_head(&new->cr_hash, &free); - else - cred = new; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + spin_lock(&cache->lock); + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { + spin_unlock(&cache->lock); + continue; } - if (cred) { - hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); - get_rpccred(cred); + cred = get_rpccred(entry); + spin_unlock(&cache->lock); + break; } - spin_unlock(&rpc_credcache_lock); + rcu_read_unlock(); - rpcauth_destroy_credlist(&free); + if (cred != NULL) + goto found; - if (!cred) { new = auth->au_ops->crcreate(auth, acred, flags); - if 
(!IS_ERR(new)) { -#ifdef RPC_DEBUG - new->cr_magic = RPCAUTH_CRED_MAGIC; -#endif - goto retry; - } else + if (IS_ERR(new)) { + cred = new; + goto out; + } + + spin_lock(&cache->lock); + hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + cred = get_rpccred(entry); + break; + } + if (cred == NULL) { cred = new; - } else if ((cred->cr_flags & RPCAUTH_CRED_NEW) + set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); + hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]); + } else + list_add_tail(&new->cr_lru, &free); + spin_unlock(&cache->lock); +found: + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL && !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); @@ -254,8 +329,9 @@ cred = ERR_PTR(res); } } - - return (struct rpc_cred *) cred; + rpcauth_destroy_credlist(&free); +out: + return cred; } struct rpc_cred * @@ -277,6 +353,23 @@ return ret; } +void +rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, + struct rpc_auth *auth, const struct rpc_credops *ops) +{ + INIT_HLIST_NODE(&cred->cr_hash); + INIT_LIST_HEAD(&cred->cr_lru); + atomic_set(&cred->cr_count, 1); + cred->cr_auth = auth; + cred->cr_ops = ops; + cred->cr_expire = jiffies; +#ifdef RPC_DEBUG + cred->cr_magic = RPCAUTH_CRED_MAGIC; +#endif + cred->cr_uid = acred->uid; +} +EXPORT_SYMBOL(rpcauth_init_cred); + struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { @@ -317,9 +410,31 @@ void put_rpccred(struct rpc_cred *cred) { - cred->cr_expire = jiffies; + /* Fast path for unhashed credentials */ + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + goto need_lock; + if (!atomic_dec_and_test(&cred->cr_count)) return; + goto out_destroy; +need_lock: + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) + return; + if (!list_empty(&cred->cr_lru)) { + number_cred_unused--; + list_del_init(&cred->cr_lru); + } + if 
(test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) + rpcauth_unhash_cred(cred); + else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + spin_unlock(&rpc_credcache_lock); + return; + } + spin_unlock(&rpc_credcache_lock); +out_destroy: cred->cr_ops->crdestroy(cred); } @@ -404,17 +519,34 @@ void rpcauth_invalcred(struct rpc_task *task) { + struct rpc_cred *cred = task->tk_msg.rpc_cred; + dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); - spin_lock(&rpc_credcache_lock); - if (task->tk_msg.rpc_cred) - task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - spin_unlock(&rpc_credcache_lock); + task->tk_pid, task->tk_auth->au_ops->au_name, cred); + if (cred) + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } int rpcauth_uptodatecred(struct rpc_task *task) { - return !(task->tk_msg.rpc_cred) || - (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + return cred == NULL || + test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; +} + + +static struct shrinker *rpc_cred_shrinker; + +void __init rpcauth_init_module(void) +{ + rpc_init_authunix(); + rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker); +} + +void __exit rpcauth_remove_module(void) +{ + if (rpc_cred_shrinker != NULL) + remove_shrinker(rpc_cred_shrinker); } diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c linux-2.6.22-591/net/sunrpc/auth_gss/auth_gss.c --- linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/auth_gss/auth_gss.c 2007-12-21 15:36:12.000000000 -0500 @@ -54,9 +54,9 @@ #include #include -static struct rpc_authops authgss_ops; +static const struct rpc_authops authgss_ops; -static struct rpc_credops gss_credops; +static const struct rpc_credops gss_credops; #ifdef 
RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -64,7 +64,6 @@ #define NFS_NGROUPS 16 -#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ #define GSS_CRED_SLACK 1024 /* XXX: unused */ /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ @@ -85,10 +84,8 @@ struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; - struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - spinlock_t lock; }; static void gss_destroy_ctx(struct gss_cl_ctx *); @@ -116,8 +113,8 @@ write_lock(&gss_ctx_lock); old = gss_cred->gc_ctx; gss_cred->gc_ctx = ctx; - cred->cr_flags |= RPCAUTH_CRED_UPTODATE; - cred->cr_flags &= ~RPCAUTH_CRED_NEW; + set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); write_unlock(&gss_ctx_lock); if (old) gss_put_ctx(old); @@ -130,7 +127,7 @@ int res = 0; read_lock(&gss_ctx_lock); - if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) res = 1; read_unlock(&gss_ctx_lock); return res; @@ -269,10 +266,10 @@ } static struct gss_upcall_msg * -__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) +__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &gss_auth->upcalls, list) { + list_for_each_entry(pos, &rpci->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -290,24 +287,24 @@ static inline struct gss_upcall_msg * gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) { + struct inode *inode = gss_auth->dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *old; - spin_lock(&gss_auth->lock); - old = __gss_find_upcall(gss_auth, gss_msg->uid); + spin_lock(&inode->i_lock); + old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, 
&gss_auth->upcalls); + list_add(&gss_msg->list, &rpci->in_downcall); } else gss_msg = old; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); return gss_msg; } static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - if (list_empty(&gss_msg->list)) - return; list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); @@ -318,10 +315,14 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg) { struct gss_auth *gss_auth = gss_msg->auth; + struct inode *inode = gss_auth->dentry->d_inode; - spin_lock(&gss_auth->lock); + if (list_empty(&gss_msg->list)) + return; + spin_lock(&inode->i_lock); + if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); } static void @@ -330,16 +331,16 @@ struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; + struct inode *inode = gss_msg->auth->dentry->d_inode; - BUG_ON(gss_msg == NULL); if (gss_msg->ctx) gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); else task->tk_status = gss_msg->msg.errno; - spin_lock(&gss_msg->auth->lock); + spin_lock(&inode->i_lock); gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); - spin_unlock(&gss_msg->auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); } @@ -386,11 +387,12 @@ gss_refresh_upcall(struct rpc_task *task) { struct rpc_cred *cred = task->tk_msg.rpc_cred; - struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth, + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; + struct inode *inode = gss_auth->dentry->d_inode; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -400,7 +402,7 @@ err = PTR_ERR(gss_msg); 
goto out; } - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -411,7 +413,7 @@ rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); } else err = gss_msg->msg.errno; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -422,6 +424,7 @@ static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { + struct inode *inode = gss_auth->dentry->d_inode; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -435,12 +438,12 @@ } for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); break; } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); if (signalled()) { err = -ERESTARTSYS; goto out_intr; @@ -489,12 +492,11 @@ const void *p, *end; void *buf; struct rpc_clnt *clnt; - struct gss_auth *gss_auth; - struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; + struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; uid_t uid; - int err = -EFBIG; + ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) goto out; @@ -503,7 +505,7 @@ if (!buf) goto out; - clnt = RPC_I(filp->f_path.dentry->d_inode)->private; + clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -519,43 +521,38 @@ ctx = gss_alloc_context(); if (ctx == NULL) goto err; - err = 0; - gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); - p = gss_fill_context(p, end, ctx, gss_auth->mech); + + err = -ENOENT; + /* Find a matching upcall */ + spin_lock(&inode->i_lock); + gss_msg = 
__gss_find_upcall(RPC_I(inode), uid); + if (gss_msg == NULL) { + spin_unlock(&inode->i_lock); + goto err_put_ctx; + } + list_del_init(&gss_msg->list); + spin_unlock(&inode->i_lock); + + p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); - if (err != -EACCES) - goto err_put_ctx; + gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN; + goto err_release_msg; } - spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - if (gss_msg) { - if (err == 0 && gss_msg->ctx == NULL) gss_msg->ctx = gss_get_ctx(ctx); - gss_msg->msg.errno = err; + err = mlen; + +err_release_msg: + spin_lock(&inode->i_lock); __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - } else { - struct auth_cred acred = { .uid = uid }; - spin_unlock(&gss_auth->lock); - cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW); - if (IS_ERR(cred)) { - err = PTR_ERR(cred); - goto err_put_ctx; - } - gss_cred_set_ctx(cred, gss_get_ctx(ctx)); - } - gss_put_ctx(ctx); - kfree(buf); - dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); - return mlen; err_put_ctx: gss_put_ctx(ctx); err: kfree(buf); out: - dprintk("RPC: gss_pipe_downcall returning %d\n", err); + dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); return err; } @@ -563,27 +560,21 @@ gss_pipe_release(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_clnt *clnt; - struct rpc_auth *auth; - struct gss_auth *gss_auth; - - clnt = rpci->private; - auth = clnt->cl_auth; - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - spin_lock(&gss_auth->lock); - while (!list_empty(&gss_auth->upcalls)) { struct gss_upcall_msg *gss_msg; - gss_msg = list_entry(gss_auth->upcalls.next, + spin_lock(&inode->i_lock); + while (!list_empty(&rpci->in_downcall)) { + + gss_msg = list_entry(rpci->in_downcall.next, struct gss_upcall_msg, list); gss_msg->msg.errno = -EPIPE; 
atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); } static void @@ -637,8 +628,6 @@ gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; - INIT_LIST_HEAD(&gss_auth->upcalls); - spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; @@ -646,10 +635,6 @@ auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); - err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); - if (err) - goto err_put_mech; - gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { @@ -657,7 +642,13 @@ goto err_put_mech; } + err = rpcauth_init_credcache(auth); + if (err) + goto err_unlink_pipe; + return auth; +err_unlink_pipe: + rpc_unlink(gss_auth->dentry); err_put_mech: gss_mech_put(gss_auth->mech); err_free: @@ -675,12 +666,13 @@ dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + rpcauth_destroy_credcache(auth); + gss_auth = container_of(auth, struct gss_auth, rpc_auth); rpc_unlink(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); - rpcauth_free_credcache(auth); kfree(gss_auth); module_put(THIS_MODULE); } @@ -701,17 +693,27 @@ } static void -gss_destroy_cred(struct rpc_cred *rc) +gss_free_cred(struct gss_cred *cred) { - struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); - - dprintk("RPC: gss_destroy_cred \n"); - + dprintk("RPC: gss_free_cred %p\n", cred); if (cred->gc_ctx) gss_put_ctx(cred->gc_ctx); kfree(cred); } +static void +gss_free_cred_callback(struct rcu_head *head) +{ + struct gss_cred *cred = container_of(head, struct gss_cred, gc_base.cr_rcu); + 
gss_free_cred(cred); +} + +static void +gss_destroy_cred(struct rpc_cred *rc) +{ + call_rcu(&rc->cr_rcu, gss_free_cred_callback); +} + /* * Lookup RPCSEC_GSS cred for the current process */ @@ -734,15 +736,12 @@ if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) goto out_err; - atomic_set(&cred->gc_count, 1); - cred->gc_uid = acred->uid; + rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); /* * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ - cred->gc_flags = 0; - cred->gc_base.cr_ops = &gss_credops; - cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; + cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; return &cred->gc_base; @@ -774,7 +773,7 @@ * we don't really care if the credential has expired or not, * since the caller should be prepared to reinitialise it. */ - if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW)) + if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) @@ -830,7 +829,7 @@ mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } else if (maj_stat != 0) { printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; @@ -883,7 +882,7 @@ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) goto out_bad; /* We leave it to unwrap to calculate au_rslack. For now we just @@ -937,7 +936,7 @@ maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); status = -EIO; /* XXX? 
*/ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; q = xdr_encode_opaque(p, NULL, mic.len); @@ -1036,7 +1035,7 @@ /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; @@ -1123,7 +1122,7 @@ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; return 0; @@ -1148,7 +1147,7 @@ maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; if (ntohl(*(*p)++) != rqstp->rq_seqno) @@ -1199,7 +1198,7 @@ return status; } -static struct rpc_authops authgss_ops = { +static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, #ifdef RPC_DEBUG @@ -1211,7 +1210,7 @@ .crcreate = gss_create_cred }; -static struct rpc_credops gss_credops = { +static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c linux-2.6.22-591/net/sunrpc/auth_gss/gss_krb5_mech.c --- linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/auth_gss/gss_krb5_mech.c 2007-12-21 15:36:12.000000000 -0500 @@ -201,7 +201,7 @@ kfree(kctx); } -static struct gss_api_ops gss_kerberos_ops = { +static const struct gss_api_ops gss_kerberos_ops = { 
.gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c linux-2.6.22-591/net/sunrpc/auth_gss/gss_spkm3_mech.c --- linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/auth_gss/gss_spkm3_mech.c 2007-12-21 15:36:12.000000000 -0500 @@ -202,7 +202,7 @@ return err; } -static struct gss_api_ops gss_spkm3_ops = { +static const struct gss_api_ops gss_spkm3_ops = { .gss_import_sec_context = gss_import_sec_context_spkm3, .gss_get_mic = gss_get_mic_spkm3, .gss_verify_mic = gss_verify_mic_spkm3, diff -Nurb linux-2.6.22-570/net/sunrpc/auth_null.c linux-2.6.22-591/net/sunrpc/auth_null.c --- linux-2.6.22-570/net/sunrpc/auth_null.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/auth_null.c 2007-12-21 15:36:12.000000000 -0500 @@ -76,7 +76,7 @@ static int nul_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -101,7 +101,7 @@ return p; } -struct rpc_authops authnull_ops = { +const struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, #ifdef RPC_DEBUG @@ -122,7 +122,7 @@ }; static -struct rpc_credops null_credops = { +const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, .crmatch = nul_match, @@ -133,9 +133,11 @@ static struct rpc_cred null_cred = { + .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru), + .cr_auth = &null_auth, .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), - .cr_flags = RPCAUTH_CRED_UPTODATE, + .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, #ifdef RPC_DEBUG .cr_magic = RPCAUTH_CRED_MAGIC, #endif diff -Nurb linux-2.6.22-570/net/sunrpc/auth_unix.c linux-2.6.22-591/net/sunrpc/auth_unix.c --- linux-2.6.22-570/net/sunrpc/auth_unix.c 2007-12-21 
15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/sunrpc/auth_unix.c 2007-12-23 02:13:00.000000000 -0500 @@ -22,11 +22,6 @@ gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid -#define uc_count uc_base.cr_count -#define uc_flags uc_base.cr_flags -#define uc_expire uc_base.cr_expire - -#define UNX_CRED_EXPIRE (60 * HZ) #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) @@ -36,15 +31,14 @@ static struct rpc_auth unix_auth; static struct rpc_cred_cache unix_cred_cache; -static struct rpc_credops unix_credops; +static const struct rpc_credops unix_credops; static struct rpc_auth * unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); - if (atomic_inc_return(&unix_auth.au_count) == 0) - unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); + atomic_inc(&unix_auth.au_count); return &unix_auth; } @@ -52,7 +46,7 @@ unx_destroy(struct rpc_auth *auth) { dprintk("RPC: destroying UNIX authenticator %p\n", auth); - rpcauth_free_credcache(auth); + rpcauth_clear_credcache(auth->au_credcache); } /* @@ -76,8 +70,8 @@ if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) return ERR_PTR(-ENOMEM); - atomic_set(&cred->uc_count, 1); - cred->uc_flags = RPCAUTH_CRED_UPTODATE; + rpcauth_init_cred(&cred->uc_base, acred, &unix_auth, &unix_credops); + cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; @@ -88,7 +82,6 @@ if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; cred->uc_tag = acred->tag; for (i = 0; i < groups; i++) @@ -96,17 +89,31 @@ if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } - cred->uc_base.cr_ops = &unix_credops; - return (struct rpc_cred *) cred; + return &cred->uc_base; } static void -unx_destroy_cred(struct rpc_cred *cred) + unx_free_cred(struct unx_cred *cred) { + dprintk("RPC: unx_free_cred %p\n", cred); kfree(cred); } +static void 
+unx_free_cred_callback(struct rcu_head *head) +{ + struct unx_cred *cred = container_of(head, struct unx_cred, uc_base.cr_rcu); + unx_free_cred(cred); +} + +static void +unx_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, unx_free_cred_callback); +} + + /* * Match credentials against current process creds. * The root_override argument takes care of cases where the caller may @@ -115,7 +122,7 @@ static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { - struct unx_cred *cred = (struct unx_cred *) rcred; + struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); int i; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { @@ -147,7 +154,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; + struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i, tag; @@ -159,7 +166,6 @@ * Copy the UTS nodename captured when the client was created. 
*/ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - tag = task->tk_client->cl_tag; *p++ = htonl((u32) TAGINO_UID(tag, cred->uc_uid, cred->uc_tag)); @@ -183,7 +189,7 @@ static int unx_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -212,7 +218,12 @@ return p; } -struct rpc_authops authunix_ops = { +void __init rpc_init_authunix(void) +{ + spin_lock_init(&unix_cred_cache.lock); +} + +const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, #ifdef RPC_DEBUG @@ -226,7 +237,6 @@ static struct rpc_cred_cache unix_cred_cache = { - .expire = UNX_CRED_EXPIRE, }; static @@ -240,7 +250,7 @@ }; static -struct rpc_credops unix_credops = { +const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, .crmatch = unx_match, diff -Nurb linux-2.6.22-570/net/sunrpc/auth_unix.c.orig linux-2.6.22-591/net/sunrpc/auth_unix.c.orig --- linux-2.6.22-570/net/sunrpc/auth_unix.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/net/sunrpc/auth_unix.c.orig 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,261 @@ +/* + * linux/net/sunrpc/auth_unix.c + * + * UNIX-style authentication; no AUTH_SHORT support + * + * Copyright (C) 1996, Olaf Kirch + */ + +#include +#include +#include +#include +#include +#include + +#define NFS_NGROUPS 16 + +struct unx_cred { + struct rpc_cred uc_base; + gid_t uc_gid; + tag_t uc_tag; + gid_t uc_gids[NFS_NGROUPS]; +}; +#define uc_uid uc_base.cr_uid + +#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static struct rpc_auth unix_auth; +static struct rpc_cred_cache unix_cred_cache; +static const struct rpc_credops unix_credops; + +static struct rpc_auth * +unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) +{ + dprintk("RPC: creating UNIX authenticator for client %p\n", + 
clnt); + atomic_inc(&unix_auth.au_count); + return &unix_auth; +} + +static void +unx_destroy(struct rpc_auth *auth) +{ + dprintk("RPC: destroying UNIX authenticator %p\n", auth); + rpcauth_clear_credcache(auth->au_credcache); +} + +/* + * Lookup AUTH_UNIX creds for current process + */ +static struct rpc_cred * +unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + return rpcauth_lookup_credcache(auth, acred, flags); +} + +static struct rpc_cred * +unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + struct unx_cred *cred; + int i; + + dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", + acred->uid, acred->gid); + + if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + + rpcauth_init_cred(&cred->uc_base, acred, &unix_auth, &unix_credops); + cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { + cred->uc_uid = 0; + cred->uc_gid = 0; + cred->uc_tag = dx_current_tag(); + cred->uc_gids[0] = NOGROUP; + } else { + int groups = acred->group_info->ngroups; + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + + cred->uc_gid = acred->gid; + cred->uc_tag = acred->tag; + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + if (i < NFS_NGROUPS) + cred->uc_gids[i] = NOGROUP; + } + + return &cred->uc_base; +} + +static void + unx_free_cred(struct unx_cred *cred) +{ + dprintk("RPC: unx_free_cred %p\n", cred); + kfree(cred); +} + +static void +unx_free_cred_callback(struct rcu_head *head) +{ + struct unx_cred *cred = container_of(head, struct unx_cred, uc_base.cr_rcu); + unx_free_cred(cred); +} + +static void +unx_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, unx_free_cred_callback); +} + + +/* + * Match credentials against current process creds. + * The root_override argument takes care of cases where the caller may + * request root creds (e.g. for NFS swapping). 
+ */ +static int +unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) +{ + struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); + int i; + + if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { + int groups; + + if (cred->uc_uid != acred->uid + || cred->uc_gid != acred->gid + || cred->uc_tag != acred->tag) + return 0; + + groups = acred->group_info->ngroups; + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + for (i = 0; i < groups ; i++) + if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) + return 0; + return 1; + } + return (cred->uc_uid == 0 + && cred->uc_gid == 0 + && cred->uc_gids[0] == (gid_t) NOGROUP); +} + +/* + * Marshal credentials. + * Maybe we should keep a cached credential for performance reasons. + */ +static __be32 * +unx_marshal(struct rpc_task *task, __be32 *p) +{ + struct rpc_clnt *clnt = task->tk_client; + struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); + __be32 *base, *hold; + int i, tag; + + *p++ = htonl(RPC_AUTH_UNIX); + base = p++; + *p++ = htonl(jiffies/HZ); + + /* + * Copy the UTS nodename captured when the client was created. + */ + p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); + tag = task->tk_client->cl_tag; + + *p++ = htonl((u32) TAGINO_UID(tag, + cred->uc_uid, cred->uc_tag)); + *p++ = htonl((u32) TAGINO_GID(tag, + cred->uc_gid, cred->uc_tag)); + hold = p++; + for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) + *p++ = htonl((u32) cred->uc_gids[i]); + *hold = htonl(p - hold - 1); /* gid array length */ + *base = htonl((p - base - 1) << 2); /* cred length */ + + *p++ = htonl(RPC_AUTH_NULL); + *p++ = htonl(0); + + return p; +} + +/* + * Refresh credentials. 
This is a no-op for AUTH_UNIX + */ +static int +unx_refresh(struct rpc_task *task) +{ + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + return 0; +} + +static __be32 * +unx_validate(struct rpc_task *task, __be32 *p) +{ + rpc_authflavor_t flavor; + u32 size; + + flavor = ntohl(*p++); + if (flavor != RPC_AUTH_NULL && + flavor != RPC_AUTH_UNIX && + flavor != RPC_AUTH_SHORT) { + printk("RPC: bad verf flavor: %u\n", flavor); + return NULL; + } + + size = ntohl(*p++); + if (size > RPC_MAX_AUTH_SIZE) { + printk("RPC: giant verf size: %u\n", size); + return NULL; + } + task->tk_auth->au_rslack = (size >> 2) + 2; + p += (size >> 2); + + return p; +} + +void __init rpc_init_authunix(void) +{ + spin_lock_init(&unix_cred_cache.lock); +} + +const struct rpc_authops authunix_ops = { + .owner = THIS_MODULE, + .au_flavor = RPC_AUTH_UNIX, +#ifdef RPC_DEBUG + .au_name = "UNIX", +#endif + .create = unx_create, + .destroy = unx_destroy, + .lookup_cred = unx_lookup_cred, + .crcreate = unx_create_cred, +}; + +static +struct rpc_cred_cache unix_cred_cache = { +}; + +static +struct rpc_auth unix_auth = { + .au_cslack = UNX_WRITESLACK, + .au_rslack = 2, /* assume AUTH_NULL verf */ + .au_ops = &authunix_ops, + .au_flavor = RPC_AUTH_UNIX, + .au_count = ATOMIC_INIT(0), + .au_credcache = &unix_cred_cache, +}; + +static +const struct rpc_credops unix_credops = { + .cr_name = "AUTH_UNIX", + .crdestroy = unx_destroy_cred, + .crmatch = unx_match, + .crmarshal = unx_marshal, + .crrefresh = unx_refresh, + .crvalidate = unx_validate, +}; diff -Nurb linux-2.6.22-570/net/sunrpc/clnt.c linux-2.6.22-591/net/sunrpc/clnt.c --- linux-2.6.22-570/net/sunrpc/clnt.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/sunrpc/clnt.c 2007-12-21 15:36:12.000000000 -0500 @@ -45,6 +45,12 @@ dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ __FUNCTION__, t->tk_status) +/* + * All RPC clients are linked into this list + */ +static LIST_HEAD(all_clients); +static 
DEFINE_SPINLOCK(rpc_client_lock); + static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -67,6 +73,21 @@ static __be32 * call_header(struct rpc_task *task); static __be32 * call_verify(struct rpc_task *task); +static int rpc_ping(struct rpc_clnt *clnt, int flags); + +static void rpc_register_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_add(&clnt->cl_clients, &all_clients); + spin_unlock(&rpc_client_lock); +} + +static void rpc_unregister_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_del(&clnt->cl_clients); + spin_unlock(&rpc_client_lock); +} static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) @@ -112,6 +133,9 @@ dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); + err = rpciod_up(); + if (err) + goto out_no_rpciod; err = -EINVAL; if (!xprt) goto out_no_xprt; @@ -122,8 +146,6 @@ clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - atomic_set(&clnt->cl_users, 0); - atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; @@ -149,6 +171,8 @@ if (clnt->cl_metrics == NULL) goto out_no_stats; clnt->cl_program = program; + INIT_LIST_HEAD(&clnt->cl_tasks); + spin_lock_init(&clnt->cl_lock); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -156,6 +180,8 @@ clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); + kref_init(&clnt->cl_kref); + err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) goto out_no_path; @@ -173,6 +199,7 @@ if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); + rpc_register_client(clnt); return clnt; out_no_auth: @@ -189,6 +216,8 @@ out_err: xprt_put(xprt); out_no_xprt: + rpciod_down(); +out_no_rpciod: return ERR_PTR(err); } @@ -246,8 +275,6 @@ clnt->cl_intr = 1; if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; - if 
(args->flags & RPC_CLNT_CREATE_ONESHOT) - clnt->cl_oneshot = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; /* TODO: handle RPC_CLNT_CREATE_TAGGED @@ -271,24 +298,25 @@ new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; - atomic_set(&new->cl_count, 1); - atomic_set(&new->cl_users, 0); + new->cl_parent = clnt; + /* Turn off autobind on clones */ + new->cl_autobind = 0; + INIT_LIST_HEAD(&new->cl_tasks); + spin_lock_init(&new->cl_lock); + rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; + kref_init(&new->cl_kref); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; - new->cl_parent = clnt; - atomic_inc(&clnt->cl_count); - new->cl_xprt = xprt_get(clnt->cl_xprt); - /* Turn off autobind on clones */ - new->cl_autobind = 0; - new->cl_oneshot = 0; - new->cl_dead = 0; - rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + xprt_get(clnt->cl_xprt); + kref_get(&clnt->cl_kref); + rpc_register_client(new); + rpciod_up(); return new; out_no_path: rpc_free_iostats(new->cl_metrics); @@ -301,52 +329,34 @@ /* * Properly shut down an RPC client, terminating all outstanding - * requests. Note that we must be certain that cl_oneshot and - * cl_dead are cleared, or else the client would be destroyed - * when the last task releases it. + * requests. 
*/ -int -rpc_shutdown_client(struct rpc_clnt *clnt) +void rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s, tasks=%d\n", - clnt->cl_protname, clnt->cl_server, - atomic_read(&clnt->cl_users)); - - while (atomic_read(&clnt->cl_users) > 0) { - /* Don't let rpc_release_client destroy us */ - clnt->cl_oneshot = 0; - clnt->cl_dead = 0; + dprintk("RPC: shutting down %s client for %s\n", + clnt->cl_protname, clnt->cl_server); + + while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, - !atomic_read(&clnt->cl_users), 1*HZ); - } - - if (atomic_read(&clnt->cl_users) < 0) { - printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", - clnt, atomic_read(&clnt->cl_users)); -#ifdef RPC_DEBUG - rpc_show_tasks(); -#endif - BUG(); + list_empty(&clnt->cl_tasks), 1*HZ); } - return rpc_destroy_client(clnt); + rpc_release_client(clnt); } /* - * Delete an RPC client + * Free an RPC client */ -int -rpc_destroy_client(struct rpc_clnt *clnt) +static void +rpc_free_client(struct kref *kref) { - if (!atomic_dec_and_test(&clnt->cl_count)) - return 1; - BUG_ON(atomic_read(&clnt->cl_users) != 0); + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (clnt->cl_auth) { - rpcauth_destroy(clnt->cl_auth); + rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; } if (!IS_ERR(clnt->cl_dentry)) { @@ -354,33 +364,31 @@ rpc_put_mount(); } if (clnt->cl_parent != clnt) { - rpc_destroy_client(clnt->cl_parent); + rpc_release_client(clnt->cl_parent); goto out_free; } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); + rpciod_down(); kfree(clnt); - return 0; } /* - * Release an RPC client + * Release reference to the RPC client */ void rpc_release_client(struct rpc_clnt *clnt) 
{ - dprintk("RPC: rpc_release_client(%p, %d)\n", - clnt, atomic_read(&clnt->cl_users)); + dprintk("RPC: rpc_release_client(%p)\n", clnt); - if (!atomic_dec_and_test(&clnt->cl_users)) - return; + if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - if (clnt->cl_oneshot || clnt->cl_dead) - rpc_destroy_client(clnt); + kref_put(&clnt->cl_kref, rpc_free_client); } /** @@ -471,82 +479,96 @@ rpc_restore_sigmask(oldset); } -/* - * New rpc_call implementation - */ -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +static +struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt, + struct rpc_message *msg, + int flags, + const struct rpc_call_ops *ops, + void *data) { - struct rpc_task *task; + struct rpc_task *task, *ret; sigset_t oldset; - int status; - - /* If this client is slain all further I/O fails */ - if (clnt->cl_dead) - return -EIO; - - BUG_ON(flags & RPC_TASK_ASYNC); - task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); - if (task == NULL) - return -ENOMEM; + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) { + rpc_release_calldata(ops, data); + return ERR_PTR(-ENOMEM); + } - /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ + /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ rpc_task_sigmask(task, &oldset); - - /* Set up the call info struct and execute the task */ + if (msg != NULL) { rpc_call_setup(task, msg, 0); - if (task->tk_status == 0) { + if (task->tk_status != 0) { + ret = ERR_PTR(task->tk_status); + rpc_put_task(task); + goto out; + } + } atomic_inc(&task->tk_count); rpc_execute(task); - } + ret = task; +out: + rpc_restore_sigmask(&oldset); + return ret; +} + +/** + * rpc_call_sync - Perform a synchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags + */ +int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +{ + struct rpc_task *task; + int status; + + BUG_ON(flags & RPC_TASK_ASYNC); + + task = 
rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); status = task->tk_status; rpc_put_task(task); - rpc_restore_sigmask(&oldset); return status; } -/* - * New rpc_call implementation +/** + * rpc_call_async - Perform an asynchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags + * @ops: RPC call ops + * @data: user call data */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; - sigset_t oldset; - int status; - /* If this client is slain all further I/O fails */ - status = -EIO; - if (clnt->cl_dead) - goto out_release; - - flags |= RPC_TASK_ASYNC; - - /* Create/initialize a new RPC task */ - status = -ENOMEM; - if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) - goto out_release; - - /* Mask signals on GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - - rpc_call_setup(task, msg, 0); - - /* Set up the call info struct and execute the task */ - status = task->tk_status; - if (status == 0) - rpc_execute(task); - else + task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); + if (IS_ERR(task)) + return PTR_ERR(task); rpc_put_task(task); - - rpc_restore_sigmask(&oldset); - return status; -out_release: - rpc_release_calldata(tk_ops, data); - return status; + return 0; } +/** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt: pointer to RPC client + * @flags: RPC flags + * @ops: RPC call ops + * @data: user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *tk_ops, + void *data) +{ + return rpc_do_run_task(clnt, NULL, flags, tk_ops, data); +} +EXPORT_SYMBOL(rpc_run_task); void rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) @@ -1424,7 +1446,7 @@ .p_decode = rpcproc_decode_null, }; -int rpc_ping(struct rpc_clnt *clnt, int 
flags) +static int rpc_ping(struct rpc_clnt *clnt, int flags) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, @@ -1435,3 +1457,51 @@ put_rpccred(msg.rpc_cred); return err; } + +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, + .rpc_cred = cred, + }; + return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); +} +EXPORT_SYMBOL(rpc_call_null); + +#ifdef RPC_DEBUG +void rpc_show_tasks(void) +{ + struct rpc_clnt *clnt; + struct rpc_task *t; + + spin_lock(&rpc_client_lock); + if (list_empty(&all_clients)) + goto out; + printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " + "-rpcwait -action- ---ops--\n"); + list_for_each_entry(clnt, &all_clients, cl_clients) { + if (list_empty(&clnt->cl_tasks)) + continue; + spin_lock(&clnt->cl_lock); + list_for_each_entry(t, &clnt->cl_tasks, tk_task) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + + printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + t->tk_pid, + (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_flags, t->tk_status, + t->tk_client, + (t->tk_client ? 
t->tk_client->cl_prog : 0), + t->tk_rqstp, t->tk_timeout, + rpc_waitq, + t->tk_action, t->tk_ops); + } + spin_unlock(&clnt->cl_lock); + } +out: + spin_unlock(&rpc_client_lock); +} +#endif diff -Nurb linux-2.6.22-570/net/sunrpc/rpc_pipe.c linux-2.6.22-591/net/sunrpc/rpc_pipe.c --- linux-2.6.22-570/net/sunrpc/rpc_pipe.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/rpc_pipe.c 2007-12-21 15:36:12.000000000 -0500 @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -344,7 +344,7 @@ mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); m->private = clnt; } else { single_release(inode, file); @@ -448,6 +448,15 @@ simple_release_fs(&rpc_mount, &rpc_mount_count); } +static int rpc_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations rpc_dentry_operations = { + .d_delete = rpc_delete_dentry, +}; + static int rpc_lookup_parent(char *path, struct nameidata *nd) { @@ -506,7 +515,7 @@ * FIXME: This probably has races. 
*/ static void -rpc_depopulate(struct dentry *parent) +rpc_depopulate(struct dentry *parent, int start, int eof) { struct inode *dir = parent->d_inode; struct list_head *pos, *next; @@ -518,6 +527,10 @@ spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { dentry = list_entry(pos, struct dentry, d_u.d_child); + if (!dentry->d_inode || + dentry->d_inode->i_ino < start || + dentry->d_inode->i_ino >= eof) + continue; spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); @@ -533,11 +546,11 @@ if (n) { do { dentry = dvec[--n]; - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); + if (S_ISREG(dentry->d_inode->i_mode)) simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + else if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(dir, dentry); + d_delete(dentry); dput(dentry); } while (n); goto repeat; @@ -560,6 +573,7 @@ dentry = d_alloc_name(parent, files[i].name); if (!dentry) goto out_bad; + dentry->d_op = &rpc_dentry_operations; mode = files[i].mode; inode = rpc_get_inode(dir->i_sb, mode); if (!inode) { @@ -574,6 +588,7 @@ if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); + fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -595,7 +610,7 @@ inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -607,21 +622,14 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) { int error; - - shrink_dcache_parent(dentry); - if (d_unhashed(dentry)) - return 0; - if ((error = simple_rmdir(dir, dentry)) != 0) + error = simple_rmdir(dir, dentry); + if (!error) + d_delete(dentry); return error; - if (!error) { - inode_dir_notify(dir, DN_DELETE); - d_drop(dentry); - } - return 0; } static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len) +rpc_lookup_create(struct 
dentry *parent, const char *name, int len, int exclusive) { struct inode *dir = parent->d_inode; struct dentry *dentry; @@ -630,7 +638,9 @@ dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; - if (dentry->d_inode) { + if (!dentry->d_inode) + dentry->d_op = &rpc_dentry_operations; + else if (exclusive) { dput(dentry); dentry = ERR_PTR(-EEXIST); goto out_err; @@ -649,7 +659,7 @@ if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); - dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1); if (IS_ERR(dentry)) rpc_release_path(nd); return dentry; @@ -681,7 +691,7 @@ rpc_release_path(&nd); return dentry; err_depopulate: - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); __rpc_rmdir(dir, dentry); err_dput: dput(dentry); @@ -701,7 +711,7 @@ parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); error = __rpc_rmdir(dir, dentry); dput(dentry); mutex_unlock(&dir->i_mutex); @@ -716,10 +726,21 @@ struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_create(parent, name, strlen(name)); + dentry = rpc_lookup_create(parent, name, strlen(name), 0); if (IS_ERR(dentry)) return dentry; dir = parent->d_inode; + if (dentry->d_inode) { + rpci = RPC_I(dentry->d_inode); + if (rpci->private != private || + rpci->ops != ops || + rpci->flags != flags) { + dput (dentry); + dentry = ERR_PTR(-EBUSY); + } + rpci->nkern_readwriters++; + goto out; + } inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; @@ -730,7 +751,8 @@ rpci->private = private; rpci->flags = flags; rpci->ops = ops; - inode_dir_notify(dir, DN_CREATE); + rpci->nkern_readwriters = 1; + fsnotify_create(dir, dentry); dget(dentry); out: mutex_unlock(&dir->i_mutex); @@ -754,13 +776,11 @@ parent = 
dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (!d_unhashed(dentry)) { - d_drop(dentry); - if (dentry->d_inode) { + if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { rpc_close_pipes(dentry->d_inode); error = simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + if (!error) + d_delete(dentry); } dput(dentry); mutex_unlock(&dir->i_mutex); @@ -833,6 +853,7 @@ rpci->nreaders = 0; rpci->nwriters = 0; INIT_LIST_HEAD(&rpci->in_upcall); + INIT_LIST_HEAD(&rpci->in_downcall); INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); diff -Nurb linux-2.6.22-570/net/sunrpc/rpcb_clnt.c linux-2.6.22-591/net/sunrpc/rpcb_clnt.c --- linux-2.6.22-570/net/sunrpc/rpcb_clnt.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/rpcb_clnt.c 2007-12-21 15:36:12.000000000 -0500 @@ -184,8 +184,7 @@ .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), + .flags = RPC_CLNT_CREATE_NOPING, }; ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); @@ -238,6 +237,7 @@ error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -286,6 +286,7 @@ return PTR_ERR(rpcb_clnt); status = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (status >= 0) { if (map.r_port != 0) @@ -379,6 +380,7 @@ } child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", diff -Nurb linux-2.6.22-570/net/sunrpc/sched.c linux-2.6.22-591/net/sunrpc/sched.c --- linux-2.6.22-570/net/sunrpc/sched.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/sched.c 2007-12-21 15:36:12.000000000 -0500 @@ -25,7 +25,6 @@ #ifdef 
RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED #define RPC_TASK_MAGIC_ID 0xf00baa -static int rpc_task_id; #endif /* @@ -40,7 +39,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); -static void rpciod_killall(void); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); @@ -50,23 +48,13 @@ static RPC_WAITQ(delay_queue, "delayq"); /* - * All RPC tasks are linked into this list - */ -static LIST_HEAD(all_tasks); - -/* * rpciod-related stuff */ static DEFINE_MUTEX(rpciod_mutex); -static unsigned int rpciod_users; +static atomic_t rpciod_users = ATOMIC_INIT(0); struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for other critical sections of code. - */ -static DEFINE_SPINLOCK(rpc_sched_lock); - -/* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). @@ -267,18 +255,33 @@ return 0; } +#ifdef RPC_DEBUG +static void rpc_task_set_debuginfo(struct rpc_task *task) +{ + static atomic_t rpc_pid; + + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = atomic_inc_return(&rpc_pid); +} +#else +static inline void rpc_task_set_debuginfo(struct rpc_task *task) +{ +} +#endif + static void rpc_set_active(struct rpc_task *task) { + struct rpc_clnt *clnt; if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) return; - spin_lock(&rpc_sched_lock); -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - task->tk_pid = rpc_task_id++; -#endif + rpc_task_set_debuginfo(task); /* Add to global list of all tasks */ - list_add_tail(&task->tk_task, &all_tasks); - spin_unlock(&rpc_sched_lock); + clnt = task->tk_client; + if (clnt != NULL) { + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } } /* @@ -818,6 +821,7 @@ if (tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; task->tk_calldata = 
calldata; + INIT_LIST_HEAD(&task->tk_task); /* Initialize retry counters */ task->tk_garb_retry = 2; @@ -830,7 +834,7 @@ task->tk_workqueue = rpciod_workqueue; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) @@ -860,9 +864,7 @@ } /* - * Create a new task for the specified client. We have to - * clean up after an allocation failure, as the client may - * have specified "oneshot". + * Create a new task for the specified client. */ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { @@ -870,7 +872,7 @@ task = rpc_alloc_task(); if (!task) - goto cleanup; + goto out; rpc_init_task(task, clnt, flags, tk_ops, calldata); @@ -878,16 +880,6 @@ task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; - -cleanup: - /* Check whether to release the client */ - if (clnt) { - printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - atomic_read(&clnt->cl_users), clnt->cl_oneshot); - atomic_inc(&clnt->cl_users); /* pretend we were used ... 
*/ - rpc_release_client(clnt); - } - goto out; } @@ -920,11 +912,13 @@ #endif dprintk("RPC: %5u release task\n", task->tk_pid); - /* Remove from global task list */ - spin_lock(&rpc_sched_lock); + if (!list_empty(&task->tk_task)) { + struct rpc_clnt *clnt = task->tk_client; + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); list_del(&task->tk_task); - spin_unlock(&rpc_sched_lock); - + spin_unlock(&clnt->cl_lock); + } BUG_ON (RPC_IS_QUEUED(task)); /* Synchronously delete any running timer */ @@ -939,29 +933,6 @@ rpc_put_task(task); } -/** - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it - * @clnt: pointer to RPC client - * @flags: RPC flags - * @ops: RPC call ops - * @data: user call data - */ -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *ops, - void *data) -{ - struct rpc_task *task; - task = rpc_new_task(clnt, flags, ops, data); - if (task == NULL) { - rpc_release_calldata(ops, data); - return ERR_PTR(-ENOMEM); - } - atomic_inc(&task->tk_count); - rpc_execute(task); - return task; -} -EXPORT_SYMBOL(rpc_run_task); - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -969,44 +940,25 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; - struct list_head *le; - dprintk("RPC: killing all tasks for client %p\n", clnt); + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); /* * Spin lock all_tasks to prevent changes... */ - spin_lock(&rpc_sched_lock); - alltask_for_each(rovr, le, &all_tasks) { + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { if (! 
RPC_IS_ACTIVATED(rovr)) continue; - if (!clnt || rovr->tk_client == clnt) { + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); rpc_wake_up_task(rovr); } } - spin_unlock(&rpc_sched_lock); -} - -static void rpciod_killall(void) -{ - unsigned long flags; - - while (!list_empty(&all_tasks)) { - clear_thread_flag(TIF_SIGPENDING); - rpc_killall_tasks(NULL); - flush_workqueue(rpciod_workqueue); - if (!list_empty(&all_tasks)) { - dprintk("RPC: rpciod_killall: waiting for tasks " - "to exit\n"); - yield(); - } - } - - spin_lock_irqsave(&current->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); + spin_unlock(&clnt->cl_lock); } /* @@ -1018,28 +970,27 @@ struct workqueue_struct *wq; int error = 0; + if (atomic_inc_not_zero(&rpciod_users)) + return 0; + mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_up: users %u\n", rpciod_users); - rpciod_users++; - if (rpciod_workqueue) - goto out; - /* - * If there's no pid, we should be the first user. - */ - if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users); + + /* Guard against races with rpciod_down() */ + if (rpciod_workqueue != NULL) + goto out_ok; /* * Create the rpciod thread and wait for it to start.
*/ + dprintk("RPC: creating workqueue rpciod\n"); error = -ENOMEM; wq = create_workqueue("rpciod"); - if (wq == NULL) { - printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); - rpciod_users--; + if (wq == NULL) goto out; - } + rpciod_workqueue = wq; error = 0; +out_ok: + atomic_inc(&rpciod_users); out: mutex_unlock(&rpciod_mutex); return error; @@ -1048,58 +999,18 @@ void rpciod_down(void) { - mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_down sema %u\n", rpciod_users); - if (rpciod_users) { - if (--rpciod_users) - goto out; - } else - printk(KERN_WARNING "rpciod_down: no users??\n"); + if (!atomic_dec_and_test(&rpciod_users)) + return; - if (!rpciod_workqueue) { - dprintk("RPC: rpciod_down: Nothing to do!\n"); - goto out; - } - rpciod_killall(); + mutex_lock(&rpciod_mutex); + dprintk("RPC: destroying workqueue rpciod\n"); + if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) { destroy_workqueue(rpciod_workqueue); rpciod_workqueue = NULL; - out: - mutex_unlock(&rpciod_mutex); -} - -#ifdef RPC_DEBUG -void rpc_show_tasks(void) -{ - struct list_head *le; - struct rpc_task *t; - - spin_lock(&rpc_sched_lock); - if (list_empty(&all_tasks)) { - spin_unlock(&rpc_sched_lock); - return; - } - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); - alltask_for_each(t, le, &all_tasks) { - const char *rpc_waitq = "none"; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? 
t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); } - spin_unlock(&rpc_sched_lock); + mutex_unlock(&rpciod_mutex); } -#endif void rpc_destroy_mempool(void) diff -Nurb linux-2.6.22-570/net/sunrpc/stats.c linux-2.6.22-591/net/sunrpc/stats.c --- linux-2.6.22-570/net/sunrpc/stats.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/stats.c 2007-12-21 15:36:15.000000000 -0500 @@ -21,6 +21,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_MISC @@ -265,7 +266,7 @@ dprintk("RPC: registering /proc/net/rpc\n"); if (!proc_net_rpc) { struct proc_dir_entry *ent; - ent = proc_mkdir("rpc", proc_net); + ent = proc_mkdir("rpc", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_rpc = ent; @@ -279,7 +280,7 @@ dprintk("RPC: unregistering /proc/net/rpc\n"); if (proc_net_rpc) { proc_net_rpc = NULL; - remove_proc_entry("net/rpc", NULL); + remove_proc_entry("rpc", init_net.proc_net); } } diff -Nurb linux-2.6.22-570/net/sunrpc/sunrpc_syms.c linux-2.6.22-591/net/sunrpc/sunrpc_syms.c --- linux-2.6.22-570/net/sunrpc/sunrpc_syms.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/sunrpc_syms.c 2007-12-21 15:36:12.000000000 -0500 @@ -28,15 +28,11 @@ EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpciod_down); -EXPORT_SYMBOL(rpciod_up); -EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); -EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); @@ -61,7 +57,7 @@ EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); -EXPORT_SYMBOL(rpcauth_free_credcache); +EXPORT_SYMBOL(rpcauth_destroy_credcache); EXPORT_SYMBOL(rpcauth_init_credcache); EXPORT_SYMBOL(put_rpccred); @@ -156,6 
+152,7 @@ cache_register(&ip_map_cache); cache_register(&unix_gid_cache); init_socket_xprt(); + rpcauth_init_module(); out: return err; } @@ -163,6 +160,7 @@ static void __exit cleanup_sunrpc(void) { + rpcauth_remove_module(); cleanup_socket_xprt(); unregister_rpc_pipefs(); rpc_destroy_mempool(); diff -Nurb linux-2.6.22-570/net/sunrpc/xprt.c linux-2.6.22-591/net/sunrpc/xprt.c --- linux-2.6.22-570/net/sunrpc/xprt.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/xprt.c 2007-12-21 15:36:12.000000000 -0500 @@ -127,7 +127,7 @@ clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); } else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); } /* @@ -515,7 +515,7 @@ if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff -Nurb linux-2.6.22-570/net/sunrpc/xprtsock.c linux-2.6.22-591/net/sunrpc/xprtsock.c --- linux-2.6.22-570/net/sunrpc/xprtsock.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sunrpc/xprtsock.c 2007-12-21 15:36:12.000000000 -0500 @@ -653,8 +653,7 @@ dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&transport->connect_worker); - flush_scheduled_work(); + cancel_rearming_delayed_work(&transport->connect_worker); xprt_disconnect(xprt); xs_close(xprt); @@ -1001,7 +1000,7 @@ /* Try to schedule an autoclose RPC calls */ set_bit(XPRT_CLOSE_WAIT, &xprt->state); if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); default: xprt_disconnect(xprt); } @@ -1410,18 +1409,16 @@ dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); - schedule_delayed_work(&transport->connect_worker, + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, xprt->reestablish_timeout); 
xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_delayed_work(&transport->connect_worker, 0); - - /* flush_scheduled_work can sleep... */ - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, 0); } } diff -Nurb linux-2.6.22-570/net/sysctl_net.c linux-2.6.22-591/net/sysctl_net.c --- linux-2.6.22-570/net/sysctl_net.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/sysctl_net.c 2007-12-21 15:36:15.000000000 -0500 @@ -54,3 +54,31 @@ #endif { 0 }, }; + +struct ctl_table multi_net_table[] = { + { + .ctl_name = NET_CORE, + .procname = "core", + .mode = 0555, + .child = multi_core_table, + }, +#ifdef CONFIG_INET + { + .ctl_name = NET_IPV4, + .procname = "ipv4", + .mode = 0555, + .child = multi_ipv4_table, + }, +#endif + {}, +}; + +struct ctl_table net_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = multi_net_table, + }, + {}, +}; diff -Nurb linux-2.6.22-570/net/tipc/eth_media.c linux-2.6.22-591/net/tipc/eth_media.c --- linux-2.6.22-570/net/tipc/eth_media.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/tipc/eth_media.c 2007-12-21 15:36:15.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ #include #include #include +#include #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI @@ -87,6 +88,9 @@ /** * recv_msg - handle incoming TIPC message from an Ethernet interface * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using Ethernet multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). * Routine truncates any Ethernet padding/CRC appended to the message, * and ensures message size matches actual length */ @@ -97,10 +101,13 @@ struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; + if (dev->nd_net != &init_net) { + kfree_skb(buf); + return 0; + } + if (likely(eb_ptr->bearer)) { - if (likely(!dev->promiscuity) || - !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) || - !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { size = msg_size((struct tipc_msg *)buf->data); skb_trim(buf, size); if (likely(buf->len == size)) { @@ -128,7 +135,7 @@ /* Find device with specified name */ - for_each_netdev(pdev){ + for_each_netdev(&init_net, pdev){ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; break; @@ -191,6 +198,9 @@ struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + while ((eb_ptr->dev != dev)) { if (++eb_ptr == stop) return NOTIFY_DONE; /* couldn't find device */ diff -Nurb linux-2.6.22-570/net/tipc/link.c linux-2.6.22-591/net/tipc/link.c --- linux-2.6.22-570/net/tipc/link.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/tipc/link.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 1996-2007,
Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1260,7 +1260,7 @@ * (Must not hold any locks while building message.) */ - res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt, + res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); @@ -1271,7 +1271,7 @@ if (likely(l_ptr)) { if (likely(buf)) { res = link_send_buf_fast(l_ptr, buf, - &sender->max_pkt); + &sender->publ.max_pkt); if (unlikely(res < 0)) buf_discard(buf); exit: @@ -1299,12 +1299,12 @@ * then re-try fast path or fragment the message */ - sender->max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = link_max_pkt(l_ptr); tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) + if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) goto again; return link_send_sections_long(sender, msg_sect, @@ -1357,7 +1357,7 @@ again: fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; + max_pkt = sender->publ.max_pkt - INT_H_SIZE; /* leave room for tunnel header in case of link changeover */ fragm_sz = max_pkt - INT_H_SIZE; /* leave room for fragmentation header in each fragment */ @@ -1463,7 +1463,7 @@ goto reject; } if (link_max_pkt(l_ptr) < max_pkt) { - sender->max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = link_max_pkt(l_ptr); tipc_node_unlock(node); for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; diff -Nurb linux-2.6.22-570/net/tipc/port.c linux-2.6.22-591/net/tipc/port.c --- linux-2.6.22-570/net/tipc/port.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/tipc/port.c 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/tipc/port.c: TIPC port code * - * Copyright (c) 1992-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 1992-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -239,6 +239,8 @@ } tipc_port_lock(ref); + p_ptr->publ.usr_handle = usr_handle; + p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); @@ -248,11 +250,9 @@ msg_set_importance(msg,importance); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; - p_ptr->publ.usr_handle = usr_handle; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->congested_link = NULL; - p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; p_ptr->user_port = NULL; @@ -1243,7 +1243,7 @@ res = TIPC_OK; exit: tipc_port_unlock(p_ptr); - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); return res; } diff -Nurb linux-2.6.22-570/net/tipc/port.h linux-2.6.22-591/net/tipc/port.h --- linux-2.6.22-570/net/tipc/port.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/tipc/port.h 2007-12-21 15:36:12.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/tipc/port.h: Include file for TIPC port code * - * Copyright (c) 1994-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -81,7 +81,6 @@ * @acked: * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime - * @max_pkt: maximum packet size "hint" used when building messages sent by port * @probing_state: * @probing_interval: * @last_in_seqno: @@ -102,7 +101,6 @@ u32 acked; struct list_head publications; u32 pub_count; - u32 max_pkt; u32 probing_state; u32 probing_interval; u32 last_in_seqno; diff -Nurb linux-2.6.22-570/net/tipc/socket.c linux-2.6.22-591/net/tipc/socket.c --- linux-2.6.22-570/net/tipc/socket.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/tipc/socket.c 2007-12-21 15:36:15.000000000 -0500 @@ -1,8 +1,8 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -162,13 +162,16 @@ * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct socket *sock, int protocol) +static int tipc_create(struct net *net, struct socket *sock, int protocol) { struct tipc_sock *tsock; struct tipc_port *port; struct sock *sk; u32 ref; + if (net != &init_net) + return -EAFNOSUPPORT; + if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -198,7 +201,7 @@ return -EPROTOTYPE; } - sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); if (!sk) { tipc_deleteport(ref); return -ENOMEM; @@ -607,23 +610,24 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { + struct tipc_port *tport; struct msghdr my_msg; struct iovec my_iov; struct iovec *curr_iov; int curr_iovlen; char __user *curr_start; + u32 hdr_size; int curr_left; int bytes_to_send; int bytes_sent; int res; - if 
(likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) - return send_packet(iocb, sock, m, total_len); - - /* Can only send large data streams if already connected */ + /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (sock->state == SS_UNCONNECTED) + return send_packet(iocb, sock, m, total_len); + else if (sock->state == SS_DISCONNECTING) return -EPIPE; else return -ENOTCONN; @@ -648,17 +652,25 @@ my_msg.msg_name = NULL; bytes_sent = 0; + tport = tipc_sk(sock->sk)->p; + hdr_size = msg_hdr_sz(&tport->phdr); + while (curr_iovlen--) { curr_start = curr_iov->iov_base; curr_left = curr_iov->iov_len; while (curr_left) { - bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE) - ? curr_left : TIPC_MAX_USER_MSG_SIZE; + bytes_to_send = tport->max_pkt - hdr_size; + if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) + bytes_to_send = TIPC_MAX_USER_MSG_SIZE; + if (curr_left < bytes_to_send) + bytes_to_send = curr_left; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { - return bytes_sent ? 
bytes_sent : res; + if (bytes_sent != 0) + res = bytes_sent; + return res; } curr_left -= bytes_to_send; curr_start += bytes_to_send; @@ -1363,7 +1375,7 @@ } buf = skb_peek(&sock->sk->sk_receive_queue); - res = tipc_create(newsock, 0); + res = tipc_create(sock->sk->sk_net, newsock, 0); if (!res) { struct tipc_sock *new_tsock = tipc_sk(newsock->sk); struct tipc_portid id; @@ -1600,33 +1612,6 @@ } /** - * Placeholders for non-implemented functionality - * - * Returns error code (POSIX-compliant where defined) - */ - -static int ioctl(struct socket *s, u32 cmd, unsigned long arg) -{ - return -EINVAL; -} - -static int no_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - return -EINVAL; -} -static ssize_t no_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - return -EINVAL; -} - -static int no_skpair(struct socket *s1, struct socket *s2) -{ - return -EOPNOTSUPP; -} - -/** * Protocol switches for the various types of TIPC sockets */ @@ -1636,19 +1621,19 @@ .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_msg, .recvmsg = recv_msg, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct proto_ops packet_ops = { @@ -1657,19 +1642,19 @@ .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_packet, .recvmsg = recv_msg, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = 
sock_no_sendpage }; static struct proto_ops stream_ops = { @@ -1678,19 +1663,19 @@ .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_stream, .recvmsg = recv_stream, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct net_proto_family tipc_family_ops = { diff -Nurb linux-2.6.22-570/net/unix/af_unix.c linux-2.6.22-591/net/unix/af_unix.c --- linux-2.6.22-570/net/unix/af_unix.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/unix/af_unix.c 2007-12-21 15:36:15.000000000 -0500 @@ -117,8 +117,8 @@ #include #include #include +#include -int sysctl_unix_max_dgram_qlen __read_mostly = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; DEFINE_SPINLOCK(unix_table_lock); @@ -245,7 +245,8 @@ spin_unlock(&unix_table_lock); } -static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, +static struct sock *__unix_find_socket_byname(struct net *net, + struct sockaddr_un *sunname, int len, int type, unsigned hash) { struct sock *s; @@ -254,7 +255,7 @@ sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); - if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) + if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT) || (s->sk_net != net)) continue; if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) @@ -265,21 +266,22 @@ return s; } -static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, +static inline struct sock *unix_find_socket_byname(struct net *net, + struct sockaddr_un *sunname, int len, int type, unsigned hash) { struct sock *s; spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(sunname, len, type, hash); + s = 
__unix_find_socket_byname(net, sunname, len, type, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); return s; } -static struct sock *unix_find_socket_byinode(struct inode *i) +static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) { struct sock *s; struct hlist_node *node; @@ -289,6 +291,9 @@ &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; + if (s->sk_net != net) + continue; + if(dentry && dentry->d_inode == i) { sock_hold(s); @@ -571,7 +576,7 @@ */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct socket *sock) +static struct sock * unix_create1(struct net *net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -579,7 +584,7 @@ if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); if (!sk) goto out; @@ -590,7 +595,7 @@ &af_unix_sk_receive_queue_lock_key); sk->sk_write_space = unix_write_space; - sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; + sk->sk_max_ack_backlog = net->sysctl_unix_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->dentry = NULL; @@ -604,7 +609,7 @@ return sk; } -static int unix_create(struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol) { if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -631,7 +636,7 @@ return -ESOCKTNOSUPPORT; } - return unix_create1(sock) ? 0 : -ENOMEM; + return unix_create1(net, sock) ? 
0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -649,6 +654,7 @@ static int unix_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; struct unix_address * addr; @@ -675,7 +681,7 @@ spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; - if (__unix_find_socket_byname(addr->name, addr->len, sock->type, + if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ @@ -695,7 +701,8 @@ return err; } -static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, +static struct sock *unix_find_other(struct net *net, + struct sockaddr_un *sunname, int len, int type, unsigned hash, int *error) { struct sock *u; @@ -713,7 +720,7 @@ err = -ECONNREFUSED; if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) goto put_fail; - u=unix_find_socket_byinode(nd.dentry->d_inode); + u=unix_find_socket_byinode(net, nd.dentry->d_inode); if (!u) goto put_fail; @@ -729,7 +736,7 @@ } } else { err = -ECONNREFUSED; - u=unix_find_socket_byname(sunname, len, type, hash); + u=unix_find_socket_byname(net, sunname, len, type, hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->dentry; @@ -751,6 +758,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct dentry * dentry = NULL; @@ -825,7 +833,7 @@ if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; - if (__unix_find_socket_byname(sunaddr, addr_len, + if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { unix_release_addr(addr); goto out_unlock; @@ -891,6 +899,7 @@ int alen, int flags) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; struct 
sock *other; unsigned hash; @@ -907,7 +916,7 @@ goto out; restart: - other=unix_find_other(sunaddr, alen, sock->type, hash, &err); + other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err); if (!other) goto out; @@ -987,6 +996,7 @@ { struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct sock *newsk = NULL; struct sock *other = NULL; @@ -1015,7 +1025,7 @@ err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(NULL); + newsk = unix_create1(sk->sk_net, NULL); if (newsk == NULL) goto out; @@ -1026,7 +1036,7 @@ restart: /* Find listening sock. */ - other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); if (!other) goto out; @@ -1305,6 +1315,7 @@ { struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=msg->msg_name; struct sock *other = NULL; @@ -1368,7 +1379,7 @@ if (sunaddr == NULL) goto out_free; - other = unix_find_other(sunaddr, namelen, sk->sk_type, + other = unix_find_other(net, sunaddr, namelen, sk->sk_type, hash, &err); if (other==NULL) goto out_free; @@ -1974,12 +1985,18 @@ #ifdef CONFIG_PROC_FS -static struct sock *unix_seq_idx(int *iter, loff_t pos) +struct unix_iter_state { + struct net *net; + int i; +}; +static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) { loff_t off = 0; struct sock *s; - for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { + for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { + if (s->sk_net != iter->net) + continue; if (off == pos) return s; ++off; @@ -1990,17 +2007,24 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { + struct unix_iter_state *iter = seq->private; spin_lock(&unix_table_lock); 
- return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); + return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct unix_iter_state *iter = seq->private; + struct sock *sk = v; ++*pos; if (v == (void *)1) - return first_unix_socket(seq->private); - return next_unix_socket(seq->private, v); + sk = first_unix_socket(&iter->i); + else + sk = next_unix_socket(&iter->i, sk); + while (sk && (sk->sk_net != iter->net)) + sk = next_unix_socket(&iter->i, sk); + return sk; } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -2064,7 +2088,7 @@ { struct seq_file *seq; int rc = -ENOMEM; - int *iter = kmalloc(sizeof(int), GFP_KERNEL); + struct unix_iter_state *iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) goto out; @@ -2075,7 +2099,8 @@ seq = file->private_data; seq->private = iter; - *iter = 0; + iter->net = get_net(PROC_NET(inode)); + iter->i = 0; out: return rc; out_kfree: @@ -2083,12 +2108,20 @@ goto out; } +static int unix_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct unix_iter_state *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations unix_seq_fops = { .owner = THIS_MODULE, .open = unix_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = unix_seq_release, }; #endif @@ -2099,6 +2132,33 @@ .owner = THIS_MODULE, }; + +static int unix_net_init(struct net *net) +{ + int error = -ENOMEM; + + net->sysctl_unix_max_dgram_qlen = 10; +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) + goto out; +#endif + unix_sysctl_register(net); + error = 0; +out: + return error; /* -ENOMEM when the "unix" proc entry could not be created, else 0 */ +} + +static void unix_net_exit(struct net *net) +{ + unix_sysctl_unregister(net); + proc_net_remove(net, "unix"); +} + +static struct pernet_operations unix_net_ops = { + .init = unix_net_init, + 
.exit = unix_net_exit, +}; + static int __init af_unix_init(void) { int rc = -1; @@ -2114,10 +2174,7 @@ } sock_register(&unix_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create("unix", 0, &unix_seq_fops); -#endif - unix_sysctl_register(); + register_pernet_subsys(&unix_net_ops); out: return rc; } @@ -2125,9 +2182,8 @@ static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); - unix_sysctl_unregister(); - proc_net_remove("unix"); proto_unregister(&unix_proto); + unregister_pernet_subsys(&unix_net_ops); } module_init(af_unix_init); diff -Nurb linux-2.6.22-570/net/unix/sysctl_net_unix.c linux-2.6.22-591/net/unix/sysctl_net_unix.c --- linux-2.6.22-570/net/unix/sysctl_net_unix.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/unix/sysctl_net_unix.c 2007-12-21 15:36:15.000000000 -0500 @@ -14,47 +14,71 @@ #include -static ctl_table unix_table[] = { +static struct unix_sysctl_table { + struct ctl_table_header *sysctl_header; + struct ctl_table unix_table[2]; + struct ctl_table unix_net_table[2]; + struct ctl_table unix_root_table[2]; +} unix_sysctl = { + .unix_table = { { .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, .procname = "max_dgram_qlen", - .data = &sysctl_unix_max_dgram_qlen, + .data = &init_net.sysctl_unix_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, - { .ctl_name = 0 } -}; - -static ctl_table unix_net_table[] = { + {} + }, + .unix_net_table = { { .ctl_name = NET_UNIX, .procname = "unix", .mode = 0555, - .child = unix_table + .child = unix_sysctl.unix_table }, - { .ctl_name = 0 } -}; - -static ctl_table unix_root_table[] = { + {} + }, + .unix_root_table = { { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, - .child = unix_net_table + .child = unix_sysctl.unix_net_table }, - { .ctl_name = 0 } + {} + } }; -static struct ctl_table_header * unix_sysctl_header; - -void unix_sysctl_register(void) +void unix_sysctl_register(struct net *net) { - unix_sysctl_header = 
register_sysctl_table(unix_root_table); + struct unix_sysctl_table *table; + int i; + + table = kmemdup(&unix_sysctl, sizeof(*table), GFP_KERNEL); + if (!table) + return; + for (i = 0; i < ARRAY_SIZE(table->unix_table) - 1; i++) + table->unix_table[i].data += (char *)net - (char *)&init_net; + + table->unix_net_table[0].child = table->unix_table; + table->unix_root_table[0].child = table->unix_net_table; + + table->sysctl_header = + register_net_sysctl_table(net, table->unix_root_table); + if (!table->sysctl_header) { + kfree(table); + return; + } + net->unix_sysctl = table; } -void unix_sysctl_unregister(void) +void unix_sysctl_unregister(struct net *net) { - unregister_sysctl_table(unix_sysctl_header); + struct unix_sysctl_table *table = net->unix_sysctl; + if (table) + unregister_net_sysctl_table(table->sysctl_header); + kfree(table); } diff -Nurb linux-2.6.22-570/net/wanrouter/wanproc.c linux-2.6.22-591/net/wanrouter/wanproc.c --- linux-2.6.22-570/net/wanrouter/wanproc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/wanrouter/wanproc.c 2007-12-21 15:36:15.000000000 -0500 @@ -28,6 +28,7 @@ #include /* WAN router API definitions */ #include #include +#include #include @@ -287,7 +288,7 @@ int __init wanrouter_proc_init(void) { struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, proc_net); + proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); if (!proc_router) goto fail; @@ -303,7 +304,7 @@ fail_stat: remove_proc_entry("config", proc_router); fail_config: - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); fail: return -ENOMEM; } @@ -316,7 +317,7 @@ { remove_proc_entry("config", proc_router); remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); } /* diff -Nurb linux-2.6.22-570/net/wireless/wext.c linux-2.6.22-591/net/wireless/wext.c --- linux-2.6.22-570/net/wireless/wext.c 2007-07-08 19:32:17.000000000 
-0400 +++ linux-2.6.22-591/net/wireless/wext.c 2007-12-21 15:36:15.000000000 -0500 @@ -95,6 +95,7 @@ #include #include /* Pretty obvious */ +#include #include /* New driver API */ #include #include @@ -672,7 +673,22 @@ static int wireless_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &wireless_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &wireless_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int wireless_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations wireless_seq_fops = { @@ -680,17 +696,22 @@ .open = wireless_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = wireless_seq_release, }; -int __init wext_proc_init(void) +int wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; return 0; } + +void wext_proc_exit(struct net *net) +{ + proc_net_remove(net, "wireless"); +} #endif /* CONFIG_PROC_FS */ /************************** IOCTL SUPPORT **************************/ @@ -1010,7 +1031,7 @@ * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... 
*/ -static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) { struct net_device *dev; iw_handler handler; @@ -1019,7 +1040,7 @@ * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... @@ -1053,7 +1074,7 @@ } /* entry point from dev ioctl */ -int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { int ret; @@ -1065,9 +1086,9 @@ && !capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr->ifr_name); + dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(ifr, cmd); + ret = wireless_process_ioctl(net, ifr, cmd); rtnl_unlock(); if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq))) return -EFAULT; @@ -1111,8 +1132,13 @@ { struct sk_buff *skb; - while ((skb = skb_dequeue(&wireless_nlevent_queue))) - rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + while ((skb = skb_dequeue(&wireless_nlevent_queue))) { + struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; + skb->dev = NULL; + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + dev_put(dev); + } } static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); @@ -1173,6 +1199,9 @@ kfree_skb(skb); return; } + /* Remember the device until we are in process context */ + dev_hold(dev); + skb->dev = dev; NETLINK_CB(skb).dst_group = RTNLGRP_LINK; skb_queue_tail(&wireless_nlevent_queue, skb); tasklet_schedule(&wireless_nlevent_tasklet); diff -Nurb linux-2.6.22-570/net/x25/af_x25.c linux-2.6.22-591/net/x25/af_x25.c --- linux-2.6.22-570/net/x25/af_x25.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/net/x25/af_x25.c 
2007-12-21 15:36:15.000000000 -0500 @@ -191,6 +191,9 @@ struct net_device *dev = ptr; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->type == ARPHRD_X25 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) || dev->type == ARPHRD_ETHER @@ -466,10 +469,10 @@ .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(void) +static struct sock *x25_alloc_socket(struct net *net) { struct x25_sock *x25; - struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); if (!sk) goto out; @@ -485,17 +488,20 @@ return sk; } -static int x25_create(struct socket *sock, int protocol) +static int x25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct x25_sock *x25; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol) goto out; rc = -ENOMEM; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(net)) == NULL) goto out; x25 = x25_sk(sk); @@ -546,7 +552,7 @@ if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) goto out; x25 = x25_sk(sk); diff -Nurb linux-2.6.22-570/net/x25/x25_dev.c linux-2.6.22-591/net/x25/x25_dev.c --- linux-2.6.22-570/net/x25/x25_dev.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/x25/x25_dev.c 2007-12-21 15:36:15.000000000 -0500 @@ -95,6 +95,9 @@ struct sk_buff *nskb; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + goto drop; + nskb = skb_copy(skb, GFP_ATOMIC); if (!nskb) goto drop; diff -Nurb linux-2.6.22-570/net/x25/x25_proc.c linux-2.6.22-591/net/x25/x25_proc.c --- linux-2.6.22-570/net/x25/x25_proc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/x25/x25_proc.c 2007-12-21 15:36:15.000000000 -0500 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,7 @@ 
struct proc_dir_entry *p; int rc = -ENOMEM; - x25_proc_dir = proc_mkdir("x25", proc_net); + x25_proc_dir = proc_mkdir("x25", init_net.proc_net); if (!x25_proc_dir) goto out; @@ -328,7 +329,7 @@ out_socket: remove_proc_entry("route", x25_proc_dir); out_route: - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); goto out; } @@ -337,7 +338,7 @@ remove_proc_entry("forward", x25_proc_dir); remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-570/net/x25/x25_route.c linux-2.6.22-591/net/x25/x25_route.c --- linux-2.6.22-570/net/x25/x25_route.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/x25/x25_route.c 2007-12-21 15:36:15.000000000 -0500 @@ -129,7 +129,7 @@ */ struct net_device *x25_dev_get(char *devname) { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(&init_net, devname); if (dev && (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_policy.c linux-2.6.22-591/net/xfrm/xfrm_policy.c --- linux-2.6.22-570/net/xfrm/xfrm_policy.c 2007-12-21 15:35:57.000000000 -0500 +++ linux-2.6.22-591/net/xfrm/xfrm_policy.c 2007-12-21 15:36:15.000000000 -0500 @@ -30,8 +30,6 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly; - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -1570,7 +1568,7 @@ if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && init_net.sysctl_xfrm_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. 
*/ @@ -1954,8 +1952,8 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = &init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); } } @@ -2357,6 +2355,11 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_DOWN: xfrm_flush_bundles(); diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_state.c linux-2.6.22-591/net/xfrm/xfrm_state.c --- linux-2.6.22-570/net/xfrm/xfrm_state.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/xfrm/xfrm_state.c 2007-12-21 15:36:15.000000000 -0500 @@ -28,14 +28,6 @@ struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. 
(input,ctl) @@ -665,8 +657,8 @@ h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires; + x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); xfrm_state_num++; xfrm_hash_grow_check(x->bydst.next != NULL); @@ -815,9 +807,9 @@ x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); @@ -1775,6 +1767,19 @@ EXPORT_SYMBOL(xfrm_init_state); + +static int xfrm_state_pernet_init(struct net *net) +{ + net->sysctl_xfrm_aevent_etime = XFRM_AE_ETIME; + net->sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->sysctl_xfrm_acq_expires = 30; + return 0; +} + +static struct pernet_operations xfrm_state_net_ops = { + .init = xfrm_state_pernet_init, +}; + void __init xfrm_state_init(void) { unsigned int sz; @@ -1789,5 +1794,7 @@ xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + + register_pernet_subsys(&xfrm_state_net_ops); } diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_user.c linux-2.6.22-591/net/xfrm/xfrm_user.c --- linux-2.6.22-570/net/xfrm/xfrm_user.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/net/xfrm/xfrm_user.c 2007-12-21 15:36:15.000000000 -0500 @@ -374,7 +374,8 @@ return err; } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, +static struct xfrm_state *xfrm_state_construct(struct net *net, + struct 
xfrm_usersa_info *p, struct rtattr **xfrma, int *errp) { @@ -410,9 +411,9 @@ goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->sysctl_xfrm_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (net->sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; @@ -436,6 +437,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtattr **xfrma) { + struct net *net = skb->sk->sk_net; struct xfrm_usersa_info *p = NLMSG_DATA(nlh); struct xfrm_state *x; int err; @@ -445,7 +447,7 @@ if (err) return err; - x = xfrm_state_construct(p, xfrma, &err); + x = xfrm_state_construct(net, p, xfrma, &err); if (!x) return err; @@ -2559,7 +2561,7 @@ printk(KERN_INFO "Initializing XFRM netlink socket\n"); - nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; diff -Nurb linux-2.6.22-570/rej linux-2.6.22-591/rej --- linux-2.6.22-570/rej 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/rej 2007-12-21 15:36:12.000000000 -0500 @@ -0,0 +1,28 @@ +vi -o ./drivers/dma/ioatdma.c ./drivers/dma/ioatdma.c.rej +vi -o ./fs/nfs/super.c ./fs/nfs/super.c.rej +vi -o ./fs/ocfs2/aops.c ./fs/ocfs2/aops.c.rej +vi -o ./fs/ocfs2/file.c ./fs/ocfs2/file.c.rej +vi -o ./fs/ocfs2/super.c ./fs/ocfs2/super.c.rej +vi -o ./fs/proc/base.c ./fs/proc/base.c.rej +vi -o ./fs/sysfs/file.c ./fs/sysfs/file.c.rej +vi -o ./fs/sync.c ./fs/sync.c.rej +vi -o ./include/acpi/processor.h ./include/acpi/processor.h.rej +vi -o ./include/linux/sunrpc/clnt.h ./include/linux/sunrpc/clnt.h.rej +vi -o ./include/linux/syscalls.h ./include/linux/syscalls.h.rej +vi -o ./include/linux/nfs_mount.h 
./include/linux/nfs_mount.h.rej +vi -o ./include/linux/sched.h ./include/linux/sched.h.rej +vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej +vi -o ./include/linux/fs.h ./include/linux/fs.h.rej +vi -o ./kernel/timer.c ./kernel/timer.c.rej +vi -o ./kernel/fork.c ./kernel/fork.c.rej +vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej +vi -o ./kernel/sys.c ./kernel/sys.c.rej +vi -o ./kernel/user.c ./kernel/user.c.rej +vi -o ./kernel/utsname.c ./kernel/utsname.c.rej +vi -o ./kernel/sched.c ./kernel/sched.c.rej +vi -o ./kernel/container.c ./kernel/container.c.rej +vi -o ./mm/memory.c ./mm/memory.c.rej +vi -o ./mm/hugetlb.c ./mm/hugetlb.c.rej +vi -o ./net/bridge/br_if.c ./net/bridge/br_if.c.rej +vi -o ./net/sunrpc/auth_unix.c ./net/sunrpc/auth_unix.c.rej +vi -o ./scripts/checksyscalls.sh ./scripts/checksyscalls.sh.rej diff -Nurb linux-2.6.22-570/scripts/Makefile.build.orig linux-2.6.22-591/scripts/Makefile.build.orig --- linux-2.6.22-570/scripts/Makefile.build.orig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/scripts/Makefile.build.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,348 +0,0 @@ -# ========================================================================== -# Building -# ========================================================================== - -src := $(obj) - -PHONY := __build -__build: - -# Read .config if it exist, otherwise ignore --include include/config/auto.conf - -include scripts/Kbuild.include - -# The filename Kbuild has precedence over Makefile -kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) - -include scripts/Makefile.lib - -ifdef host-progs -ifneq ($(hostprogs-y),$(host-progs)) -$(warning kbuild: $(obj)/Makefile - Usage of host-progs is deprecated. Please replace with hostprogs-y!) 
-hostprogs-y += $(host-progs) -endif -endif - -# Do not include host rules unles needed -ifneq ($(hostprogs-y)$(hostprogs-m),) -include scripts/Makefile.host -endif - -ifneq ($(KBUILD_SRC),) -# Create output directory if not already present -_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj)) - -# Create directories for object files if directory does not exist -# Needed when obj-y := dir/file.o syntax is used -_dummy := $(foreach d,$(obj-dirs), $(shell [ -d $(d) ] || mkdir -p $(d))) -endif - - -ifdef EXTRA_TARGETS -$(warning kbuild: $(obj)/Makefile - Usage of EXTRA_TARGETS is obsolete in 2.6. Please fix!) -endif - -ifdef build-targets -$(warning kbuild: $(obj)/Makefile - Usage of build-targets is obsolete in 2.6. Please fix!) -endif - -ifdef export-objs -$(warning kbuild: $(obj)/Makefile - Usage of export-objs is obsolete in 2.6. Please fix!) -endif - -ifdef O_TARGET -$(warning kbuild: $(obj)/Makefile - Usage of O_TARGET := $(O_TARGET) is obsolete in 2.6. Please fix!) -endif - -ifdef L_TARGET -$(error kbuild: $(obj)/Makefile - Use of L_TARGET is replaced by lib-y in 2.6. Please fix!) -endif - -ifdef list-multi -$(warning kbuild: $(obj)/Makefile - list-multi := $(list-multi) is obsolete in 2.6. Please fix!) 
-endif - -ifndef obj -$(warning kbuild: Makefile.build is included improperly) -endif - -# =========================================================================== - -ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),) -lib-target := $(obj)/lib.a -endif - -ifneq ($(strip $(obj-y) $(obj-m) $(obj-n) $(obj-) $(lib-target)),) -builtin-target := $(obj)/built-in.o -endif - -# We keep a list of all modules in $(MODVERDIR) - -__build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \ - $(if $(KBUILD_MODULES),$(obj-m)) \ - $(subdir-ym) $(always) - @: - -# Linus' kernel sanity checking tool -ifneq ($(KBUILD_CHECKSRC),0) - ifeq ($(KBUILD_CHECKSRC),2) - quiet_cmd_force_checksrc = CHECK $< - cmd_force_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ; - else - quiet_cmd_checksrc = CHECK $< - cmd_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ; - endif -endif - - -# Compile C sources (.c) -# --------------------------------------------------------------------------- - -# Default is built-in, unless we know otherwise -modkern_cflags := $(CFLAGS_KERNEL) -quiet_modtag := $(empty) $(empty) - -$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) -$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) -$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) -$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) - -$(real-objs-m) : quiet_modtag := [M] -$(real-objs-m:.o=.i) : quiet_modtag := [M] -$(real-objs-m:.o=.s) : quiet_modtag := [M] -$(real-objs-m:.o=.lst): quiet_modtag := [M] - -$(obj-m) : quiet_modtag := [M] - -# Default for not multi-part modules -modname = $(basetarget) - -$(multi-objs-m) : modname = $(modname-multi) -$(multi-objs-m:.o=.i) : modname = $(modname-multi) -$(multi-objs-m:.o=.s) : modname = $(modname-multi) -$(multi-objs-m:.o=.lst) : modname = $(modname-multi) -$(multi-objs-y) : modname = $(modname-multi) -$(multi-objs-y:.o=.i) : modname = $(modname-multi) -$(multi-objs-y:.o=.s) : modname = $(modname-multi) -$(multi-objs-y:.o=.lst) : 
modname = $(modname-multi) - -quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ -cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $< - -$(obj)/%.s: $(src)/%.c FORCE - $(call if_changed_dep,cc_s_c) - -quiet_cmd_cc_i_c = CPP $(quiet_modtag) $@ -cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< - -$(obj)/%.i: $(src)/%.c FORCE - $(call if_changed_dep,cc_i_c) - -quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ -cmd_cc_symtypes_c = \ - $(CPP) -D__GENKSYMS__ $(c_flags) $< \ - | $(GENKSYMS) -T $@ >/dev/null; \ - test -s $@ || rm -f $@ - -$(obj)/%.symtypes : $(src)/%.c FORCE - $(call if_changed_dep,cc_symtypes_c) - -# C (.c) files -# The C file is compiled and updated dependency information is generated. -# (See cmd_cc_o_c + relevant part of rule_cc_o_c) - -quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ - -ifndef CONFIG_MODVERSIONS -cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< - -else -# When module versioning is enabled the following steps are executed: -# o compile a .tmp_.o from .c -# o if .tmp_.o doesn't contain a __ksymtab version, i.e. does -# not export symbols, we just rename .tmp_.o to .o and -# are done. 
-# o otherwise, we calculate symbol versions using the good old -# genksyms on the preprocessed source and postprocess them in a way -# that they are usable as a linker script -# o generate .o from .tmp_.o using the linker to -# replace the unresolved symbols __crc_exported_symbol with -# the actual value of the checksum generated by genksyms - -cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< -cmd_modversions = \ - if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ - $(CPP) -D__GENKSYMS__ $(c_flags) $< \ - | $(GENKSYMS) $(if $(KBUILD_SYMTYPES), \ - -T $(@D)/$(@F:.o=.symtypes)) -a $(ARCH) \ - > $(@D)/.tmp_$(@F:.o=.ver); \ - \ - $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ - -T $(@D)/.tmp_$(@F:.o=.ver); \ - rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ - else \ - mv -f $(@D)/.tmp_$(@F) $@; \ - fi; -endif - -define rule_cc_o_c - $(call echo-cmd,checksrc) $(cmd_checksrc) \ - $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ - $(cmd_modversions) \ - scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ - $(dot-target).tmp; \ - rm -f $(depfile); \ - mv -f $(dot-target).tmp $(dot-target).cmd -endef - -# Built-in and composite module parts -$(obj)/%.o: $(src)/%.c FORCE - $(call cmd,force_checksrc) - $(call if_changed_rule,cc_o_c) - -# Single-part modules are special since we need to mark them in $(MODVERDIR) - -$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE - $(call cmd,force_checksrc) - $(call if_changed_rule,cc_o_c) - @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod) - -quiet_cmd_cc_lst_c = MKLST $@ - cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ - $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ - System.map $(OBJDUMP) > $@ - -$(obj)/%.lst: $(src)/%.c FORCE - $(call if_changed_dep,cc_lst_c) - -# Compile assembler sources (.S) -# --------------------------------------------------------------------------- - -modkern_aflags := $(AFLAGS_KERNEL) - -$(real-objs-m) : modkern_aflags := $(AFLAGS_MODULE) -$(real-objs-m:.o=.s): 
modkern_aflags := $(AFLAGS_MODULE) - -quiet_cmd_as_s_S = CPP $(quiet_modtag) $@ -cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< - -$(obj)/%.s: $(src)/%.S FORCE - $(call if_changed_dep,as_s_S) - -quiet_cmd_as_o_S = AS $(quiet_modtag) $@ -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< - -$(obj)/%.o: $(src)/%.S FORCE - $(call if_changed_dep,as_o_S) - -targets += $(real-objs-y) $(real-objs-m) $(lib-y) -targets += $(extra-y) $(MAKECMDGOALS) $(always) - -# Linker scripts preprocessor (.lds.S -> .lds) -# --------------------------------------------------------------------------- -quiet_cmd_cpp_lds_S = LDS $@ - cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $< - -$(obj)/%.lds: $(src)/%.lds.S FORCE - $(call if_changed_dep,cpp_lds_S) - -# Build the compiled-in targets -# --------------------------------------------------------------------------- - -# To build objects in subdirs, we need to descend into the directories -$(sort $(subdir-obj-y)): $(subdir-ym) ; - -# -# Rule to compile a set of .o files into one .o file -# -ifdef builtin-target -quiet_cmd_link_o_target = LD $@ -# If the list of objects to link is empty, just create an empty built-in.o -cmd_link_o_target = $(if $(strip $(obj-y)),\ - $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^),\ - rm -f $@; $(AR) rcs $@) - -$(builtin-target): $(obj-y) FORCE - $(call if_changed,link_o_target) - -targets += $(builtin-target) -endif # builtin-target - -# -# Rule to compile a set of .o files into one .a file -# -ifdef lib-target -quiet_cmd_link_l_target = AR $@ -cmd_link_l_target = rm -f $@; $(AR) $(EXTRA_ARFLAGS) rcs $@ $(lib-y) - -$(lib-target): $(lib-y) FORCE - $(call if_changed,link_l_target) - -targets += $(lib-target) -endif - -# -# Rule to link composite objects -# -# Composite objects are specified in kbuild makefile as follows: -# -objs := -# or -# -y := -link_multi_deps = \ -$(filter $(addprefix $(obj)/, \ -$($(subst $(obj)/,,$(@:.o=-objs))) \ -$($(subst $(obj)/,,$(@:.o=-y)))), $^) - -quiet_cmd_link_multi-y = LD $@ 
-cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) - -quiet_cmd_link_multi-m = LD [M] $@ -cmd_link_multi-m = $(LD) $(ld_flags) $(LDFLAGS_MODULE) -o $@ $(link_multi_deps) - -# We would rather have a list of rules like -# foo.o: $(foo-objs) -# but that's not so easy, so we rather make all composite objects depend -# on the set of all their parts -$(multi-used-y) : %.o: $(multi-objs-y) FORCE - $(call if_changed,link_multi-y) - -$(multi-used-m) : %.o: $(multi-objs-m) FORCE - $(call if_changed,link_multi-m) - @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod) - -targets += $(multi-used-y) $(multi-used-m) - - -# Descending -# --------------------------------------------------------------------------- - -PHONY += $(subdir-ym) -$(subdir-ym): - $(Q)$(MAKE) $(build)=$@ - -# Add FORCE to the prequisites of a target to force it to be always rebuilt. -# --------------------------------------------------------------------------- - -PHONY += FORCE - -FORCE: - -# Read all saved command lines and dependencies for the $(targets) we -# may be building above, using $(if_changed{,_dep}). As an -# optimization, we don't need to read them if the target does not -# exist, we will rebuild anyway in that case. - -targets := $(wildcard $(sort $(targets))) -cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) - -ifneq ($(cmd_files),) - include $(cmd_files) -endif - - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. 
- -.PHONY: $(PHONY) diff -Nurb linux-2.6.22-570/scripts/Makefile.modpost.orig linux-2.6.22-591/scripts/Makefile.modpost.orig --- linux-2.6.22-570/scripts/Makefile.modpost.orig 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/scripts/Makefile.modpost.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,132 +0,0 @@ -# =========================================================================== -# Module versions -# =========================================================================== -# -# Stage one of module building created the following: -# a) The individual .o files used for the module -# b) A .o file which is the .o files above linked together -# c) A .mod file in $(MODVERDIR)/, listing the name of the -# the preliminary .o file, plus all .o files - -# Stage 2 is handled by this file and does the following -# 1) Find all modules from the files listed in $(MODVERDIR)/ -# 2) modpost is then used to -# 3) create one .mod.c file pr. module -# 4) create one Module.symvers file with CRC for all exported symbols -# 5) compile all .mod.c files -# 6) final link of the module to a file - -# Step 3 is used to place certain information in the module's ELF -# section, including information such as: -# Version magic (see include/vermagic.h for full details) -# - Kernel release -# - SMP is CONFIG_SMP -# - PREEMPT is CONFIG_PREEMPT -# - GCC Version -# Module info -# - Module version (MODULE_VERSION) -# - Module alias'es (MODULE_ALIAS) -# - Module license (MODULE_LICENSE) -# - See include/linux/module.h for more details - -# Step 4 is solely used to allow module versioning in external modules, -# where the CRC of each module is retrieved from the Module.symers file. - -# KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined -# symbols in the final module linking stage -# KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules. 
-# This is solely usefull to speed up test compiles -PHONY := _modpost -_modpost: __modpost - -include include/config/auto.conf -include scripts/Kbuild.include -include scripts/Makefile.lib - -kernelsymfile := $(objtree)/Module.symvers -modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers - -# Step 1), find all modules listed in $(MODVERDIR)/ -__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) -modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) - -# Stop after building .o files if NOFINAL is set. Makes compile tests quicker -_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) - - -# Step 2), invoke modpost -# Includes step 3,4 -quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules - cmd_modpost = scripts/mod/modpost \ - $(if $(CONFIG_MODVERSIONS),-m) \ - $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ - $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ - $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ - $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) - -PHONY += __modpost -__modpost: $(modules:.ko=.o) FORCE - $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^) - -quiet_cmd_kernel-mod = MODPOST $@ - cmd_kernel-mod = $(cmd_modpost) $(KBUILD_VMLINUX_OBJS) - -PHONY += vmlinux -vmlinux: FORCE - $(call cmd,kernel-mod) - -# Declare generated files as targets for modpost -$(symverfile): __modpost ; -$(modules:.ko=.mod.c): __modpost ; - - -# Step 5), compile all *.mod.c files - -# modname is set to make c_flags define KBUILD_MODNAME -modname = $(notdir $(@:.mod.o=)) - -quiet_cmd_cc_o_c = CC $@ - cmd_cc_o_c = $(CC) $(c_flags) $(CFLAGS_MODULE) \ - -c -o $@ $< - -$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE - $(call if_changed_dep,cc_o_c) - -targets += $(modules:.ko=.mod.o) - -# Step 6), final link of the modules -quiet_cmd_ld_ko_o = LD [M] $@ - cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@ \ - $(filter-out FORCE,$^) - 
-$(modules): %.ko :%.o %.mod.o FORCE - $(call if_changed,ld_ko_o) - -targets += $(modules) - - -# Add FORCE to the prequisites of a target to force it to be always rebuilt. -# --------------------------------------------------------------------------- - -PHONY += FORCE - -FORCE: - -# Read all saved command lines and dependencies for the $(targets) we -# may be building above, using $(if_changed{,_dep}). As an -# optimization, we don't need to read them if the target does not -# exist, we will rebuild anyway in that case. - -targets := $(wildcard $(sort $(targets))) -cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) - -ifneq ($(cmd_files),) - include $(cmd_files) -endif - - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. - -.PHONY: $(PHONY) diff -Nurb linux-2.6.22-570/security/commoncap.c linux-2.6.22-591/security/commoncap.c --- linux-2.6.22-570/security/commoncap.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/security/commoncap.c 2007-12-21 15:36:13.000000000 -0500 @@ -150,7 +150,7 @@ if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset (new_permitted, current->cap_permitted)) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { diff -Nurb linux-2.6.22-570/security/dummy.c linux-2.6.22-591/security/dummy.c --- linux-2.6.22-570/security/dummy.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/security/dummy.c 2007-12-21 15:36:13.000000000 -0500 @@ -131,7 +131,7 @@ static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) && !capable(CAP_SETUID)) { bprm->e_uid = current->uid; @@ -421,8 +421,12 
@@ static int dummy_file_mmap (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags) + unsigned long flags, + unsigned long addr, + unsigned long addr_only) { + if (addr < mmap_min_addr) + return -EACCES; return 0; } diff -Nurb linux-2.6.22-570/security/keys/request_key.c linux-2.6.22-591/security/keys/request_key.c --- linux-2.6.22-570/security/keys/request_key.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/keys/request_key.c 2007-12-21 15:36:13.000000000 -0500 @@ -108,7 +108,8 @@ argv[i] = NULL; /* do it */ - ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1); + ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, + UMH_WAIT_PROC); error_link: key_put(keyring); diff -Nurb linux-2.6.22-570/security/security.c linux-2.6.22-591/security/security.c --- linux-2.6.22-570/security/security.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/security.c 2007-12-21 15:36:13.000000000 -0500 @@ -24,6 +24,7 @@ extern void security_fixup_ops(struct security_operations *ops); struct security_operations *security_ops; /* Initialized to NULL */ +unsigned long mmap_min_addr; /* 0 means no protection */ static inline int verify(struct security_operations *ops) { @@ -176,4 +177,5 @@ EXPORT_SYMBOL_GPL(unregister_security); EXPORT_SYMBOL_GPL(mod_reg_security); EXPORT_SYMBOL_GPL(mod_unreg_security); +EXPORT_SYMBOL_GPL(mmap_min_addr); EXPORT_SYMBOL(security_ops); diff -Nurb linux-2.6.22-570/security/selinux/avc.c linux-2.6.22-591/security/selinux/avc.c --- linux-2.6.22-570/security/selinux/avc.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/avc.c 2007-12-21 15:36:13.000000000 -0500 @@ -586,7 +586,7 @@ } } if (inode) - audit_log_format(ab, " dev=%s ino=%ld", + audit_log_format(ab, " dev=%s ino=%lu", inode->i_sb->s_id, inode->i_ino); break; @@ -832,6 +832,7 @@ * @tsid: target security identifier * @tclass: target security class * @requested: requested permissions, 
interpreted based on @tclass + * @flags: AVC_STRICT or 0 * @avd: access vector decisions * * Check the AVC to determine whether the @requested permissions are granted @@ -847,6 +848,7 @@ */ int avc_has_perm_noaudit(u32 ssid, u32 tsid, u16 tclass, u32 requested, + unsigned flags, struct av_decision *avd) { struct avc_node *node; @@ -874,7 +876,7 @@ denied = requested & ~(p_ae->avd.allowed); if (!requested || denied) { - if (selinux_enforcing) + if (selinux_enforcing || (flags & AVC_STRICT)) rc = -EACCES; else if (node) @@ -909,7 +911,7 @@ struct av_decision avd; int rc; - rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, &avd); + rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata); return rc; } diff -Nurb linux-2.6.22-570/security/selinux/hooks.c linux-2.6.22-591/security/selinux/hooks.c --- linux-2.6.22-570/security/selinux/hooks.c 2007-12-21 15:35:59.000000000 -0500 +++ linux-2.6.22-591/security/selinux/hooks.c 2007-12-21 15:36:15.000000000 -0500 @@ -111,6 +111,9 @@ /* Original (dummy) security module. */ static struct security_operations *original_ops = NULL; +/* Did we enable minimum mmap address checking? */ +static int enabled_mmap_min_addr; + /* Minimal support for a secondary security module, just to allow the use of the dummy or capability modules. 
The owlsm module can alternatively be used as a secondary @@ -1593,6 +1596,7 @@ rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, SECCLASS_CAPABILITY, CAP_TO_MASK(CAP_SYS_ADMIN), + 0, NULL); if (rc == 0) @@ -2570,12 +2574,16 @@ } static int selinux_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) { - int rc; + int rc = 0; + u32 sid = ((struct task_security_struct*)(current->security))->sid; - rc = secondary_ops->file_mmap(file, reqprot, prot, flags); - if (rc) + if (addr < mmap_min_addr) + rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, + MEMPROTECT__MMAP_ZERO, NULL); + if (rc || addr_only) return rc; if (selinux_checkreqprot) @@ -3223,8 +3231,8 @@ /* Range of port numbers used to automatically bind. Need to determine whether we should perform a name_bind permission check between the socket and the port number. */ -#define ip_local_port_range_0 sysctl_local_port_range[0] -#define ip_local_port_range_1 sysctl_local_port_range[1] +#define ip_local_port_range_0 (sk->sk_net->sysctl_local_port_range[0]) +#define ip_local_port_range_1 (sk->sk_net->sysctl_local_port_range[1]) static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -3968,6 +3976,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET); } @@ -3979,6 +3991,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6); } @@ -4628,7 +4644,7 @@ if (p->ptrace & PT_PTRACED) { error = avc_has_perm_noaudit(tsec->ptrace_sid, sid, 
SECCLASS_PROCESS, - PROCESS__PTRACE, &avd); + PROCESS__PTRACE, 0, &avd); if (!error) tsec->sid = sid; task_unlock(p); @@ -4910,6 +4926,16 @@ sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), 0, SLAB_PANIC, NULL, NULL); + + /* + * Tasks cannot mmap below this without the mmap_zero permission. + * If not enabled already, do so by setting it to 64KB. + */ + if (mmap_min_addr == 0) { + enabled_mmap_min_addr = 1; + mmap_min_addr = 65536; + } + avc_init(); original_ops = secondary_ops = security_ops; @@ -5060,6 +5086,10 @@ selinux_disabled = 1; selinux_enabled = 0; + /* Disable minimum mmap address check only if we enabled it */ + if (enabled_mmap_min_addr) + mmap_min_addr = 0; + /* Reset security_ops to the secondary module, dummy or capability. */ security_ops = secondary_ops; diff -Nurb linux-2.6.22-570/security/selinux/include/av_perm_to_string.h linux-2.6.22-591/security/selinux/include/av_perm_to_string.h --- linux-2.6.22-570/security/selinux/include/av_perm_to_string.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/include/av_perm_to_string.h 2007-12-21 15:36:13.000000000 -0500 @@ -158,3 +158,4 @@ S_(SECCLASS_KEY, KEY__CREATE, "create") S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NODE_BIND, "node_bind") S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NAME_CONNECT, "name_connect") + S_(SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, "mmap_zero") diff -Nurb linux-2.6.22-570/security/selinux/include/av_permissions.h linux-2.6.22-591/security/selinux/include/av_permissions.h --- linux-2.6.22-570/security/selinux/include/av_permissions.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/include/av_permissions.h 2007-12-21 15:36:13.000000000 -0500 @@ -823,3 +823,4 @@ #define DCCP_SOCKET__NAME_BIND 0x00200000UL #define DCCP_SOCKET__NODE_BIND 0x00400000UL #define DCCP_SOCKET__NAME_CONNECT 0x00800000UL +#define MEMPROTECT__MMAP_ZERO 0x00000001UL diff -Nurb 
linux-2.6.22-570/security/selinux/include/avc.h linux-2.6.22-591/security/selinux/include/avc.h --- linux-2.6.22-570/security/selinux/include/avc.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/include/avc.h 2007-12-21 15:36:13.000000000 -0500 @@ -102,8 +102,10 @@ u16 tclass, u32 requested, struct av_decision *avd, int result, struct avc_audit_data *auditdata); +#define AVC_STRICT 1 /* Ignore permissive mode. */ int avc_has_perm_noaudit(u32 ssid, u32 tsid, u16 tclass, u32 requested, + unsigned flags, struct av_decision *avd); int avc_has_perm(u32 ssid, u32 tsid, diff -Nurb linux-2.6.22-570/security/selinux/include/class_to_string.h linux-2.6.22-591/security/selinux/include/class_to_string.h --- linux-2.6.22-570/security/selinux/include/class_to_string.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/include/class_to_string.h 2007-12-21 15:36:13.000000000 -0500 @@ -63,3 +63,4 @@ S_("key") S_(NULL) S_("dccp_socket") + S_("memprotect") diff -Nurb linux-2.6.22-570/security/selinux/include/flask.h linux-2.6.22-591/security/selinux/include/flask.h --- linux-2.6.22-570/security/selinux/include/flask.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/include/flask.h 2007-12-21 15:36:13.000000000 -0500 @@ -49,6 +49,7 @@ #define SECCLASS_PACKET 57 #define SECCLASS_KEY 58 #define SECCLASS_DCCP_SOCKET 60 +#define SECCLASS_MEMPROTECT 61 /* * Security identifier indices for initial entities diff -Nurb linux-2.6.22-570/security/selinux/include/security.h linux-2.6.22-591/security/selinux/include/security.h --- linux-2.6.22-570/security/selinux/include/security.h 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/include/security.h 2007-12-21 15:36:13.000000000 -0500 @@ -41,6 +41,7 @@ int security_load_policy(void * data, size_t len); +#define SEL_VEC_MAX 32 struct av_decision { u32 allowed; u32 decided; @@ -87,6 +88,9 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 
*new_sid); +int security_get_classes(char ***classes, int *nclasses); +int security_get_permissions(char *class, char ***perms, int *nperms); + #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. pipefs/sockfs */ diff -Nurb linux-2.6.22-570/security/selinux/netif.c linux-2.6.22-591/security/selinux/netif.c --- linux-2.6.22-570/security/selinux/netif.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/netif.c 2007-12-21 15:36:15.000000000 -0500 @@ -20,6 +20,7 @@ #include #include #include +#include #include "security.h" #include "objsec.h" @@ -234,6 +235,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) sel_netif_kill(dev); diff -Nurb linux-2.6.22-570/security/selinux/netlink.c linux-2.6.22-591/security/selinux/netlink.c --- linux-2.6.22-570/security/selinux/netlink.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/netlink.c 2007-12-21 15:36:15.000000000 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include static struct sock *selnl; @@ -104,8 +105,8 @@ static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL, - THIS_MODULE); + selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX, + SELNLGRP_MAX, NULL, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); diff -Nurb linux-2.6.22-570/security/selinux/selinuxfs.c linux-2.6.22-591/security/selinux/selinuxfs.c --- linux-2.6.22-570/security/selinux/selinuxfs.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/selinuxfs.c 2007-12-21 15:36:13.000000000 -0500 @@ -67,6 +67,10 @@ static int bool_num = 0; static int *bool_pending_values = NULL; +/* global data for classes */ +static struct dentry *class_dir = 
NULL; +static unsigned long last_class_ino; + extern void selnl_notify_setenforce(int val); /* Check whether a task is allowed to use a security operation. */ @@ -106,6 +110,7 @@ #define SEL_INITCON_INO_OFFSET 0x01000000 #define SEL_BOOL_INO_OFFSET 0x02000000 +#define SEL_CLASS_INO_OFFSET 0x04000000 #define SEL_INO_MASK 0x00ffffff #define TMPBUFLEN 12 @@ -237,6 +242,11 @@ /* declaration for sel_write_load */ static int sel_make_bools(void); +static int sel_make_classes(void); + +/* declaration for sel_make_class_dir_entries and sel_make_classes */ +static int sel_make_dir(struct inode *dir, struct dentry *dentry, + unsigned long *ino); static ssize_t sel_read_mls(struct file *filp, char __user *buf, size_t count, loff_t *ppos) @@ -287,10 +297,18 @@ goto out; ret = sel_make_bools(); + if (ret) { + length = ret; + goto out1; + } + + ret = sel_make_classes(); if (ret) length = ret; else length = count; + +out1: audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_POLICY_LOAD, "policy loaded auid=%u", audit_get_loginuid(current->audit_context)); @@ -940,9 +958,8 @@ .write = sel_commit_bools_write, }; -/* delete booleans - partial revoke() from - * fs/proc/generic.c proc_kill_inodes */ -static void sel_remove_bools(struct dentry *de) +/* partial revoke() from fs/proc/generic.c proc_kill_inodes */ +static void sel_remove_entries(struct dentry *de) { struct list_head *p, *node; struct super_block *sb = de->d_sb; @@ -998,7 +1015,7 @@ kfree(bool_pending_values); bool_pending_values = NULL; - sel_remove_bools(dir); + sel_remove_entries(dir); if (!(page = (char*)get_zeroed_page(GFP_KERNEL))) return -ENOMEM; @@ -1048,7 +1065,7 @@ return ret; err: kfree(values); - sel_remove_bools(dir); + sel_remove_entries(dir); ret = -ENOMEM; goto out; } @@ -1294,7 +1311,227 @@ return ret; } -static int sel_make_dir(struct inode *dir, struct dentry *dentry) +static inline unsigned int sel_div(unsigned long a, unsigned long b) +{ + return a / b; /* operands are unsigned, so no rounding fix-up is needed */ +} + +static inline unsigned long sel_class_to_ino(u16
class) +{ + return (class * (SEL_VEC_MAX + 1)) | SEL_CLASS_INO_OFFSET; +} + +static inline u16 sel_ino_to_class(unsigned long ino) +{ + return sel_div(ino & SEL_INO_MASK, SEL_VEC_MAX + 1); +} + +static inline unsigned long sel_perm_to_ino(u16 class, u32 perm) +{ + return (class * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET; +} + +static inline u32 sel_ino_to_perm(unsigned long ino) +{ + return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1); +} + +static ssize_t sel_read_class(struct file * file, char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t rc, len; + char *page; + unsigned long ino = file->f_path.dentry->d_inode->i_ino; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) { + rc = -ENOMEM; + goto out; + } + + len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino)); + rc = simple_read_from_buffer(buf, count, ppos, page, len); + free_page((unsigned long)page); +out: + return rc; +} + +static const struct file_operations sel_class_ops = { + .read = sel_read_class, +}; + +static ssize_t sel_read_perm(struct file * file, char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t rc, len; + char *page; + unsigned long ino = file->f_path.dentry->d_inode->i_ino; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) { + rc = -ENOMEM; + goto out; + } + + len = snprintf(page, PAGE_SIZE,"%u", sel_ino_to_perm(ino)); + rc = simple_read_from_buffer(buf, count, ppos, page, len); + free_page((unsigned long)page); +out: + return rc; +} + +static const struct file_operations sel_perm_ops = { + .read = sel_read_perm, +}; + +static int sel_make_perm_files(char *objclass, int classvalue, + struct dentry *dir) +{ + int i, rc = 0, nperms; + char **perms; + + rc = security_get_permissions(objclass, &perms, &nperms); + if (rc) + goto out; + + for (i = 0; i < nperms; i++) { + struct inode *inode; + struct dentry *dentry; + + dentry = d_alloc_name(dir, perms[i]); + if (!dentry) { + rc = -ENOMEM; + goto out1; + } + + inode = sel_make_inode(dir->d_sb,
S_IFREG|S_IRUGO); + if (!inode) { + rc = -ENOMEM; + goto out1; + } + inode->i_fop = &sel_perm_ops; + /* i+1 since perm values are 1-indexed */ + inode->i_ino = sel_perm_to_ino(classvalue, i+1); + d_add(dentry, inode); + } + +out1: + for (i = 0; i < nperms; i++) + kfree(perms[i]); + kfree(perms); +out: + return rc; +} + +static int sel_make_class_dir_entries(char *classname, int index, + struct dentry *dir) +{ + struct dentry *dentry = NULL; + struct inode *inode = NULL; + int rc; + + dentry = d_alloc_name(dir, "index"); + if (!dentry) { + rc = -ENOMEM; + goto out; + } + + inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO); + if (!inode) { + rc = -ENOMEM; + goto out; + } + + inode->i_fop = &sel_class_ops; + inode->i_ino = sel_class_to_ino(index); + d_add(dentry, inode); + + dentry = d_alloc_name(dir, "perms"); + if (!dentry) { + rc = -ENOMEM; + goto out; + } + + rc = sel_make_dir(dir->d_inode, dentry, &last_class_ino); + if (rc) + goto out; + + rc = sel_make_perm_files(classname, index, dentry); + +out: + return rc; +} + +static void sel_remove_classes(void) +{ + struct list_head *class_node; + + list_for_each(class_node, &class_dir->d_subdirs) { + struct dentry *class_subdir = list_entry(class_node, + struct dentry, d_u.d_child); + struct list_head *class_subdir_node; + + list_for_each(class_subdir_node, &class_subdir->d_subdirs) { + struct dentry *d = list_entry(class_subdir_node, + struct dentry, d_u.d_child); + + if (d->d_inode) + if (d->d_inode->i_mode & S_IFDIR) + sel_remove_entries(d); + } + + sel_remove_entries(class_subdir); + } + + sel_remove_entries(class_dir); +} + +static int sel_make_classes(void) +{ + int rc = 0, nclasses, i; + char **classes; + + /* delete any existing entries */ + sel_remove_classes(); + + rc = security_get_classes(&classes, &nclasses); + if (rc < 0) + goto out; + + /* +2 since classes are 1-indexed */ + last_class_ino = sel_class_to_ino(nclasses+2); + + for (i = 0; i < nclasses; i++) { + struct dentry *class_name_dir; + + 
class_name_dir = d_alloc_name(class_dir, classes[i]); + if (!class_name_dir) { + rc = -ENOMEM; + goto out1; + } + + rc = sel_make_dir(class_dir->d_inode, class_name_dir, + &last_class_ino); + if (rc) + goto out1; + + /* i+1 since class values are 1-indexed */ + rc = sel_make_class_dir_entries(classes[i], i+1, + class_name_dir); + if (rc) + goto out1; + } + +out1: + for (i = 0; i < nclasses; i++) + kfree(classes[i]); + kfree(classes); +out: + return rc; +} + +static int sel_make_dir(struct inode *dir, struct dentry *dentry, + unsigned long *ino) { int ret = 0; struct inode *inode; @@ -1306,7 +1543,7 @@ } inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - inode->i_ino = ++sel_last_ino; + inode->i_ino = ++(*ino); /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_add(dentry, inode); @@ -1352,7 +1589,7 @@ goto err; } - ret = sel_make_dir(root_inode, dentry); + ret = sel_make_dir(root_inode, dentry, &sel_last_ino); if (ret) goto err; @@ -1385,7 +1622,7 @@ goto err; } - ret = sel_make_dir(root_inode, dentry); + ret = sel_make_dir(root_inode, dentry, &sel_last_ino); if (ret) goto err; @@ -1399,7 +1636,7 @@ goto err; } - ret = sel_make_dir(root_inode, dentry); + ret = sel_make_dir(root_inode, dentry, &sel_last_ino); if (ret) goto err; @@ -1407,6 +1644,18 @@ if (ret) goto err; + dentry = d_alloc_name(sb->s_root, "class"); + if (!dentry) { + ret = -ENOMEM; + goto err; + } + + ret = sel_make_dir(root_inode, dentry, &sel_last_ino); + if (ret) + goto err; + + class_dir = dentry; + out: return ret; err: diff -Nurb linux-2.6.22-570/security/selinux/ss/policydb.c linux-2.6.22-591/security/selinux/ss/policydb.c --- linux-2.6.22-570/security/selinux/ss/policydb.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/ss/policydb.c 2007-12-21 15:36:13.000000000 -0500 @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -598,6 +599,7 @@ struct range_trans *rt, *lrt = NULL; for (i 
= 0; i < SYM_NUM; i++) { + cond_resched(); hashtab_map(p->symtab[i].table, destroy_f[i], NULL); hashtab_destroy(p->symtab[i].table); } @@ -612,6 +614,7 @@ avtab_destroy(&p->te_avtab); for (i = 0; i < OCON_NUM; i++) { + cond_resched(); c = p->ocontexts[i]; while (c) { ctmp = c; @@ -623,6 +626,7 @@ g = p->genfs; while (g) { + cond_resched(); kfree(g->fstype); c = g->head; while (c) { @@ -639,18 +643,21 @@ cond_policydb_destroy(p); for (tr = p->role_tr; tr; tr = tr->next) { + cond_resched(); kfree(ltr); ltr = tr; } kfree(ltr); for (ra = p->role_allow; ra; ra = ra -> next) { + cond_resched(); kfree(lra); lra = ra; } kfree(lra); for (rt = p->range_tr; rt; rt = rt -> next) { + cond_resched(); if (lrt) { ebitmap_destroy(&lrt->target_range.level[0].cat); ebitmap_destroy(&lrt->target_range.level[1].cat); diff -Nurb linux-2.6.22-570/security/selinux/ss/services.c linux-2.6.22-591/security/selinux/ss/services.c --- linux-2.6.22-570/security/selinux/ss/services.c 2007-07-08 19:32:17.000000000 -0400 +++ linux-2.6.22-591/security/selinux/ss/services.c 2007-12-21 15:36:13.000000000 -0500 @@ -1587,19 +1587,18 @@ u32 *nel) { struct context *fromcon, usercon; - u32 *mysids, *mysids2, sid; + u32 *mysids = NULL, *mysids2, sid; u32 mynel = 0, maxnel = SIDS_NEL; struct user_datum *user; struct role_datum *role; - struct av_decision avd; struct ebitmap_node *rnode, *tnode; int rc = 0, i, j; - if (!ss_initialized) { *sids = NULL; *nel = 0; + + if (!ss_initialized) goto out; - } POLICY_RDLOCK; @@ -1635,17 +1634,9 @@ if (mls_setup_user_range(fromcon, user, &usercon)) continue; - rc = context_struct_compute_av(fromcon, &usercon, - SECCLASS_PROCESS, - PROCESS__TRANSITION, - &avd); - if (rc || !(avd.allowed & PROCESS__TRANSITION)) - continue; rc = sidtab_context_to_sid(&sidtab, &usercon, &sid); - if (rc) { - kfree(mysids); + if (rc) goto out_unlock; - } if (mynel < maxnel) { mysids[mynel++] = sid; } else { @@ -1653,7 +1644,6 @@ mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC); if 
(!mysids2) { rc = -ENOMEM; - kfree(mysids); goto out_unlock; } memcpy(mysids2, mysids, mynel * sizeof(*mysids2)); @@ -1664,11 +1654,32 @@ } } - *sids = mysids; - *nel = mynel; - out_unlock: POLICY_RDUNLOCK; + if (rc || !mynel) { + kfree(mysids); + goto out; + } + + mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL); + if (!mysids2) { + rc = -ENOMEM; + kfree(mysids); + goto out; + } + for (i = 0, j = 0; i < mynel; i++) { + rc = avc_has_perm_noaudit(fromsid, mysids[i], + SECCLASS_PROCESS, + PROCESS__TRANSITION, AVC_STRICT, + NULL); + if (!rc) + mysids2[j++] = mysids[i]; + cond_resched(); + } + rc = 0; + kfree(mysids); + *sids = mysids2; + *nel = j; out: return rc; } @@ -1996,6 +2007,101 @@ return rc; } +static int get_classes_callback(void *k, void *d, void *args) +{ + struct class_datum *datum = d; + char *name = k, **classes = args; + int value = datum->value - 1; + + classes[value] = kstrdup(name, GFP_ATOMIC); + if (!classes[value]) + return -ENOMEM; + + return 0; +} + +int security_get_classes(char ***classes, int *nclasses) +{ + int rc = -ENOMEM; + + POLICY_RDLOCK; + + *nclasses = policydb.p_classes.nprim; + *classes = kcalloc(*nclasses, sizeof(*classes), GFP_ATOMIC); + if (!*classes) + goto out; + + rc = hashtab_map(policydb.p_classes.table, get_classes_callback, + *classes); + if (rc < 0) { + int i; + for (i = 0; i < *nclasses; i++) + kfree((*classes)[i]); + kfree(*classes); + } + +out: + POLICY_RDUNLOCK; + return rc; +} + +static int get_permissions_callback(void *k, void *d, void *args) +{ + struct perm_datum *datum = d; + char *name = k, **perms = args; + int value = datum->value - 1; + + perms[value] = kstrdup(name, GFP_ATOMIC); + if (!perms[value]) + return -ENOMEM; + + return 0; +} + +int security_get_permissions(char *class, char ***perms, int *nperms) +{ + int rc = -ENOMEM, i; + struct class_datum *match; + + POLICY_RDLOCK; + + match = hashtab_search(policydb.p_classes.table, class); + if (!match) { + printk(KERN_ERR "%s: unrecognized class %s\n", 
+ __FUNCTION__, class); + rc = -EINVAL; + goto out; + } + + *nperms = match->permissions.nprim; + *perms = kcalloc(*nperms, sizeof(*perms), GFP_ATOMIC); + if (!*perms) + goto out; + + if (match->comdatum) { + rc = hashtab_map(match->comdatum->permissions.table, + get_permissions_callback, *perms); + if (rc < 0) + goto err; + } + + rc = hashtab_map(match->permissions.table, get_permissions_callback, + *perms); + if (rc < 0) + goto err; + +out: + POLICY_RDUNLOCK; + return rc; + +err: + POLICY_RDUNLOCK; + for (i = 0; i < *nperms; i++) + kfree((*perms)[i]); + kfree(*perms); + return rc; +} + struct selinux_audit_rule { u32 au_seqno; struct context au_ctxt; diff -Nurb linux-2.6.22-570/toapply linux-2.6.22-591/toapply --- linux-2.6.22-570/toapply 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/toapply 2007-12-21 15:36:15.000000000 -0500 @@ -0,0 +1,51 @@ +cat ../broken-out/cpuidle-fix-the-uninitialized-variable-in-sysfs-routine.patch | patch -p1 +cat ../broken-out/cpuidle-make-cpuidle-sysfs-driver-governor-switch-off-by-default.patch | patch -p1 +cat ../broken-out/acpi-video-dont-export-sysfs-backlight-interface-if-query-_bcl-fail.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-rules.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1 +cat 
../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-root-sysfs_dirent-root-dentry-association.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch 
| patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1 +cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1 +cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link-fix.patch | patch -p1 +cat ../broken-out/git-scsi-misc-vs-greg-sysfs-stuff.patch | patch -p1 +cat ../broken-out/gregkh-usb-usb-cxacru-cleanup-sysfs-attribute-code.patch | patch -p1 +cat ../broken-out/gregkh-usb-usb-add-iad-support-to-usbfs-and-sysfs.patch | patch -p1 +cat ../broken-out/x86_64-mm-xen-add-the-xenbus-sysfs-and-virtual-device-hotplug-driver.patch | patch -p1 +cat ../broken-out/drivers-edac-mc-sysfs-add-missing-mem-types.patch | patch -p1 +cat ../broken-out/drivers-edac-edac_device-sysfs-cleanup.patch | patch -p1 +cat ../broken-out/drivers-edac-add-device-sysfs-attributes.patch | patch -p1 diff -Nurb linux-2.6.22-570/trellis-mm1-1.sh linux-2.6.22-591/trellis-mm1-1.sh --- linux-2.6.22-570/trellis-mm1-1.sh 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-591/trellis-mm1-1.sh 2007-12-21 15:36:13.000000000 -0500 @@ -0,0 +1,142 @@ +cat ../broken-out/origin.patch | patch -p1 +cat 
../broken-out/ioatdma-fix-section-mismatches.patch | patch -p1 +cat ../broken-out/introduce-fixed-sys_sync_file_range2-syscall-implement-on.patch | patch -p1 +cat ../broken-out/git-acpi.patch | patch -p1 +cat ../broken-out/agk-dm-dm-netlink.patch | patch -p1 +cat ../broken-out/git-powerpc.patch | patch -p1 +cat ../broken-out/make-drivers-char-hvc_consoleckhvcd-static.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1 +cat 
../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1 +cat ../broken-out/gregkh-driver-driver-core-make-devt_attr-and-uevent_attr-static.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1 +cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1 +cat 
../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1 +cat ../broken-out/gregkh-driver-block-device.patch | patch -p1 +cat ../broken-out/revert-gregkh-driver-block-device.patch | patch -p1 +cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1 +cat ../broken-out/git-md-accel.patch | patch -p1 +cat ../broken-out/git-mmc.patch | patch -p1 +cat ../broken-out/git-net.patch | patch -p1 +cat ../broken-out/tun-tap-allow-group-ownership-of-tun-tap-devices.patch | patch -p1 +cat ../broken-out/git-nfs.patch | patch -p1 +cat ../broken-out/git-ocfs2.patch | patch -p1 +cat ../broken-out/git-selinux.patch | patch -p1 +cat ../broken-out/revert-acpi-change-for-scsi.patch | patch -p1 +cat ../broken-out/git-scsi-misc.patch | patch -p1 +cat ../broken-out/git-unionfs.patch | patch -p1 +cat ../broken-out/x86_64-mm-unwinder.patch | patch -p1 +cat ../broken-out/x86_64-mm-xencleanup-add-kstrndup.patch | patch -p1 +cat ../broken-out/x86_64-mm-xencleanup-add-argv_split.patch | patch -p1 +cat ../broken-out/x86_64-mm-xencleanup-split-usermodehelper-setup-from-execution.patch | patch -p1 +cat ../broken-out/x86_64-mm-add-common-orderly_poweroff.patch | patch -p1 +cat ../broken-out/x86_64-mm-xencleanup-tidy-up-usermode-helper-waiting-a-bit.patch | patch -p1 +cat ../broken-out/x86_64-mm-xen-add-the-xen-virtual-network-device-driver.patch | patch -p1 +cat ../broken-out/i386-show-unhandled-signals.patch | patch -p1 +cat ../broken-out/git-kgdb.patch | patch -p1 +cat ../broken-out/hugetlb-remove-unnecessary-nid-initialization.patch | patch -p1 +cat ../broken-out/mm-alloc_large_system_hash-can-free-some-memory-for.patch | patch -p1 +cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings.patch | patch -p1 +cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings-fix.patch | patch -p1 +cat ../broken-out/mm-merge-populate-and-nopage-into-fault-fixes-nonlinear.patch | patch -p1 +cat 
../broken-out/add-a-bitmap-that-is-used-to-track-flags-affecting-a-block-of-pages.patch | patch -p1 +cat ../broken-out/add-__gfp_movable-for-callers-to-flag-allocations-from-high-memory-that-may-be-migrated.patch | patch -p1 +cat ../broken-out/split-the-free-lists-for-movable-and-unmovable-allocations.patch | patch -p1 +cat ../broken-out/choose-pages-from-the-per-cpu-list-based-on-migration-type.patch | patch -p1 +cat ../broken-out/add-a-configure-option-to-group-pages-by-mobility.patch | patch -p1 +cat ../broken-out/move-free-pages-between-lists-on-steal.patch | patch -p1 +cat ../broken-out/group-short-lived-and-reclaimable-kernel-allocations.patch | patch -p1 +cat ../broken-out/allow-huge-page-allocations-to-use-gfp_high_movable.patch | patch -p1 +cat ../broken-out/maps2-uninline-some-functions-in-the-page-walker.patch | patch -p1 +cat ../broken-out/maps2-eliminate-the-pmd_walker-struct-in-the-page-walker.patch | patch -p1 +cat ../broken-out/maps2-remove-vma-from-args-in-the-page-walker.patch | patch -p1 +cat ../broken-out/maps2-propagate-errors-from-callback-in-page-walker.patch | patch -p1 +cat ../broken-out/maps2-add-callbacks-for-each-level-to-page-walker.patch | patch -p1 +cat ../broken-out/maps2-move-the-page-walker-code-to-lib.patch | patch -p1 +cat ../broken-out/maps2-simplify-interdependence-of-proc-pid-maps-and-smaps.patch | patch -p1 +cat ../broken-out/maps2-move-clear_refs-code-to-task_mmuc.patch | patch -p1 +cat ../broken-out/maps2-regroup-task_mmu-by-interface.patch | patch -p1 +cat ../broken-out/maps2-make-proc-pid-smaps-optional-under-config_embedded.patch | patch -p1 +cat ../broken-out/maps2-make-proc-pid-clear_refs-option-under-config_embedded.patch | patch -p1 +cat ../broken-out/maps2-add-proc-pid-pagemap-interface.patch | patch -p1 +cat ../broken-out/have-kswapd-keep-a-minimum-order-free-other-than-order-0.patch | patch -p1 +cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default.patch | patch -p1 +cat 
../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix.patch | patch -p1 +cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix-2.patch | patch -p1 +cat ../broken-out/uml-use-get_free_pages-to-allocate-kernel-stacks.patch | patch -p1 +cat ../broken-out/add-generic-exit-time-stack-depth-checking-to-config_debug_stack_usage.patch | patch -p1 +cat ../broken-out/cpuset-remove-sched-domain-hooks-from-cpusets.patch | patch -p1 +cat ../broken-out/clone-flag-clone_parent_tidptr-leaves-invalid-results-in-memory.patch | patch -p1 +cat ../broken-out/use-boot-based-time-for-process-start-time-and-boot-time.patch | patch -p1 +cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock.patch | patch -p1 +cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock-fix.patch | patch -p1 +cat ../broken-out/taskstats-add-context-switch-counters.patch | patch -p1 +cat ../broken-out/taskstats-add-context-switch-counters-fix.patch | patch -p1 +cat ../broken-out/remove-config_uts_ns-and-config_ipc_ns.patch | patch -p1 +cat ../broken-out/user-namespace-add-the-framework.patch | patch -p1 +cat ../broken-out/user-namespace-add-unshare.patch | patch -p1 +cat ../broken-out/mm-fix-create_new_namespaces-return-value.patch | patch -p1 +cat ../broken-out/add-a-kmem_cache-for-nsproxy-objects.patch | patch -p1 +cat ../broken-out/namespace-ensure-clone_flags-are-always-stored-in-an-unsigned-long.patch | patch -p1 +cat ../broken-out/sysctlc-add-text-telling-people-to-use-ctl_unnumbered.patch | patch -p1 +cat ../broken-out/proper-prototype-for-proc_nr_files.patch | patch -p1 +cat ../broken-out/move-seccomp-from-proc-to-a-prctl.patch | patch -p1 +cat ../broken-out/uninline-check_signature.patch | patch -p1 +cat ../broken-out/revoke-core-code.patch | patch -p1 +cat ../broken-out/revoke-wire-up-i386-system-calls.patch | patch -p1 +cat ../broken-out/fallocate-implementation-on-i86-x86_64-and-powerpc.patch | patch -p1 +cat 
../broken-out/coredump-masking-reimplementation-of-dumpable-using-two-flags.patch | patch -p1 +cat ../broken-out/coredump-masking-add-an-interface-for-core-dump-filter.patch | patch -p1 +cat ../broken-out/cpuset-zero-malloc-revert-the-old-cpuset-fix.patch | patch -p1 +cat ../broken-out/containersv10-basic-container-framework.patch | patch -p1 +cat ../broken-out/containersv10-basic-container-framework-fix.patch | patch -p1 +cat ../broken-out/containersv10-basic-container-framework-fix-for-bad-lock-balance-in-containers.patch | patch -p1 +cat ../broken-out/containersv10-example-cpu-accounting-subsystem.patch | patch -p1 +cat ../broken-out/containersv10-add-tasks-file-interface.patch | patch -p1 +cat ../broken-out/containersv10-add-tasks-file-interface-fix.patch | patch -p1 +cat ../broken-out/containersv10-add-tasks-file-interface-fix-2.patch | patch -p1 +cat ../broken-out/containersv10-add-fork-exit-hooks.patch | patch -p1 +cat ../broken-out/containersv10-add-fork-exit-hooks-fix.patch | patch -p1 +cat ../broken-out/containersv10-add-container_clone-interface.patch | patch -p1 +cat ../broken-out/containersv10-add-procfs-interface.patch | patch -p1 +cat ../broken-out/containersv10-add-procfs-interface-fix.patch | patch -p1 +cat ../broken-out/containersv10-make-cpusets-a-client-of-containers.patch | patch -p1 +cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships.patch | patch -p1 +cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships-cpuset-zero-malloc-fix-for-new-containers.patch | patch -p1 +cat ../broken-out/containersv10-simple-debug-info-subsystem.patch | patch -p1 +cat ../broken-out/containersv10-support-for-automatic-userspace-release-agents.patch | patch -p1 +cat ../broken-out/containers-implement-subsys-post_clone.patch | patch -p1 +cat ../broken-out/containers-implement-namespace-tracking-subsystem-v3.patch | patch -p1 +cat 
../broken-out/keep-track-of-network-interface-renaming.patch | patch -p1 +cat ../broken-out/v2.6.22-rc6-mm1-netns23.patch | patch -p1